{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 68166, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003082297338949964, "grad_norm": 0.5791122913360596, "learning_rate": 1.2325390304026295e-05, "loss": 11.5149, "step": 3 }, { "epoch": 0.0006164594677899928, "grad_norm": 1.7733268737792969, "learning_rate": 2.465078060805259e-05, "loss": 11.504, "step": 6 }, { "epoch": 0.0009246892016849893, "grad_norm": 3.047468423843384, "learning_rate": 3.697617091207888e-05, "loss": 11.4391, "step": 9 }, { "epoch": 0.0012329189355799856, "grad_norm": 3.346104621887207, "learning_rate": 4.930156121610518e-05, "loss": 11.2635, "step": 12 }, { "epoch": 0.001541148669474982, "grad_norm": 3.2847304344177246, "learning_rate": 6.162695152013147e-05, "loss": 10.9864, "step": 15 }, { "epoch": 0.0018493784033699785, "grad_norm": 3.2979729175567627, "learning_rate": 7.395234182415776e-05, "loss": 10.6487, "step": 18 }, { "epoch": 0.0021576081372649747, "grad_norm": 3.445396661758423, "learning_rate": 8.627773212818406e-05, "loss": 10.2667, "step": 21 }, { "epoch": 0.0024658378711599712, "grad_norm": 3.620011568069458, "learning_rate": 9.860312243221036e-05, "loss": 9.8679, "step": 24 }, { "epoch": 0.002774067605054968, "grad_norm": 3.66719913482666, "learning_rate": 0.00011092851273623665, "loss": 9.4532, "step": 27 }, { "epoch": 0.003082297338949964, "grad_norm": 3.709792375564575, "learning_rate": 0.00012325390304026294, "loss": 9.0393, "step": 30 }, { "epoch": 0.0033905270728449605, "grad_norm": 3.5757813453674316, "learning_rate": 0.00013557929334428925, "loss": 8.6449, "step": 33 }, { "epoch": 0.003698756806739957, "grad_norm": 3.200732707977295, "learning_rate": 0.00014790468364831553, "loss": 8.2932, "step": 36 }, { "epoch": 0.004006986540634953, "grad_norm": 2.7463502883911133, "learning_rate": 0.0001602300739523418, "loss": 8.0151, "step": 39 }, { "epoch": 0.004315216274529949, "grad_norm": 1.961776852607727, "learning_rate": 0.00017255546425636812, "loss": 7.7988, "step": 42 }, { "epoch": 0.004623446008424946, "grad_norm": 1.5326848030090332, "learning_rate": 0.0001848808545603944, "loss": 7.6688, "step": 45 }, { "epoch": 0.0049316757423199424, "grad_norm": 2.266148090362549, "learning_rate": 0.00019720624486442071, "loss": 7.6276, "step": 48 }, { "epoch": 0.005239905476214939, "grad_norm": 2.563206672668457, "learning_rate": 0.000209531635168447, "loss": 7.5954, "step": 51 }, { "epoch": 0.005548135210109936, "grad_norm": 1.9326294660568237, "learning_rate": 0.0002218570254724733, "loss": 7.5136, "step": 54 }, { "epoch": 0.005856364944004932, "grad_norm": 1.0131096839904785, "learning_rate": 0.0002341824157764996, "loss": 7.4167, "step": 57 }, { "epoch": 0.006164594677899928, "grad_norm": 1.1923354864120483, "learning_rate": 0.00024650780608052587, "loss": 7.3273, "step": 60 }, { "epoch": 0.006472824411794925, "grad_norm": 1.2690105438232422, "learning_rate": 0.00025883319638455215, "loss": 7.2615, "step": 63 }, { "epoch": 0.006781054145689921, "grad_norm": 0.8375588059425354, "learning_rate": 0.0002711585866885785, "loss": 7.2088, "step": 66 }, { "epoch": 0.007089283879584917, "grad_norm": 0.719011127948761, "learning_rate": 0.0002834839769926048, "loss": 7.1474, "step": 69 }, { "epoch": 0.007397513613479914, "grad_norm": 0.7095780372619629, "learning_rate": 0.00029580936729663106, "loss": 7.0923, "step": 72 }, { "epoch": 0.00770574334737491, "grad_norm": 0.5094121098518372, "learning_rate": 0.00030813475760065734, "loss": 7.0406, "step": 75 }, { "epoch": 0.008013973081269906, "grad_norm": 0.4085525572299957, "learning_rate": 0.0003204601479046836, "loss": 6.9586, "step": 78 }, { "epoch": 0.008322202815164903, "grad_norm": 0.4122146666049957, "learning_rate": 0.0003327855382087099, "loss": 6.9295, "step": 81 }, { "epoch": 0.008630432549059899, "grad_norm": 0.41669413447380066, "learning_rate": 0.00034511092851273624, "loss": 6.8629, "step": 84 }, { "epoch": 0.008938662282954896, "grad_norm": 0.40379494428634644, "learning_rate": 0.0003574363188167625, "loss": 6.7987, "step": 87 }, { "epoch": 0.009246892016849893, "grad_norm": 0.4202142059803009, "learning_rate": 0.0003697617091207888, "loss": 6.7779, "step": 90 }, { "epoch": 0.009555121750744888, "grad_norm": 0.3880140781402588, "learning_rate": 0.0003820870994248151, "loss": 6.7204, "step": 93 }, { "epoch": 0.009863351484639885, "grad_norm": 0.419883668422699, "learning_rate": 0.00039441248972884143, "loss": 6.6553, "step": 96 }, { "epoch": 0.010171581218534882, "grad_norm": 0.4570387601852417, "learning_rate": 0.0004067378800328677, "loss": 6.5936, "step": 99 }, { "epoch": 0.010479810952429877, "grad_norm": 0.5889524221420288, "learning_rate": 0.000419063270336894, "loss": 6.5271, "step": 102 }, { "epoch": 0.010788040686324874, "grad_norm": 0.5578189492225647, "learning_rate": 0.0004313886606409203, "loss": 6.4701, "step": 105 }, { "epoch": 0.011096270420219871, "grad_norm": 0.4710846245288849, "learning_rate": 0.0004437140509449466, "loss": 6.4254, "step": 108 }, { "epoch": 0.011404500154114866, "grad_norm": 0.30558162927627563, "learning_rate": 0.0004560394412489729, "loss": 6.4217, "step": 111 }, { "epoch": 0.011712729888009863, "grad_norm": 0.3634621500968933, "learning_rate": 0.0004683648315529992, "loss": 6.3422, "step": 114 }, { "epoch": 0.01202095962190486, "grad_norm": 0.369437575340271, "learning_rate": 0.00048069022185702546, "loss": 6.303, "step": 117 }, { "epoch": 0.012329189355799856, "grad_norm": 0.7418856024742126, "learning_rate": 0.0004930156121610517, "loss": 6.2535, "step": 120 }, { "epoch": 0.012637419089694853, "grad_norm": 0.855678915977478, "learning_rate": 0.000505341002465078, "loss": 6.2037, "step": 123 }, { "epoch": 0.01294564882358985, "grad_norm": 0.42414671182632446, "learning_rate": 0.0005176663927691043, "loss": 6.175, "step": 126 }, { "epoch": 0.013253878557484845, "grad_norm": 0.44572392106056213, "learning_rate": 0.0005299917830731307, "loss": 6.1548, "step": 129 }, { "epoch": 0.013562108291379842, "grad_norm": 0.24437515437602997, "learning_rate": 0.000542317173377157, "loss": 6.0967, "step": 132 }, { "epoch": 0.013870338025274839, "grad_norm": 0.4750615954399109, "learning_rate": 0.0005546425636811833, "loss": 6.0776, "step": 135 }, { "epoch": 0.014178567759169834, "grad_norm": 0.9504273533821106, "learning_rate": 0.0005669679539852095, "loss": 6.0471, "step": 138 }, { "epoch": 0.014486797493064831, "grad_norm": 0.5452646613121033, "learning_rate": 0.0005792933442892358, "loss": 5.9987, "step": 141 }, { "epoch": 0.014795027226959828, "grad_norm": 0.4314074218273163, "learning_rate": 0.0005916187345932621, "loss": 5.9874, "step": 144 }, { "epoch": 0.015103256960854823, "grad_norm": 0.48243793845176697, "learning_rate": 0.0006039441248972884, "loss": 5.962, "step": 147 }, { "epoch": 0.01541148669474982, "grad_norm": 0.7570855617523193, "learning_rate": 0.0006162695152013147, "loss": 5.921, "step": 150 }, { "epoch": 0.015719716428644816, "grad_norm": 0.4731276333332062, "learning_rate": 0.000628594905505341, "loss": 5.9032, "step": 153 }, { "epoch": 0.016027946162539813, "grad_norm": 0.3078465759754181, "learning_rate": 0.0006409202958093672, "loss": 5.8932, "step": 156 }, { "epoch": 0.01633617589643481, "grad_norm": 0.3441495895385742, "learning_rate": 0.0006532456861133935, "loss": 5.8695, "step": 159 }, { "epoch": 0.016644405630329807, "grad_norm": 0.43056854605674744, "learning_rate": 0.0006655710764174198, "loss": 5.8642, "step": 162 }, { "epoch": 0.016952635364224804, "grad_norm": 1.3783445358276367, "learning_rate": 0.0006778964667214461, "loss": 5.845, "step": 165 }, { "epoch": 0.017260865098119797, "grad_norm": 0.77984619140625, "learning_rate": 0.0006902218570254725, "loss": 5.8216, "step": 168 }, { "epoch": 0.017569094832014794, "grad_norm": 0.2990098297595978, "learning_rate": 0.0007025472473294988, "loss": 5.7801, "step": 171 }, { "epoch": 0.01787732456590979, "grad_norm": 0.29485219717025757, "learning_rate": 0.000714872637633525, "loss": 5.7757, "step": 174 }, { "epoch": 0.018185554299804788, "grad_norm": 0.4363936483860016, "learning_rate": 0.0007271980279375513, "loss": 5.7598, "step": 177 }, { "epoch": 0.018493784033699785, "grad_norm": 0.8902605175971985, "learning_rate": 0.0007395234182415776, "loss": 5.7719, "step": 180 }, { "epoch": 0.018802013767594782, "grad_norm": 0.7133249044418335, "learning_rate": 0.0007518488085456039, "loss": 5.7568, "step": 183 }, { "epoch": 0.019110243501489776, "grad_norm": 0.9105846881866455, "learning_rate": 0.0007641741988496302, "loss": 5.7191, "step": 186 }, { "epoch": 0.019418473235384773, "grad_norm": 0.9680726528167725, "learning_rate": 0.0007764995891536565, "loss": 5.7077, "step": 189 }, { "epoch": 0.01972670296927977, "grad_norm": 0.535446047782898, "learning_rate": 0.0007888249794576829, "loss": 5.6982, "step": 192 }, { "epoch": 0.020034932703174767, "grad_norm": 0.7894541621208191, "learning_rate": 0.0008011503697617091, "loss": 5.668, "step": 195 }, { "epoch": 0.020343162437069764, "grad_norm": 0.6975138187408447, "learning_rate": 0.0008134757600657354, "loss": 5.6432, "step": 198 }, { "epoch": 0.02065139217096476, "grad_norm": 0.6306262016296387, "learning_rate": 0.0008258011503697617, "loss": 5.6647, "step": 201 }, { "epoch": 0.020959621904859754, "grad_norm": 0.5615081787109375, "learning_rate": 0.000838126540673788, "loss": 5.6208, "step": 204 }, { "epoch": 0.02126785163875475, "grad_norm": 0.6468993425369263, "learning_rate": 0.0008504519309778143, "loss": 5.6272, "step": 207 }, { "epoch": 0.02157608137264975, "grad_norm": 0.8359414339065552, "learning_rate": 0.0008627773212818406, "loss": 5.6114, "step": 210 }, { "epoch": 0.021884311106544745, "grad_norm": 0.8909689784049988, "learning_rate": 0.0008751027115858668, "loss": 5.5957, "step": 213 }, { "epoch": 0.022192540840439742, "grad_norm": 0.39673465490341187, "learning_rate": 0.0008874281018898932, "loss": 5.568, "step": 216 }, { "epoch": 0.022500770574334736, "grad_norm": 0.9037743806838989, "learning_rate": 0.0008997534921939195, "loss": 5.5746, "step": 219 }, { "epoch": 0.022809000308229733, "grad_norm": 0.6929497122764587, "learning_rate": 0.0009120788824979458, "loss": 5.5482, "step": 222 }, { "epoch": 0.02311723004212473, "grad_norm": 0.5773665308952332, "learning_rate": 0.0009244042728019721, "loss": 5.5389, "step": 225 }, { "epoch": 0.023425459776019727, "grad_norm": 0.9532020092010498, "learning_rate": 0.0009367296631059984, "loss": 5.5369, "step": 228 }, { "epoch": 0.023733689509914724, "grad_norm": 1.2347012758255005, "learning_rate": 0.0009490550534100246, "loss": 5.5251, "step": 231 }, { "epoch": 0.02404191924380972, "grad_norm": 1.2062091827392578, "learning_rate": 0.0009613804437140509, "loss": 5.5042, "step": 234 }, { "epoch": 0.024350148977704714, "grad_norm": 1.4920969009399414, "learning_rate": 0.0009737058340180772, "loss": 5.4851, "step": 237 }, { "epoch": 0.02465837871159971, "grad_norm": 0.5619600415229797, "learning_rate": 0.0009860312243221035, "loss": 5.4889, "step": 240 }, { "epoch": 0.02496660844549471, "grad_norm": 0.8607615828514099, "learning_rate": 0.0009983566146261299, "loss": 5.4747, "step": 243 }, { "epoch": 0.025274838179389705, "grad_norm": 0.6228588223457336, "learning_rate": 0.001010682004930156, "loss": 5.4502, "step": 246 }, { "epoch": 0.025583067913284702, "grad_norm": 1.1925005912780762, "learning_rate": 0.0010230073952341824, "loss": 5.4449, "step": 249 }, { "epoch": 0.0258912976471797, "grad_norm": 0.7956414818763733, "learning_rate": 0.0010353327855382086, "loss": 5.4623, "step": 252 }, { "epoch": 0.026199527381074693, "grad_norm": 0.654242992401123, "learning_rate": 0.001047658175842235, "loss": 5.4287, "step": 255 }, { "epoch": 0.02650775711496969, "grad_norm": 0.592880368232727, "learning_rate": 0.0010599835661462614, "loss": 5.3891, "step": 258 }, { "epoch": 0.026815986848864687, "grad_norm": 0.9015865921974182, "learning_rate": 0.0010723089564502876, "loss": 5.4127, "step": 261 }, { "epoch": 0.027124216582759684, "grad_norm": 0.593488335609436, "learning_rate": 0.001084634346754314, "loss": 5.3887, "step": 264 }, { "epoch": 0.02743244631665468, "grad_norm": 0.7008156180381775, "learning_rate": 0.0010969597370583401, "loss": 5.386, "step": 267 }, { "epoch": 0.027740676050549678, "grad_norm": 0.32653194665908813, "learning_rate": 0.0011092851273623665, "loss": 5.3479, "step": 270 }, { "epoch": 0.02804890578444467, "grad_norm": 0.551142692565918, "learning_rate": 0.0011216105176663927, "loss": 5.3613, "step": 273 }, { "epoch": 0.02835713551833967, "grad_norm": 2.3521084785461426, "learning_rate": 0.001133935907970419, "loss": 5.3653, "step": 276 }, { "epoch": 0.028665365252234665, "grad_norm": 1.3452407121658325, "learning_rate": 0.0011462612982744455, "loss": 5.3559, "step": 279 }, { "epoch": 0.028973594986129662, "grad_norm": 1.0670260190963745, "learning_rate": 0.0011585866885784717, "loss": 5.3299, "step": 282 }, { "epoch": 0.02928182472002466, "grad_norm": 0.7768902778625488, "learning_rate": 0.001170912078882498, "loss": 5.3346, "step": 285 }, { "epoch": 0.029590054453919656, "grad_norm": 0.48641496896743774, "learning_rate": 0.0011832374691865242, "loss": 5.3178, "step": 288 }, { "epoch": 0.02989828418781465, "grad_norm": 0.5284126400947571, "learning_rate": 0.0011955628594905506, "loss": 5.3061, "step": 291 }, { "epoch": 0.030206513921709647, "grad_norm": 0.9099608659744263, "learning_rate": 0.0012078882497945768, "loss": 5.2764, "step": 294 }, { "epoch": 0.030514743655604644, "grad_norm": 0.7352691888809204, "learning_rate": 0.0012202136400986032, "loss": 5.2853, "step": 297 }, { "epoch": 0.03082297338949964, "grad_norm": 0.8361043334007263, "learning_rate": 0.0012325390304026294, "loss": 5.2838, "step": 300 }, { "epoch": 0.031131203123394638, "grad_norm": 1.525067925453186, "learning_rate": 0.0012448644207066558, "loss": 5.2612, "step": 303 }, { "epoch": 0.03143943285728963, "grad_norm": 0.6117688417434692, "learning_rate": 0.001257189811010682, "loss": 5.2488, "step": 306 }, { "epoch": 0.03174766259118463, "grad_norm": 0.9976358413696289, "learning_rate": 0.0012695152013147083, "loss": 5.2327, "step": 309 }, { "epoch": 0.032055892325079625, "grad_norm": 0.8152816891670227, "learning_rate": 0.0012818405916187345, "loss": 5.2095, "step": 312 }, { "epoch": 0.03236412205897462, "grad_norm": 0.8640046715736389, "learning_rate": 0.0012941659819227609, "loss": 5.1932, "step": 315 }, { "epoch": 0.03267235179286962, "grad_norm": 0.9461572170257568, "learning_rate": 0.001306491372226787, "loss": 5.1822, "step": 318 }, { "epoch": 0.03298058152676461, "grad_norm": 0.7717807292938232, "learning_rate": 0.0013188167625308134, "loss": 5.183, "step": 321 }, { "epoch": 0.03328881126065961, "grad_norm": 0.9057526588439941, "learning_rate": 0.0013311421528348396, "loss": 5.1686, "step": 324 }, { "epoch": 0.03359704099455461, "grad_norm": 0.5352618098258972, "learning_rate": 0.001343467543138866, "loss": 5.1378, "step": 327 }, { "epoch": 0.03390527072844961, "grad_norm": 1.2399810552597046, "learning_rate": 0.0013557929334428922, "loss": 5.1436, "step": 330 }, { "epoch": 0.0342135004623446, "grad_norm": 0.6678963303565979, "learning_rate": 0.0013681183237469186, "loss": 5.1488, "step": 333 }, { "epoch": 0.034521730196239594, "grad_norm": 0.6166791915893555, "learning_rate": 0.001380443714050945, "loss": 5.1239, "step": 336 }, { "epoch": 0.034829959930134595, "grad_norm": 1.1305850744247437, "learning_rate": 0.0013927691043549711, "loss": 5.1145, "step": 339 }, { "epoch": 0.03513818966402959, "grad_norm": 0.46510085463523865, "learning_rate": 0.0014050944946589975, "loss": 5.1041, "step": 342 }, { "epoch": 0.03544641939792459, "grad_norm": 0.4835362136363983, "learning_rate": 0.0014174198849630237, "loss": 5.0699, "step": 345 }, { "epoch": 0.03575464913181958, "grad_norm": 0.6595330238342285, "learning_rate": 0.00142974527526705, "loss": 5.0744, "step": 348 }, { "epoch": 0.036062878865714576, "grad_norm": 0.7306437492370605, "learning_rate": 0.0014420706655710763, "loss": 5.0703, "step": 351 }, { "epoch": 0.036371108599609576, "grad_norm": 0.5263068079948425, "learning_rate": 0.0014543960558751027, "loss": 5.0666, "step": 354 }, { "epoch": 0.03667933833350457, "grad_norm": 0.5896726250648499, "learning_rate": 0.001466721446179129, "loss": 5.0326, "step": 357 }, { "epoch": 0.03698756806739957, "grad_norm": 0.9357500672340393, "learning_rate": 0.0014790468364831552, "loss": 5.0363, "step": 360 }, { "epoch": 0.037295797801294564, "grad_norm": 0.7629897594451904, "learning_rate": 0.0014913722267871816, "loss": 5.0304, "step": 363 }, { "epoch": 0.037604027535189564, "grad_norm": 0.6347280144691467, "learning_rate": 0.0015036976170912078, "loss": 4.9962, "step": 366 }, { "epoch": 0.03791225726908456, "grad_norm": 0.4810947775840759, "learning_rate": 0.0015160230073952342, "loss": 4.9856, "step": 369 }, { "epoch": 0.03822048700297955, "grad_norm": 0.5907162427902222, "learning_rate": 0.0015283483976992604, "loss": 4.9712, "step": 372 }, { "epoch": 0.03852871673687455, "grad_norm": 0.5781192183494568, "learning_rate": 0.0015406737880032868, "loss": 5.0078, "step": 375 }, { "epoch": 0.038836946470769546, "grad_norm": 0.6017566323280334, "learning_rate": 0.001552999178307313, "loss": 4.9563, "step": 378 }, { "epoch": 0.039145176204664546, "grad_norm": 1.208348035812378, "learning_rate": 0.0015653245686113393, "loss": 4.9696, "step": 381 }, { "epoch": 0.03945340593855954, "grad_norm": 0.6113926768302917, "learning_rate": 0.0015776499589153657, "loss": 4.9461, "step": 384 }, { "epoch": 0.03976163567245453, "grad_norm": 0.6794010996818542, "learning_rate": 0.0015899753492193919, "loss": 4.9668, "step": 387 }, { "epoch": 0.040069865406349534, "grad_norm": 0.4383271038532257, "learning_rate": 0.0016023007395234183, "loss": 4.9283, "step": 390 }, { "epoch": 0.04037809514024453, "grad_norm": 0.9564613699913025, "learning_rate": 0.0016146261298274444, "loss": 4.8814, "step": 393 }, { "epoch": 0.04068632487413953, "grad_norm": 0.6730177402496338, "learning_rate": 0.0016269515201314708, "loss": 4.9158, "step": 396 }, { "epoch": 0.04099455460803452, "grad_norm": 0.5306158661842346, "learning_rate": 0.001639276910435497, "loss": 4.904, "step": 399 }, { "epoch": 0.04130278434192952, "grad_norm": 0.48708540201187134, "learning_rate": 0.0016516023007395234, "loss": 4.9002, "step": 402 }, { "epoch": 0.041611014075824515, "grad_norm": 0.4917944371700287, "learning_rate": 0.0016639276910435496, "loss": 4.8913, "step": 405 }, { "epoch": 0.04191924380971951, "grad_norm": 1.0929678678512573, "learning_rate": 0.001676253081347576, "loss": 4.8986, "step": 408 }, { "epoch": 0.04222747354361451, "grad_norm": 0.5417898297309875, "learning_rate": 0.0016885784716516024, "loss": 4.8702, "step": 411 }, { "epoch": 0.0425357032775095, "grad_norm": 1.1427472829818726, "learning_rate": 0.0017009038619556285, "loss": 4.8396, "step": 414 }, { "epoch": 0.0428439330114045, "grad_norm": 0.8225170969963074, "learning_rate": 0.001713229252259655, "loss": 4.8439, "step": 417 }, { "epoch": 0.0431521627452995, "grad_norm": 0.5638198256492615, "learning_rate": 0.001725554642563681, "loss": 4.8271, "step": 420 }, { "epoch": 0.04346039247919449, "grad_norm": 0.3389821946620941, "learning_rate": 0.0017378800328677075, "loss": 4.8207, "step": 423 }, { "epoch": 0.04376862221308949, "grad_norm": 0.38620057702064514, "learning_rate": 0.0017502054231717337, "loss": 4.8082, "step": 426 }, { "epoch": 0.044076851946984484, "grad_norm": 1.1568442583084106, "learning_rate": 0.00176253081347576, "loss": 4.7946, "step": 429 }, { "epoch": 0.044385081680879485, "grad_norm": 0.650175154209137, "learning_rate": 0.0017748562037797865, "loss": 4.7798, "step": 432 }, { "epoch": 0.04469331141477448, "grad_norm": 0.5364396572113037, "learning_rate": 0.0017871815940838126, "loss": 4.7732, "step": 435 }, { "epoch": 0.04500154114866947, "grad_norm": 0.7013806700706482, "learning_rate": 0.001799506984387839, "loss": 4.7733, "step": 438 }, { "epoch": 0.04530977088256447, "grad_norm": 0.4559784233570099, "learning_rate": 0.0018118323746918652, "loss": 4.7789, "step": 441 }, { "epoch": 0.045618000616459466, "grad_norm": 0.3456243872642517, "learning_rate": 0.0018241577649958916, "loss": 4.7456, "step": 444 }, { "epoch": 0.045926230350354466, "grad_norm": 0.6245532631874084, "learning_rate": 0.0018364831552999178, "loss": 4.7408, "step": 447 }, { "epoch": 0.04623446008424946, "grad_norm": 1.1933598518371582, "learning_rate": 0.0018488085456039441, "loss": 4.7728, "step": 450 }, { "epoch": 0.04654268981814446, "grad_norm": 0.8743248581886292, "learning_rate": 0.0018611339359079703, "loss": 4.7595, "step": 453 }, { "epoch": 0.046850919552039454, "grad_norm": 0.4980567693710327, "learning_rate": 0.0018734593262119967, "loss": 4.7222, "step": 456 }, { "epoch": 0.04715914928593445, "grad_norm": 0.6380690932273865, "learning_rate": 0.001885784716516023, "loss": 4.7175, "step": 459 }, { "epoch": 0.04746737901982945, "grad_norm": 0.3606894612312317, "learning_rate": 0.0018981101068200493, "loss": 4.7075, "step": 462 }, { "epoch": 0.04777560875372444, "grad_norm": 0.5618919730186462, "learning_rate": 0.0019104354971240757, "loss": 4.6939, "step": 465 }, { "epoch": 0.04808383848761944, "grad_norm": 0.639410138130188, "learning_rate": 0.0019227608874281018, "loss": 4.6748, "step": 468 }, { "epoch": 0.048392068221514435, "grad_norm": 0.7849680185317993, "learning_rate": 0.0019350862777321282, "loss": 4.6895, "step": 471 }, { "epoch": 0.04870029795540943, "grad_norm": 0.5419800877571106, "learning_rate": 0.0019474116680361544, "loss": 4.64, "step": 474 }, { "epoch": 0.04900852768930443, "grad_norm": 0.40359726548194885, "learning_rate": 0.001959737058340181, "loss": 4.6564, "step": 477 }, { "epoch": 0.04931675742319942, "grad_norm": 0.742076575756073, "learning_rate": 0.001972062448644207, "loss": 4.6434, "step": 480 }, { "epoch": 0.04962498715709442, "grad_norm": 0.620801568031311, "learning_rate": 0.0019843878389482336, "loss": 4.6509, "step": 483 }, { "epoch": 0.04993321689098942, "grad_norm": 0.5293563008308411, "learning_rate": 0.0019967132292522598, "loss": 4.6459, "step": 486 }, { "epoch": 0.05024144662488442, "grad_norm": 0.7527710795402527, "learning_rate": 0.002009038619556286, "loss": 4.6557, "step": 489 }, { "epoch": 0.05054967635877941, "grad_norm": 0.47365424036979675, "learning_rate": 0.002021364009860312, "loss": 4.6223, "step": 492 }, { "epoch": 0.050857906092674404, "grad_norm": 0.5232967734336853, "learning_rate": 0.0020336894001643387, "loss": 4.6186, "step": 495 }, { "epoch": 0.051166135826569405, "grad_norm": 0.40717506408691406, "learning_rate": 0.002046014790468365, "loss": 4.6125, "step": 498 }, { "epoch": 0.0514743655604644, "grad_norm": 0.5403701066970825, "learning_rate": 0.002058340180772391, "loss": 4.6143, "step": 501 }, { "epoch": 0.0517825952943594, "grad_norm": 0.7209203839302063, "learning_rate": 0.0020706655710764172, "loss": 4.5713, "step": 504 }, { "epoch": 0.05209082502825439, "grad_norm": 0.6991008520126343, "learning_rate": 0.002082990961380444, "loss": 4.6044, "step": 507 }, { "epoch": 0.052399054762149386, "grad_norm": 0.7478086352348328, "learning_rate": 0.00209531635168447, "loss": 4.5685, "step": 510 }, { "epoch": 0.052707284496044386, "grad_norm": 0.5864932537078857, "learning_rate": 0.002107641741988496, "loss": 4.588, "step": 513 }, { "epoch": 0.05301551422993938, "grad_norm": 0.44748950004577637, "learning_rate": 0.002119967132292523, "loss": 4.5823, "step": 516 }, { "epoch": 0.05332374396383438, "grad_norm": 0.32787564396858215, "learning_rate": 0.002132292522596549, "loss": 4.5522, "step": 519 }, { "epoch": 0.053631973697729374, "grad_norm": 0.30747687816619873, "learning_rate": 0.002144617912900575, "loss": 4.5429, "step": 522 }, { "epoch": 0.05394020343162437, "grad_norm": 0.3548784554004669, "learning_rate": 0.0021569433032046013, "loss": 4.5207, "step": 525 }, { "epoch": 0.05424843316551937, "grad_norm": 0.6617491841316223, "learning_rate": 0.002169268693508628, "loss": 4.5373, "step": 528 }, { "epoch": 0.05455666289941436, "grad_norm": 0.9917429089546204, "learning_rate": 0.002181594083812654, "loss": 4.5504, "step": 531 }, { "epoch": 0.05486489263330936, "grad_norm": 0.6506537795066833, "learning_rate": 0.0021939194741166803, "loss": 4.5385, "step": 534 }, { "epoch": 0.055173122367204355, "grad_norm": 0.3738003075122833, "learning_rate": 0.002206244864420707, "loss": 4.5169, "step": 537 }, { "epoch": 0.055481352101099356, "grad_norm": 0.3488200008869171, "learning_rate": 0.002218570254724733, "loss": 4.5119, "step": 540 }, { "epoch": 0.05578958183499435, "grad_norm": 0.31217944622039795, "learning_rate": 0.0022308956450287592, "loss": 4.4796, "step": 543 }, { "epoch": 0.05609781156888934, "grad_norm": 0.26770153641700745, "learning_rate": 0.0022432210353327854, "loss": 4.4699, "step": 546 }, { "epoch": 0.05640604130278434, "grad_norm": 0.3656662702560425, "learning_rate": 0.002255546425636812, "loss": 4.4817, "step": 549 }, { "epoch": 0.05671427103667934, "grad_norm": 0.5845988392829895, "learning_rate": 0.002267871815940838, "loss": 4.4525, "step": 552 }, { "epoch": 0.05702250077057434, "grad_norm": 0.41006627678871155, "learning_rate": 0.0022801972062448644, "loss": 4.4649, "step": 555 }, { "epoch": 0.05733073050446933, "grad_norm": 1.2013694047927856, "learning_rate": 0.002292522596548891, "loss": 4.4666, "step": 558 }, { "epoch": 0.057638960238364324, "grad_norm": 0.6116489171981812, "learning_rate": 0.002304847986852917, "loss": 4.4853, "step": 561 }, { "epoch": 0.057947189972259325, "grad_norm": 0.30115845799446106, "learning_rate": 0.0023171733771569433, "loss": 4.4409, "step": 564 }, { "epoch": 0.05825541970615432, "grad_norm": 0.2863396108150482, "learning_rate": 0.0023294987674609695, "loss": 4.4358, "step": 567 }, { "epoch": 0.05856364944004932, "grad_norm": 0.3191300928592682, "learning_rate": 0.002341824157764996, "loss": 4.454, "step": 570 }, { "epoch": 0.05887187917394431, "grad_norm": 0.4280944764614105, "learning_rate": 0.0023541495480690223, "loss": 4.3943, "step": 573 }, { "epoch": 0.05918010890783931, "grad_norm": 0.49310484528541565, "learning_rate": 0.0023664749383730485, "loss": 4.4097, "step": 576 }, { "epoch": 0.059488338641734306, "grad_norm": 0.4923991858959198, "learning_rate": 0.002378800328677075, "loss": 4.4454, "step": 579 }, { "epoch": 0.0597965683756293, "grad_norm": 0.5043625235557556, "learning_rate": 0.0023911257189811012, "loss": 4.3975, "step": 582 }, { "epoch": 0.0601047981095243, "grad_norm": 0.5404270887374878, "learning_rate": 0.0024034511092851274, "loss": 4.3957, "step": 585 }, { "epoch": 0.060413027843419294, "grad_norm": 0.9954332709312439, "learning_rate": 0.0024157764995891536, "loss": 4.3864, "step": 588 }, { "epoch": 0.060721257577314294, "grad_norm": 0.3632584512233734, "learning_rate": 0.00242810188989318, "loss": 4.38, "step": 591 }, { "epoch": 0.06102948731120929, "grad_norm": 0.2620343267917633, "learning_rate": 0.0024404272801972064, "loss": 4.3538, "step": 594 }, { "epoch": 0.06133771704510428, "grad_norm": 0.25050923228263855, "learning_rate": 0.0024527526705012325, "loss": 4.351, "step": 597 }, { "epoch": 0.06164594677899928, "grad_norm": 0.27279627323150635, "learning_rate": 0.0024650780608052587, "loss": 4.3335, "step": 600 }, { "epoch": 0.061954176512894275, "grad_norm": 0.6038771271705627, "learning_rate": 0.0024774034511092853, "loss": 4.3409, "step": 603 }, { "epoch": 0.062262406246789276, "grad_norm": 0.6948337554931641, "learning_rate": 0.0024897288414133115, "loss": 4.3555, "step": 606 }, { "epoch": 0.06257063598068427, "grad_norm": 0.5086238980293274, "learning_rate": 0.0025020542317173377, "loss": 4.3491, "step": 609 }, { "epoch": 0.06287886571457926, "grad_norm": 0.475999116897583, "learning_rate": 0.002514379622021364, "loss": 4.3412, "step": 612 }, { "epoch": 0.06318709544847426, "grad_norm": 0.3968357741832733, "learning_rate": 0.0025267050123253905, "loss": 4.3139, "step": 615 }, { "epoch": 0.06349532518236926, "grad_norm": 0.6681760549545288, "learning_rate": 0.0025390304026294166, "loss": 4.2999, "step": 618 }, { "epoch": 0.06380355491626426, "grad_norm": 0.3453294634819031, "learning_rate": 0.002551355792933443, "loss": 4.2873, "step": 621 }, { "epoch": 0.06411178465015925, "grad_norm": 0.3346744775772095, "learning_rate": 0.002563681183237469, "loss": 4.2868, "step": 624 }, { "epoch": 0.06442001438405424, "grad_norm": 0.39689645171165466, "learning_rate": 0.0025760065735414956, "loss": 4.2846, "step": 627 }, { "epoch": 0.06472824411794924, "grad_norm": 0.4017212688922882, "learning_rate": 0.0025883319638455218, "loss": 4.2625, "step": 630 }, { "epoch": 0.06503647385184425, "grad_norm": 0.3414025902748108, "learning_rate": 0.0026006573541495484, "loss": 4.2657, "step": 633 }, { "epoch": 0.06534470358573924, "grad_norm": 0.4091610312461853, "learning_rate": 0.002612982744453574, "loss": 4.2414, "step": 636 }, { "epoch": 0.06565293331963423, "grad_norm": 0.3916926085948944, "learning_rate": 0.0026253081347576007, "loss": 4.1801, "step": 639 }, { "epoch": 0.06596116305352923, "grad_norm": 1.0324465036392212, "learning_rate": 0.002637633525061627, "loss": 4.2162, "step": 642 }, { "epoch": 0.06626939278742423, "grad_norm": 0.4595172107219696, "learning_rate": 0.0026499589153656535, "loss": 4.2352, "step": 645 }, { "epoch": 0.06657762252131923, "grad_norm": 0.3215947151184082, "learning_rate": 0.0026622843056696792, "loss": 4.1865, "step": 648 }, { "epoch": 0.06688585225521422, "grad_norm": 0.2739149034023285, "learning_rate": 0.002674609695973706, "loss": 4.1644, "step": 651 }, { "epoch": 0.06719408198910921, "grad_norm": 0.250794917345047, "learning_rate": 0.002686935086277732, "loss": 4.1164, "step": 654 }, { "epoch": 0.06750231172300421, "grad_norm": 0.38465654850006104, "learning_rate": 0.0026992604765817586, "loss": 4.0844, "step": 657 }, { "epoch": 0.06781054145689921, "grad_norm": 0.5341691970825195, "learning_rate": 0.0027115858668857844, "loss": 4.1149, "step": 660 }, { "epoch": 0.06811877119079421, "grad_norm": 0.3479110896587372, "learning_rate": 0.002723911257189811, "loss": 4.1186, "step": 663 }, { "epoch": 0.0684270009246892, "grad_norm": 1.026038646697998, "learning_rate": 0.002736236647493837, "loss": 4.1293, "step": 666 }, { "epoch": 0.0687352306585842, "grad_norm": 0.445689857006073, "learning_rate": 0.0027485620377978638, "loss": 4.1319, "step": 669 }, { "epoch": 0.06904346039247919, "grad_norm": 0.3061058819293976, "learning_rate": 0.00276088742810189, "loss": 4.0338, "step": 672 }, { "epoch": 0.0693516901263742, "grad_norm": 0.26792746782302856, "learning_rate": 0.002773212818405916, "loss": 4.0154, "step": 675 }, { "epoch": 0.06965991986026919, "grad_norm": 0.2843894064426422, "learning_rate": 0.0027855382087099423, "loss": 4.0364, "step": 678 }, { "epoch": 0.06996814959416418, "grad_norm": 0.3073459565639496, "learning_rate": 0.002797863599013969, "loss": 3.9836, "step": 681 }, { "epoch": 0.07027637932805918, "grad_norm": 0.5893545746803284, "learning_rate": 0.002810188989317995, "loss": 4.0062, "step": 684 }, { "epoch": 0.07058460906195417, "grad_norm": 0.5386547446250916, "learning_rate": 0.0028225143796220217, "loss": 4.0066, "step": 687 }, { "epoch": 0.07089283879584918, "grad_norm": 0.7944250106811523, "learning_rate": 0.0028348397699260474, "loss": 3.9816, "step": 690 }, { "epoch": 0.07120106852974417, "grad_norm": 0.32200196385383606, "learning_rate": 0.002847165160230074, "loss": 3.9551, "step": 693 }, { "epoch": 0.07150929826363916, "grad_norm": 0.28814995288848877, "learning_rate": 0.0028594905505341, "loss": 3.912, "step": 696 }, { "epoch": 0.07181752799753416, "grad_norm": 0.2727998197078705, "learning_rate": 0.002871815940838127, "loss": 3.9203, "step": 699 }, { "epoch": 0.07212575773142915, "grad_norm": 0.2785607576370239, "learning_rate": 0.0028841413311421525, "loss": 3.8865, "step": 702 }, { "epoch": 0.07243398746532416, "grad_norm": 0.4318368136882782, "learning_rate": 0.002896466721446179, "loss": 3.8364, "step": 705 }, { "epoch": 0.07274221719921915, "grad_norm": 0.5888954997062683, "learning_rate": 0.0029087921117502053, "loss": 3.8772, "step": 708 }, { "epoch": 0.07305044693311415, "grad_norm": 0.5866847634315491, "learning_rate": 0.002921117502054232, "loss": 3.9039, "step": 711 }, { "epoch": 0.07335867666700914, "grad_norm": 0.40300968289375305, "learning_rate": 0.002933442892358258, "loss": 3.8332, "step": 714 }, { "epoch": 0.07366690640090415, "grad_norm": 0.2894107401371002, "learning_rate": 0.0029457682826622843, "loss": 3.8533, "step": 717 }, { "epoch": 0.07397513613479914, "grad_norm": 0.2637479901313782, "learning_rate": 0.0029580936729663105, "loss": 3.7962, "step": 720 }, { "epoch": 0.07428336586869413, "grad_norm": 0.5004228353500366, "learning_rate": 0.002970419063270337, "loss": 3.7759, "step": 723 }, { "epoch": 0.07459159560258913, "grad_norm": 0.30835986137390137, "learning_rate": 0.0029827444535743632, "loss": 3.7819, "step": 726 }, { "epoch": 0.07489982533648412, "grad_norm": 0.5601911544799805, "learning_rate": 0.00299506984387839, "loss": 3.7716, "step": 729 }, { "epoch": 0.07520805507037913, "grad_norm": 0.48242396116256714, "learning_rate": 0.0030073952341824156, "loss": 3.803, "step": 732 }, { "epoch": 0.07551628480427412, "grad_norm": 0.355916827917099, "learning_rate": 0.003019720624486442, "loss": 3.7532, "step": 735 }, { "epoch": 0.07582451453816912, "grad_norm": 0.4205069839954376, "learning_rate": 0.0030320460147904684, "loss": 3.7657, "step": 738 }, { "epoch": 0.07613274427206411, "grad_norm": 0.35680562257766724, "learning_rate": 0.003044371405094495, "loss": 3.7348, "step": 741 }, { "epoch": 0.0764409740059591, "grad_norm": 0.36372673511505127, "learning_rate": 0.0030566967953985207, "loss": 3.7569, "step": 744 }, { "epoch": 0.07674920373985411, "grad_norm": 0.2887914776802063, "learning_rate": 0.0030690221857025473, "loss": 3.7704, "step": 747 }, { "epoch": 0.0770574334737491, "grad_norm": 0.255290687084198, "learning_rate": 0.0030813475760065735, "loss": 3.7054, "step": 750 }, { "epoch": 0.0773656632076441, "grad_norm": 0.2969897389411926, "learning_rate": 0.0030936729663106, "loss": 3.708, "step": 753 }, { "epoch": 0.07767389294153909, "grad_norm": 0.491763710975647, "learning_rate": 0.003105998356614626, "loss": 3.7279, "step": 756 }, { "epoch": 0.07798212267543408, "grad_norm": 0.6437285542488098, "learning_rate": 0.0031183237469186525, "loss": 3.7129, "step": 759 }, { "epoch": 0.07829035240932909, "grad_norm": 0.3605806827545166, "learning_rate": 0.0031306491372226786, "loss": 3.6754, "step": 762 }, { "epoch": 0.07859858214322409, "grad_norm": 0.26162126660346985, "learning_rate": 0.0031429745275267052, "loss": 3.6869, "step": 765 }, { "epoch": 0.07890681187711908, "grad_norm": 0.3107220530509949, "learning_rate": 0.0031552999178307314, "loss": 3.6278, "step": 768 }, { "epoch": 0.07921504161101407, "grad_norm": 0.30417200922966003, "learning_rate": 0.0031676253081347576, "loss": 3.6046, "step": 771 }, { "epoch": 0.07952327134490907, "grad_norm": 0.5612326860427856, "learning_rate": 0.0031799506984387838, "loss": 3.6578, "step": 774 }, { "epoch": 0.07983150107880407, "grad_norm": 0.6136355996131897, "learning_rate": 0.0031922760887428104, "loss": 3.6897, "step": 777 }, { "epoch": 0.08013973081269907, "grad_norm": 0.4560060501098633, "learning_rate": 0.0032046014790468366, "loss": 3.6976, "step": 780 }, { "epoch": 0.08044796054659406, "grad_norm": 0.23871034383773804, "learning_rate": 0.003216926869350863, "loss": 3.657, "step": 783 }, { "epoch": 0.08075619028048905, "grad_norm": 0.17063000798225403, "learning_rate": 0.003229252259654889, "loss": 3.5905, "step": 786 }, { "epoch": 0.08106442001438405, "grad_norm": 0.35351842641830444, "learning_rate": 0.0032415776499589155, "loss": 3.603, "step": 789 }, { "epoch": 0.08137264974827906, "grad_norm": 0.340762197971344, "learning_rate": 0.0032539030402629417, "loss": 3.5978, "step": 792 }, { "epoch": 0.08168087948217405, "grad_norm": 0.22542034089565277, "learning_rate": 0.0032662284305669683, "loss": 3.5821, "step": 795 }, { "epoch": 0.08198910921606904, "grad_norm": 0.25130555033683777, "learning_rate": 0.003278553820870994, "loss": 3.5491, "step": 798 }, { "epoch": 0.08229733894996404, "grad_norm": 0.5155714750289917, "learning_rate": 0.0032908792111750206, "loss": 3.5605, "step": 801 }, { "epoch": 0.08260556868385904, "grad_norm": 0.3964254856109619, "learning_rate": 0.003303204601479047, "loss": 3.581, "step": 804 }, { "epoch": 0.08291379841775404, "grad_norm": 0.27110666036605835, "learning_rate": 0.0033155299917830734, "loss": 3.5995, "step": 807 }, { "epoch": 0.08322202815164903, "grad_norm": 0.38535767793655396, "learning_rate": 0.003327855382087099, "loss": 3.6029, "step": 810 }, { "epoch": 0.08353025788554402, "grad_norm": 0.6176712512969971, "learning_rate": 0.0033401807723911258, "loss": 3.5694, "step": 813 }, { "epoch": 0.08383848761943902, "grad_norm": 0.33828550577163696, "learning_rate": 0.003352506162695152, "loss": 3.5507, "step": 816 }, { "epoch": 0.08414671735333402, "grad_norm": 0.2286808043718338, "learning_rate": 0.0033648315529991786, "loss": 3.5345, "step": 819 }, { "epoch": 0.08445494708722902, "grad_norm": 0.30232542753219604, "learning_rate": 0.0033771569433032047, "loss": 3.5154, "step": 822 }, { "epoch": 0.08476317682112401, "grad_norm": 0.31767842173576355, "learning_rate": 0.0033894823336072313, "loss": 3.5442, "step": 825 }, { "epoch": 0.085071406555019, "grad_norm": 0.4275444746017456, "learning_rate": 0.003401807723911257, "loss": 3.5715, "step": 828 }, { "epoch": 0.085379636288914, "grad_norm": 0.3426364064216614, "learning_rate": 0.0034141331142152837, "loss": 3.5224, "step": 831 }, { "epoch": 0.085687866022809, "grad_norm": 0.33871403336524963, "learning_rate": 0.00342645850451931, "loss": 3.5119, "step": 834 }, { "epoch": 0.085996095756704, "grad_norm": 0.2641143202781677, "learning_rate": 0.0034387838948233365, "loss": 3.5179, "step": 837 }, { "epoch": 0.086304325490599, "grad_norm": 0.22955679893493652, "learning_rate": 0.003451109285127362, "loss": 3.4807, "step": 840 }, { "epoch": 0.08661255522449399, "grad_norm": 0.3795819878578186, "learning_rate": 0.003463434675431389, "loss": 3.4916, "step": 843 }, { "epoch": 0.08692078495838898, "grad_norm": 0.2942325174808502, "learning_rate": 0.003475760065735415, "loss": 3.4601, "step": 846 }, { "epoch": 0.08722901469228399, "grad_norm": 0.49732574820518494, "learning_rate": 0.0034880854560394416, "loss": 3.5021, "step": 849 }, { "epoch": 0.08753724442617898, "grad_norm": 0.4395911991596222, "learning_rate": 0.0035004108463434673, "loss": 3.4976, "step": 852 }, { "epoch": 0.08784547416007397, "grad_norm": 0.24201816320419312, "learning_rate": 0.003512736236647494, "loss": 3.4648, "step": 855 }, { "epoch": 0.08815370389396897, "grad_norm": 0.32818078994750977, "learning_rate": 0.00352506162695152, "loss": 3.4826, "step": 858 }, { "epoch": 0.08846193362786396, "grad_norm": 0.4433400630950928, "learning_rate": 0.0035373870172555467, "loss": 3.4763, "step": 861 }, { "epoch": 0.08877016336175897, "grad_norm": 0.2911035716533661, "learning_rate": 0.003549712407559573, "loss": 3.5024, "step": 864 }, { "epoch": 0.08907839309565396, "grad_norm": 0.27419009804725647, "learning_rate": 0.003562037797863599, "loss": 3.4382, "step": 867 }, { "epoch": 0.08938662282954896, "grad_norm": 0.2970244586467743, "learning_rate": 0.0035743631881676253, "loss": 3.4362, "step": 870 }, { "epoch": 0.08969485256344395, "grad_norm": 0.34221401810646057, "learning_rate": 0.003586688578471652, "loss": 3.4469, "step": 873 }, { "epoch": 0.09000308229733894, "grad_norm": 0.31807199120521545, "learning_rate": 0.003599013968775678, "loss": 3.3974, "step": 876 }, { "epoch": 0.09031131203123395, "grad_norm": 0.31519362330436707, "learning_rate": 0.0036113393590797046, "loss": 3.4275, "step": 879 }, { "epoch": 0.09061954176512894, "grad_norm": 0.5152423977851868, "learning_rate": 0.0036236647493837304, "loss": 3.4468, "step": 882 }, { "epoch": 0.09092777149902394, "grad_norm": 0.32447418570518494, "learning_rate": 0.003635990139687757, "loss": 3.4505, "step": 885 }, { "epoch": 0.09123600123291893, "grad_norm": 0.19884614646434784, "learning_rate": 0.003648315529991783, "loss": 3.4228, "step": 888 }, { "epoch": 0.09154423096681394, "grad_norm": 0.2726935148239136, "learning_rate": 0.0036606409202958098, "loss": 3.3957, "step": 891 }, { "epoch": 0.09185246070070893, "grad_norm": 0.29470425844192505, "learning_rate": 0.0036729663105998355, "loss": 3.3813, "step": 894 }, { "epoch": 0.09216069043460393, "grad_norm": 0.27806392312049866, "learning_rate": 0.003685291700903862, "loss": 3.3871, "step": 897 }, { "epoch": 0.09246892016849892, "grad_norm": 0.23773950338363647, "learning_rate": 0.0036976170912078883, "loss": 3.3941, "step": 900 }, { "epoch": 0.09277714990239391, "grad_norm": 0.45804303884506226, "learning_rate": 0.003709942481511915, "loss": 3.3752, "step": 903 }, { "epoch": 0.09308537963628892, "grad_norm": 0.45320865511894226, "learning_rate": 0.0037222678718159406, "loss": 3.4068, "step": 906 }, { "epoch": 0.09339360937018391, "grad_norm": 0.277089387178421, "learning_rate": 0.0037345932621199673, "loss": 3.4052, "step": 909 }, { "epoch": 0.09370183910407891, "grad_norm": 0.26548513770103455, "learning_rate": 0.0037469186524239934, "loss": 3.3753, "step": 912 }, { "epoch": 0.0940100688379739, "grad_norm": 0.24219335615634918, "learning_rate": 0.00375924404272802, "loss": 3.3913, "step": 915 }, { "epoch": 0.0943182985718689, "grad_norm": 0.2855617105960846, "learning_rate": 0.003771569433032046, "loss": 3.3636, "step": 918 }, { "epoch": 0.0946265283057639, "grad_norm": 0.35244864225387573, "learning_rate": 0.003783894823336073, "loss": 3.3603, "step": 921 }, { "epoch": 0.0949347580396589, "grad_norm": 0.3226896822452545, "learning_rate": 0.0037962202136400986, "loss": 3.3267, "step": 924 }, { "epoch": 0.09524298777355389, "grad_norm": 0.279863178730011, "learning_rate": 0.003808545603944125, "loss": 3.3192, "step": 927 }, { "epoch": 0.09555121750744888, "grad_norm": 0.35309404134750366, "learning_rate": 0.0038208709942481513, "loss": 3.2978, "step": 930 }, { "epoch": 0.09585944724134388, "grad_norm": 0.2359645515680313, "learning_rate": 0.003833196384552178, "loss": 3.3627, "step": 933 }, { "epoch": 0.09616767697523888, "grad_norm": 0.22583429515361786, "learning_rate": 0.0038455217748562037, "loss": 3.2669, "step": 936 }, { "epoch": 0.09647590670913388, "grad_norm": 0.2914174199104309, "learning_rate": 0.0038578471651602303, "loss": 3.3238, "step": 939 }, { "epoch": 0.09678413644302887, "grad_norm": 0.37748411297798157, "learning_rate": 0.0038701725554642565, "loss": 3.3232, "step": 942 }, { "epoch": 0.09709236617692386, "grad_norm": 0.28686878085136414, "learning_rate": 0.003882497945768283, "loss": 3.3143, "step": 945 }, { "epoch": 0.09740059591081886, "grad_norm": 0.22591544687747955, "learning_rate": 0.003894823336072309, "loss": 3.3285, "step": 948 }, { "epoch": 0.09770882564471386, "grad_norm": 0.24365665018558502, "learning_rate": 0.003907148726376336, "loss": 3.2799, "step": 951 }, { "epoch": 0.09801705537860886, "grad_norm": 0.3929263651371002, "learning_rate": 0.003919474116680362, "loss": 3.29, "step": 954 }, { "epoch": 0.09832528511250385, "grad_norm": 0.20268237590789795, "learning_rate": 0.003931799506984388, "loss": 3.2412, "step": 957 }, { "epoch": 0.09863351484639885, "grad_norm": 0.3333010673522949, "learning_rate": 0.003944124897288414, "loss": 3.2523, "step": 960 }, { "epoch": 0.09894174458029384, "grad_norm": 0.32760193943977356, "learning_rate": 0.0039564502875924406, "loss": 3.2958, "step": 963 }, { "epoch": 0.09924997431418885, "grad_norm": 0.27670565247535706, "learning_rate": 0.003968775677896467, "loss": 3.2683, "step": 966 }, { "epoch": 0.09955820404808384, "grad_norm": 0.32110410928726196, "learning_rate": 0.003981101068200493, "loss": 3.2576, "step": 969 }, { "epoch": 0.09986643378197883, "grad_norm": 0.43541696667671204, "learning_rate": 0.0039934264585045195, "loss": 3.2924, "step": 972 }, { "epoch": 0.10017466351587383, "grad_norm": 0.3483084738254547, "learning_rate": 0.004005751848808546, "loss": 3.2936, "step": 975 }, { "epoch": 0.10048289324976883, "grad_norm": 0.29586124420166016, "learning_rate": 0.004018077239112572, "loss": 3.2511, "step": 978 }, { "epoch": 0.10079112298366383, "grad_norm": 0.21434040367603302, "learning_rate": 0.0040304026294165985, "loss": 3.242, "step": 981 }, { "epoch": 0.10109935271755882, "grad_norm": 0.35204213857650757, "learning_rate": 0.004042728019720624, "loss": 3.2156, "step": 984 }, { "epoch": 0.10140758245145381, "grad_norm": 0.25223758816719055, "learning_rate": 0.004055053410024651, "loss": 3.257, "step": 987 }, { "epoch": 0.10171581218534881, "grad_norm": 0.2969653010368347, "learning_rate": 0.004067378800328677, "loss": 3.2576, "step": 990 }, { "epoch": 0.10202404191924382, "grad_norm": 0.26683250069618225, "learning_rate": 0.004079704190632704, "loss": 3.1998, "step": 993 }, { "epoch": 0.10233227165313881, "grad_norm": 0.26404044032096863, "learning_rate": 0.00409202958093673, "loss": 3.2303, "step": 996 }, { "epoch": 0.1026405013870338, "grad_norm": 0.2442736029624939, "learning_rate": 0.004104354971240756, "loss": 3.2428, "step": 999 }, { "epoch": 0.1029487311209288, "grad_norm": 0.2192964255809784, "learning_rate": 0.004116680361544782, "loss": 3.2661, "step": 1002 }, { "epoch": 0.10325696085482379, "grad_norm": 0.21057608723640442, "learning_rate": 0.004129005751848809, "loss": 3.1995, "step": 1005 }, { "epoch": 0.1035651905887188, "grad_norm": 0.3122745454311371, "learning_rate": 0.0041413311421528345, "loss": 3.2104, "step": 1008 }, { "epoch": 0.10387342032261379, "grad_norm": 0.643337607383728, "learning_rate": 0.004153656532456861, "loss": 3.2196, "step": 1011 }, { "epoch": 0.10418165005650878, "grad_norm": 0.265302449464798, "learning_rate": 0.004165981922760888, "loss": 3.2163, "step": 1014 }, { "epoch": 0.10448987979040378, "grad_norm": 0.27250421047210693, "learning_rate": 0.004178307313064914, "loss": 3.1781, "step": 1017 }, { "epoch": 0.10479810952429877, "grad_norm": 0.3951704800128937, "learning_rate": 0.00419063270336894, "loss": 3.2405, "step": 1020 }, { "epoch": 0.10510633925819378, "grad_norm": 0.20837850868701935, "learning_rate": 0.004202958093672967, "loss": 3.2269, "step": 1023 }, { "epoch": 0.10541456899208877, "grad_norm": 0.3887670338153839, "learning_rate": 0.004215283483976992, "loss": 3.219, "step": 1026 }, { "epoch": 0.10572279872598377, "grad_norm": 0.18901754915714264, "learning_rate": 0.004227608874281019, "loss": 3.1759, "step": 1029 }, { "epoch": 0.10603102845987876, "grad_norm": 0.3570176362991333, "learning_rate": 0.004239934264585046, "loss": 3.1544, "step": 1032 }, { "epoch": 0.10633925819377375, "grad_norm": 0.2346538007259369, "learning_rate": 0.004252259654889072, "loss": 3.1834, "step": 1035 }, { "epoch": 0.10664748792766876, "grad_norm": 0.1956055760383606, "learning_rate": 0.004264585045193098, "loss": 3.1597, "step": 1038 }, { "epoch": 0.10695571766156375, "grad_norm": 0.19475719332695007, "learning_rate": 0.0042769104354971246, "loss": 3.1818, "step": 1041 }, { "epoch": 0.10726394739545875, "grad_norm": 0.20991206169128418, "learning_rate": 0.00428923582580115, "loss": 3.148, "step": 1044 }, { "epoch": 0.10757217712935374, "grad_norm": 0.45754027366638184, "learning_rate": 0.004301561216105177, "loss": 3.1838, "step": 1047 }, { "epoch": 0.10788040686324873, "grad_norm": 0.2500004470348358, "learning_rate": 0.004313886606409203, "loss": 3.158, "step": 1050 }, { "epoch": 0.10818863659714374, "grad_norm": 0.29174116253852844, "learning_rate": 0.004326211996713229, "loss": 3.1619, "step": 1053 }, { "epoch": 0.10849686633103874, "grad_norm": 0.1642913520336151, "learning_rate": 0.004338537387017256, "loss": 3.1313, "step": 1056 }, { "epoch": 0.10880509606493373, "grad_norm": 0.20638629794120789, "learning_rate": 0.004350862777321282, "loss": 3.1553, "step": 1059 }, { "epoch": 0.10911332579882872, "grad_norm": 0.2534577548503876, "learning_rate": 0.004363188167625308, "loss": 3.146, "step": 1062 }, { "epoch": 0.10942155553272373, "grad_norm": 0.3894107937812805, "learning_rate": 0.004375513557929334, "loss": 3.1702, "step": 1065 }, { "epoch": 0.10972978526661872, "grad_norm": 0.18316411972045898, "learning_rate": 0.0043878389482333606, "loss": 3.1306, "step": 1068 }, { "epoch": 0.11003801500051372, "grad_norm": 0.22901946306228638, "learning_rate": 0.004400164338537387, "loss": 3.1012, "step": 1071 }, { "epoch": 0.11034624473440871, "grad_norm": 0.3013692796230316, "learning_rate": 0.004412489728841414, "loss": 3.1266, "step": 1074 }, { "epoch": 0.1106544744683037, "grad_norm": 0.26568275690078735, "learning_rate": 0.0044248151191454395, "loss": 3.1161, "step": 1077 }, { "epoch": 0.11096270420219871, "grad_norm": 0.23559318482875824, "learning_rate": 0.004437140509449466, "loss": 3.125, "step": 1080 }, { "epoch": 0.1112709339360937, "grad_norm": 0.29804936051368713, "learning_rate": 0.004449465899753492, "loss": 3.1212, "step": 1083 }, { "epoch": 0.1115791636699887, "grad_norm": 0.2965604066848755, "learning_rate": 0.0044617912900575185, "loss": 3.1435, "step": 1086 }, { "epoch": 0.11188739340388369, "grad_norm": 0.22977206110954285, "learning_rate": 0.004474116680361544, "loss": 3.1355, "step": 1089 }, { "epoch": 0.11219562313777869, "grad_norm": 0.2511363923549652, "learning_rate": 0.004486442070665571, "loss": 3.1041, "step": 1092 }, { "epoch": 0.11250385287167369, "grad_norm": 0.13533104956150055, "learning_rate": 0.004498767460969597, "loss": 3.1006, "step": 1095 }, { "epoch": 0.11281208260556869, "grad_norm": 0.1323193609714508, "learning_rate": 0.004511092851273624, "loss": 3.0623, "step": 1098 }, { "epoch": 0.11312031233946368, "grad_norm": 0.24355067312717438, "learning_rate": 0.00452341824157765, "loss": 3.109, "step": 1101 }, { "epoch": 0.11342854207335867, "grad_norm": 0.45989617705345154, "learning_rate": 0.004535743631881676, "loss": 3.1102, "step": 1104 }, { "epoch": 0.11373677180725367, "grad_norm": 0.27389761805534363, "learning_rate": 0.004548069022185702, "loss": 3.1058, "step": 1107 }, { "epoch": 0.11404500154114867, "grad_norm": 0.3120715320110321, "learning_rate": 0.004560394412489729, "loss": 3.0936, "step": 1110 }, { "epoch": 0.11435323127504367, "grad_norm": 0.3641244173049927, "learning_rate": 0.004572719802793755, "loss": 3.0895, "step": 1113 }, { "epoch": 0.11466146100893866, "grad_norm": 0.16439078748226166, "learning_rate": 0.004585045193097782, "loss": 3.0697, "step": 1116 }, { "epoch": 0.11496969074283366, "grad_norm": 0.21766935288906097, "learning_rate": 0.004597370583401808, "loss": 3.0952, "step": 1119 }, { "epoch": 0.11527792047672865, "grad_norm": 0.1682632714509964, "learning_rate": 0.004609695973705834, "loss": 3.0644, "step": 1122 }, { "epoch": 0.11558615021062366, "grad_norm": 0.18391060829162598, "learning_rate": 0.00462202136400986, "loss": 3.0565, "step": 1125 }, { "epoch": 0.11589437994451865, "grad_norm": 0.2503467798233032, "learning_rate": 0.004634346754313887, "loss": 3.0798, "step": 1128 }, { "epoch": 0.11620260967841364, "grad_norm": 0.3139159083366394, "learning_rate": 0.004646672144617912, "loss": 3.0784, "step": 1131 }, { "epoch": 0.11651083941230864, "grad_norm": 0.2205217182636261, "learning_rate": 0.004658997534921939, "loss": 3.0696, "step": 1134 }, { "epoch": 0.11681906914620364, "grad_norm": 0.322355180978775, "learning_rate": 0.004671322925225966, "loss": 3.0811, "step": 1137 }, { "epoch": 0.11712729888009864, "grad_norm": 0.27023863792419434, "learning_rate": 0.004683648315529992, "loss": 3.0955, "step": 1140 }, { "epoch": 0.11743552861399363, "grad_norm": 0.2672137916088104, "learning_rate": 0.004695973705834018, "loss": 3.0584, "step": 1143 }, { "epoch": 0.11774375834788862, "grad_norm": 0.271323561668396, "learning_rate": 0.0047082990961380446, "loss": 3.0483, "step": 1146 }, { "epoch": 0.11805198808178362, "grad_norm": 0.1428508758544922, "learning_rate": 0.00472062448644207, "loss": 3.0661, "step": 1149 }, { "epoch": 0.11836021781567863, "grad_norm": 0.29395970702171326, "learning_rate": 0.004732949876746097, "loss": 3.0391, "step": 1152 }, { "epoch": 0.11866844754957362, "grad_norm": 0.22083403170108795, "learning_rate": 0.0047452752670501235, "loss": 3.0579, "step": 1155 }, { "epoch": 0.11897667728346861, "grad_norm": 0.2015424370765686, "learning_rate": 0.00475760065735415, "loss": 3.0356, "step": 1158 }, { "epoch": 0.1192849070173636, "grad_norm": 0.21997034549713135, "learning_rate": 0.004769926047658176, "loss": 3.0301, "step": 1161 }, { "epoch": 0.1195931367512586, "grad_norm": 0.16206422448158264, "learning_rate": 0.0047822514379622025, "loss": 3.0407, "step": 1164 }, { "epoch": 0.11990136648515361, "grad_norm": 0.22591377794742584, "learning_rate": 0.004794576828266228, "loss": 3.0414, "step": 1167 }, { "epoch": 0.1202095962190486, "grad_norm": 0.2582632601261139, "learning_rate": 0.004806902218570255, "loss": 3.0148, "step": 1170 }, { "epoch": 0.1205178259529436, "grad_norm": 0.273416131734848, "learning_rate": 0.004819227608874281, "loss": 3.0023, "step": 1173 }, { "epoch": 0.12082605568683859, "grad_norm": 0.16373753547668457, "learning_rate": 0.004831552999178307, "loss": 3.0127, "step": 1176 }, { "epoch": 0.12113428542073358, "grad_norm": 0.2623594105243683, "learning_rate": 0.004843878389482334, "loss": 3.0635, "step": 1179 }, { "epoch": 0.12144251515462859, "grad_norm": 0.34809616208076477, "learning_rate": 0.00485620377978636, "loss": 3.0222, "step": 1182 }, { "epoch": 0.12175074488852358, "grad_norm": 0.23841938376426697, "learning_rate": 0.004868529170090386, "loss": 3.019, "step": 1185 }, { "epoch": 0.12205897462241858, "grad_norm": 0.2161986231803894, "learning_rate": 0.004880854560394413, "loss": 2.9934, "step": 1188 }, { "epoch": 0.12236720435631357, "grad_norm": 0.2870507836341858, "learning_rate": 0.0048931799506984385, "loss": 3.0438, "step": 1191 }, { "epoch": 0.12267543409020856, "grad_norm": 0.20796675980091095, "learning_rate": 0.004905505341002465, "loss": 2.9947, "step": 1194 }, { "epoch": 0.12298366382410357, "grad_norm": 0.1762983798980713, "learning_rate": 0.004917830731306492, "loss": 2.9729, "step": 1197 }, { "epoch": 0.12329189355799856, "grad_norm": 0.1240881159901619, "learning_rate": 0.0049301561216105174, "loss": 3.0149, "step": 1200 }, { "epoch": 0.12360012329189356, "grad_norm": 0.16968263685703278, "learning_rate": 0.004942481511914544, "loss": 2.9944, "step": 1203 }, { "epoch": 0.12390835302578855, "grad_norm": 0.1743592470884323, "learning_rate": 0.004954806902218571, "loss": 2.9947, "step": 1206 }, { "epoch": 0.12421658275968354, "grad_norm": 0.29677319526672363, "learning_rate": 0.004967132292522596, "loss": 2.9922, "step": 1209 }, { "epoch": 0.12452481249357855, "grad_norm": 0.273882657289505, "learning_rate": 0.004979457682826623, "loss": 2.9698, "step": 1212 }, { "epoch": 0.12483304222747355, "grad_norm": 0.3060019910335541, "learning_rate": 0.004991783073130649, "loss": 2.9925, "step": 1215 }, { "epoch": 0.12514127196136854, "grad_norm": 0.13856515288352966, "learning_rate": 0.005004108463434675, "loss": 3.0212, "step": 1218 }, { "epoch": 0.12544950169526353, "grad_norm": 0.12940354645252228, "learning_rate": 0.005016433853738702, "loss": 2.9472, "step": 1221 }, { "epoch": 0.12575773142915853, "grad_norm": 0.15493866801261902, "learning_rate": 0.005028759244042728, "loss": 2.9859, "step": 1224 }, { "epoch": 0.12606596116305352, "grad_norm": 0.4994816184043884, "learning_rate": 0.005041084634346754, "loss": 2.949, "step": 1227 }, { "epoch": 0.1263741908969485, "grad_norm": 0.37235137820243835, "learning_rate": 0.005053410024650781, "loss": 3.006, "step": 1230 }, { "epoch": 0.12668242063084353, "grad_norm": 0.24599948525428772, "learning_rate": 0.0050657354149548075, "loss": 2.9954, "step": 1233 }, { "epoch": 0.12699065036473853, "grad_norm": 0.1838703751564026, "learning_rate": 0.005078060805258833, "loss": 2.9886, "step": 1236 }, { "epoch": 0.12729888009863352, "grad_norm": 0.19366377592086792, "learning_rate": 0.005090386195562859, "loss": 2.9715, "step": 1239 }, { "epoch": 0.12760710983252851, "grad_norm": 0.11911759525537491, "learning_rate": 0.005102711585866886, "loss": 2.965, "step": 1242 }, { "epoch": 0.1279153395664235, "grad_norm": 0.12456653267145157, "learning_rate": 0.005115036976170912, "loss": 2.9343, "step": 1245 }, { "epoch": 0.1282235693003185, "grad_norm": 0.322380393743515, "learning_rate": 0.005127362366474938, "loss": 2.9604, "step": 1248 }, { "epoch": 0.1285317990342135, "grad_norm": 0.40975773334503174, "learning_rate": 0.005139687756778965, "loss": 2.9386, "step": 1251 }, { "epoch": 0.1288400287681085, "grad_norm": 0.2045045793056488, "learning_rate": 0.005152013147082991, "loss": 2.9459, "step": 1254 }, { "epoch": 0.12914825850200348, "grad_norm": 0.20005717873573303, "learning_rate": 0.005164338537387018, "loss": 2.9631, "step": 1257 }, { "epoch": 0.12945648823589848, "grad_norm": 0.18930204212665558, "learning_rate": 0.0051766639276910435, "loss": 2.9014, "step": 1260 }, { "epoch": 0.1297647179697935, "grad_norm": 0.3180810213088989, "learning_rate": 0.00518898931799507, "loss": 2.9242, "step": 1263 }, { "epoch": 0.1300729477036885, "grad_norm": 0.17843572795391083, "learning_rate": 0.005201314708299097, "loss": 2.9063, "step": 1266 }, { "epoch": 0.13038117743758348, "grad_norm": 0.12591248750686646, "learning_rate": 0.005213640098603123, "loss": 2.9095, "step": 1269 }, { "epoch": 0.13068940717147848, "grad_norm": 0.17976878583431244, "learning_rate": 0.005225965488907148, "loss": 2.928, "step": 1272 }, { "epoch": 0.13099763690537347, "grad_norm": 0.16759532690048218, "learning_rate": 0.005238290879211175, "loss": 2.9202, "step": 1275 }, { "epoch": 0.13130586663926846, "grad_norm": 0.27441859245300293, "learning_rate": 0.0052506162695152014, "loss": 2.9242, "step": 1278 }, { "epoch": 0.13161409637316346, "grad_norm": 0.23654502630233765, "learning_rate": 0.005262941659819228, "loss": 2.9175, "step": 1281 }, { "epoch": 0.13192232610705845, "grad_norm": 0.3399145007133484, "learning_rate": 0.005275267050123254, "loss": 2.9277, "step": 1284 }, { "epoch": 0.13223055584095345, "grad_norm": 0.199320450425148, "learning_rate": 0.00528759244042728, "loss": 2.9184, "step": 1287 }, { "epoch": 0.13253878557484847, "grad_norm": 0.16563403606414795, "learning_rate": 0.005299917830731307, "loss": 2.9166, "step": 1290 }, { "epoch": 0.13284701530874346, "grad_norm": 0.18119758367538452, "learning_rate": 0.005312243221035334, "loss": 2.9239, "step": 1293 }, { "epoch": 0.13315524504263845, "grad_norm": 0.1558375358581543, "learning_rate": 0.0053245686113393585, "loss": 2.9028, "step": 1296 }, { "epoch": 0.13346347477653345, "grad_norm": 0.36665746569633484, "learning_rate": 0.005336894001643385, "loss": 2.9081, "step": 1299 }, { "epoch": 0.13377170451042844, "grad_norm": 0.186012864112854, "learning_rate": 0.005349219391947412, "loss": 2.8836, "step": 1302 }, { "epoch": 0.13407993424432343, "grad_norm": 0.14102259278297424, "learning_rate": 0.005361544782251438, "loss": 2.8906, "step": 1305 }, { "epoch": 0.13438816397821843, "grad_norm": 0.12519022822380066, "learning_rate": 0.005373870172555464, "loss": 2.9148, "step": 1308 }, { "epoch": 0.13469639371211342, "grad_norm": 0.14027029275894165, "learning_rate": 0.005386195562859491, "loss": 2.9108, "step": 1311 }, { "epoch": 0.13500462344600841, "grad_norm": 0.2553085684776306, "learning_rate": 0.005398520953163517, "loss": 2.8837, "step": 1314 }, { "epoch": 0.1353128531799034, "grad_norm": 0.2809675335884094, "learning_rate": 0.005410846343467544, "loss": 2.8795, "step": 1317 }, { "epoch": 0.13562108291379843, "grad_norm": 0.19451378285884857, "learning_rate": 0.005423171733771569, "loss": 2.8648, "step": 1320 }, { "epoch": 0.13592931264769342, "grad_norm": 0.22285006940364838, "learning_rate": 0.005435497124075595, "loss": 2.8994, "step": 1323 }, { "epoch": 0.13623754238158842, "grad_norm": 0.14703693985939026, "learning_rate": 0.005447822514379622, "loss": 2.8984, "step": 1326 }, { "epoch": 0.1365457721154834, "grad_norm": 0.23260341584682465, "learning_rate": 0.005460147904683649, "loss": 2.863, "step": 1329 }, { "epoch": 0.1368540018493784, "grad_norm": 0.16448146104812622, "learning_rate": 0.005472473294987674, "loss": 2.8895, "step": 1332 }, { "epoch": 0.1371622315832734, "grad_norm": 0.1994483470916748, "learning_rate": 0.005484798685291701, "loss": 2.9012, "step": 1335 }, { "epoch": 0.1374704613171684, "grad_norm": 0.2786753177642822, "learning_rate": 0.0054971240755957275, "loss": 2.8753, "step": 1338 }, { "epoch": 0.13777869105106338, "grad_norm": 0.13169367611408234, "learning_rate": 0.005509449465899754, "loss": 2.8567, "step": 1341 }, { "epoch": 0.13808692078495838, "grad_norm": 0.21205192804336548, "learning_rate": 0.00552177485620378, "loss": 2.8523, "step": 1344 }, { "epoch": 0.1383951505188534, "grad_norm": 0.3462331295013428, "learning_rate": 0.0055341002465078065, "loss": 2.881, "step": 1347 }, { "epoch": 0.1387033802527484, "grad_norm": 0.26768332719802856, "learning_rate": 0.005546425636811832, "loss": 2.8803, "step": 1350 }, { "epoch": 0.1390116099866434, "grad_norm": 0.22518084943294525, "learning_rate": 0.005558751027115859, "loss": 2.874, "step": 1353 }, { "epoch": 0.13931983972053838, "grad_norm": 0.1767919361591339, "learning_rate": 0.005571076417419885, "loss": 2.8593, "step": 1356 }, { "epoch": 0.13962806945443337, "grad_norm": 0.14405187964439392, "learning_rate": 0.005583401807723911, "loss": 2.8576, "step": 1359 }, { "epoch": 0.13993629918832837, "grad_norm": 0.15364724397659302, "learning_rate": 0.005595727198027938, "loss": 2.856, "step": 1362 }, { "epoch": 0.14024452892222336, "grad_norm": 0.26737314462661743, "learning_rate": 0.005608052588331964, "loss": 2.8225, "step": 1365 }, { "epoch": 0.14055275865611835, "grad_norm": 0.14594382047653198, "learning_rate": 0.00562037797863599, "loss": 2.8397, "step": 1368 }, { "epoch": 0.14086098839001335, "grad_norm": 0.1974790245294571, "learning_rate": 0.005632703368940017, "loss": 2.8294, "step": 1371 }, { "epoch": 0.14116921812390834, "grad_norm": 0.12267682701349258, "learning_rate": 0.005645028759244043, "loss": 2.8543, "step": 1374 }, { "epoch": 0.14147744785780336, "grad_norm": 0.14111129939556122, "learning_rate": 0.00565735414954807, "loss": 2.8181, "step": 1377 }, { "epoch": 0.14178567759169836, "grad_norm": 0.1846015751361847, "learning_rate": 0.005669679539852095, "loss": 2.8272, "step": 1380 }, { "epoch": 0.14209390732559335, "grad_norm": 0.26931676268577576, "learning_rate": 0.0056820049301561214, "loss": 2.8286, "step": 1383 }, { "epoch": 0.14240213705948834, "grad_norm": 0.17969557642936707, "learning_rate": 0.005694330320460148, "loss": 2.8315, "step": 1386 }, { "epoch": 0.14271036679338334, "grad_norm": 0.2056432068347931, "learning_rate": 0.005706655710764175, "loss": 2.835, "step": 1389 }, { "epoch": 0.14301859652727833, "grad_norm": 0.29306477308273315, "learning_rate": 0.0057189811010682, "loss": 2.8294, "step": 1392 }, { "epoch": 0.14332682626117332, "grad_norm": 0.1792561262845993, "learning_rate": 0.005731306491372227, "loss": 2.8321, "step": 1395 }, { "epoch": 0.14363505599506832, "grad_norm": 0.11323501914739609, "learning_rate": 0.005743631881676254, "loss": 2.83, "step": 1398 }, { "epoch": 0.1439432857289633, "grad_norm": 0.2804841101169586, "learning_rate": 0.00575595727198028, "loss": 2.8271, "step": 1401 }, { "epoch": 0.1442515154628583, "grad_norm": 0.33056163787841797, "learning_rate": 0.005768282662284305, "loss": 2.7976, "step": 1404 }, { "epoch": 0.14455974519675333, "grad_norm": 0.12834665179252625, "learning_rate": 0.005780608052588332, "loss": 2.8169, "step": 1407 }, { "epoch": 0.14486797493064832, "grad_norm": 0.15917035937309265, "learning_rate": 0.005792933442892358, "loss": 2.8124, "step": 1410 }, { "epoch": 0.1451762046645433, "grad_norm": 0.28015008568763733, "learning_rate": 0.005805258833196385, "loss": 2.8019, "step": 1413 }, { "epoch": 0.1454844343984383, "grad_norm": 0.16829009354114532, "learning_rate": 0.005817584223500411, "loss": 2.8357, "step": 1416 }, { "epoch": 0.1457926641323333, "grad_norm": 0.14804339408874512, "learning_rate": 0.005829909613804437, "loss": 2.8102, "step": 1419 }, { "epoch": 0.1461008938662283, "grad_norm": 0.20360830426216125, "learning_rate": 0.005842235004108464, "loss": 2.8211, "step": 1422 }, { "epoch": 0.1464091236001233, "grad_norm": 0.22152036428451538, "learning_rate": 0.0058545603944124905, "loss": 2.8103, "step": 1425 }, { "epoch": 0.14671735333401828, "grad_norm": 0.20746375620365143, "learning_rate": 0.005866885784716516, "loss": 2.7994, "step": 1428 }, { "epoch": 0.14702558306791327, "grad_norm": 0.16845661401748657, "learning_rate": 0.005879211175020542, "loss": 2.8286, "step": 1431 }, { "epoch": 0.1473338128018083, "grad_norm": 0.1094370111823082, "learning_rate": 0.005891536565324569, "loss": 2.7888, "step": 1434 }, { "epoch": 0.1476420425357033, "grad_norm": 0.14844520390033722, "learning_rate": 0.005903861955628595, "loss": 2.8035, "step": 1437 }, { "epoch": 0.14795027226959828, "grad_norm": 0.12289691716432571, "learning_rate": 0.005916187345932621, "loss": 2.7852, "step": 1440 }, { "epoch": 0.14825850200349328, "grad_norm": 0.1203322485089302, "learning_rate": 0.0059285127362366475, "loss": 2.8101, "step": 1443 }, { "epoch": 0.14856673173738827, "grad_norm": 0.1871965080499649, "learning_rate": 0.005940838126540674, "loss": 2.7485, "step": 1446 }, { "epoch": 0.14887496147128326, "grad_norm": 0.1567300707101822, "learning_rate": 0.005953163516844701, "loss": 2.8097, "step": 1449 }, { "epoch": 0.14918319120517826, "grad_norm": 0.18046674132347107, "learning_rate": 0.0059654889071487265, "loss": 2.8118, "step": 1452 }, { "epoch": 0.14949142093907325, "grad_norm": 0.23180244863033295, "learning_rate": 0.005977814297452753, "loss": 2.7836, "step": 1455 }, { "epoch": 0.14979965067296824, "grad_norm": 0.2300175577402115, "learning_rate": 0.00599013968775678, "loss": 2.7675, "step": 1458 }, { "epoch": 0.15010788040686324, "grad_norm": 0.11340396106243134, "learning_rate": 0.006002465078060806, "loss": 2.8012, "step": 1461 }, { "epoch": 0.15041611014075826, "grad_norm": 0.10667074471712112, "learning_rate": 0.006014790468364831, "loss": 2.8154, "step": 1464 }, { "epoch": 0.15072433987465325, "grad_norm": 0.10800652205944061, "learning_rate": 0.006027115858668858, "loss": 2.7646, "step": 1467 }, { "epoch": 0.15103256960854824, "grad_norm": 0.2588643431663513, "learning_rate": 0.006039441248972884, "loss": 2.7912, "step": 1470 }, { "epoch": 0.15134079934244324, "grad_norm": 0.32462435960769653, "learning_rate": 0.006051766639276911, "loss": 2.7666, "step": 1473 }, { "epoch": 0.15164902907633823, "grad_norm": 0.23754975199699402, "learning_rate": 0.006064092029580937, "loss": 2.7694, "step": 1476 }, { "epoch": 0.15195725881023323, "grad_norm": 0.14895015954971313, "learning_rate": 0.006076417419884963, "loss": 2.7678, "step": 1479 }, { "epoch": 0.15226548854412822, "grad_norm": 0.3228299021720886, "learning_rate": 0.00608874281018899, "loss": 2.7786, "step": 1482 }, { "epoch": 0.1525737182780232, "grad_norm": 0.15597562491893768, "learning_rate": 0.006101068200493017, "loss": 2.7967, "step": 1485 }, { "epoch": 0.1528819480119182, "grad_norm": 0.09748488664627075, "learning_rate": 0.0061133935907970414, "loss": 2.7673, "step": 1488 }, { "epoch": 0.1531901777458132, "grad_norm": 0.12523339688777924, "learning_rate": 0.006125718981101068, "loss": 2.7391, "step": 1491 }, { "epoch": 0.15349840747970822, "grad_norm": 0.16529253125190735, "learning_rate": 0.006138044371405095, "loss": 2.7642, "step": 1494 }, { "epoch": 0.15380663721360321, "grad_norm": 0.2083311527967453, "learning_rate": 0.006150369761709121, "loss": 2.764, "step": 1497 }, { "epoch": 0.1541148669474982, "grad_norm": 0.13263079524040222, "learning_rate": 0.006162695152013147, "loss": 2.7828, "step": 1500 }, { "epoch": 0.1544230966813932, "grad_norm": 0.1473417580127716, "learning_rate": 0.006175020542317174, "loss": 2.7574, "step": 1503 }, { "epoch": 0.1547313264152882, "grad_norm": 0.22629734873771667, "learning_rate": 0.0061873459326212, "loss": 2.7792, "step": 1506 }, { "epoch": 0.1550395561491832, "grad_norm": 0.21652548015117645, "learning_rate": 0.006199671322925227, "loss": 2.7785, "step": 1509 }, { "epoch": 0.15534778588307818, "grad_norm": 0.1948641836643219, "learning_rate": 0.006211996713229252, "loss": 2.7969, "step": 1512 }, { "epoch": 0.15565601561697318, "grad_norm": 0.13890105485916138, "learning_rate": 0.006224322103533278, "loss": 2.7856, "step": 1515 }, { "epoch": 0.15596424535086817, "grad_norm": 0.09859870374202728, "learning_rate": 0.006236647493837305, "loss": 2.7523, "step": 1518 }, { "epoch": 0.1562724750847632, "grad_norm": 0.10258977860212326, "learning_rate": 0.0062489728841413315, "loss": 2.7466, "step": 1521 }, { "epoch": 0.15658070481865818, "grad_norm": 0.11476584523916245, "learning_rate": 0.006261298274445357, "loss": 2.7314, "step": 1524 }, { "epoch": 0.15688893455255318, "grad_norm": 0.1920320987701416, "learning_rate": 0.006273623664749384, "loss": 2.7647, "step": 1527 }, { "epoch": 0.15719716428644817, "grad_norm": 0.18576020002365112, "learning_rate": 0.0062859490550534105, "loss": 2.7632, "step": 1530 }, { "epoch": 0.15750539402034316, "grad_norm": 0.128046452999115, "learning_rate": 0.006298274445357437, "loss": 2.7237, "step": 1533 }, { "epoch": 0.15781362375423816, "grad_norm": 0.30617430806159973, "learning_rate": 0.006310599835661463, "loss": 2.7907, "step": 1536 }, { "epoch": 0.15812185348813315, "grad_norm": 0.140928253531456, "learning_rate": 0.0063229252259654894, "loss": 2.7879, "step": 1539 }, { "epoch": 0.15843008322202815, "grad_norm": 0.2537645399570465, "learning_rate": 0.006335250616269515, "loss": 2.7513, "step": 1542 }, { "epoch": 0.15873831295592314, "grad_norm": 0.40944191813468933, "learning_rate": 0.006347576006573542, "loss": 2.7418, "step": 1545 }, { "epoch": 0.15904654268981813, "grad_norm": 0.1284068077802658, "learning_rate": 0.0063599013968775675, "loss": 2.7235, "step": 1548 }, { "epoch": 0.15935477242371315, "grad_norm": 0.08984164893627167, "learning_rate": 0.006372226787181594, "loss": 2.7414, "step": 1551 }, { "epoch": 0.15966300215760815, "grad_norm": 0.13366155326366425, "learning_rate": 0.006384552177485621, "loss": 2.7456, "step": 1554 }, { "epoch": 0.15997123189150314, "grad_norm": 0.1179983913898468, "learning_rate": 0.006396877567789647, "loss": 2.7313, "step": 1557 }, { "epoch": 0.16027946162539813, "grad_norm": 0.15718503296375275, "learning_rate": 0.006409202958093673, "loss": 2.7315, "step": 1560 }, { "epoch": 0.16058769135929313, "grad_norm": 0.14405110478401184, "learning_rate": 0.0064215283483977, "loss": 2.7275, "step": 1563 }, { "epoch": 0.16089592109318812, "grad_norm": 0.13050544261932373, "learning_rate": 0.006433853738701726, "loss": 2.6935, "step": 1566 }, { "epoch": 0.16120415082708311, "grad_norm": 0.2343079298734665, "learning_rate": 0.006446179129005751, "loss": 2.6932, "step": 1569 }, { "epoch": 0.1615123805609781, "grad_norm": 0.2493698000907898, "learning_rate": 0.006458504519309778, "loss": 2.7414, "step": 1572 }, { "epoch": 0.1618206102948731, "grad_norm": 0.17371931672096252, "learning_rate": 0.006470829909613804, "loss": 2.7522, "step": 1575 }, { "epoch": 0.1621288400287681, "grad_norm": 0.16282691061496735, "learning_rate": 0.006483155299917831, "loss": 2.7659, "step": 1578 }, { "epoch": 0.16243706976266312, "grad_norm": 0.12791027128696442, "learning_rate": 0.006495480690221857, "loss": 2.7077, "step": 1581 }, { "epoch": 0.1627452994965581, "grad_norm": 0.09789251536130905, "learning_rate": 0.006507806080525883, "loss": 2.7041, "step": 1584 }, { "epoch": 0.1630535292304531, "grad_norm": 0.10156393051147461, "learning_rate": 0.00652013147082991, "loss": 2.685, "step": 1587 }, { "epoch": 0.1633617589643481, "grad_norm": 0.1974211484193802, "learning_rate": 0.006532456861133937, "loss": 2.7183, "step": 1590 }, { "epoch": 0.1636699886982431, "grad_norm": 0.1420728713274002, "learning_rate": 0.0065447822514379615, "loss": 2.7095, "step": 1593 }, { "epoch": 0.16397821843213808, "grad_norm": 0.3637617528438568, "learning_rate": 0.006557107641741988, "loss": 2.7578, "step": 1596 }, { "epoch": 0.16428644816603308, "grad_norm": 0.09830935299396515, "learning_rate": 0.006569433032046015, "loss": 2.6937, "step": 1599 }, { "epoch": 0.16459467789992807, "grad_norm": 0.15821218490600586, "learning_rate": 0.006581758422350041, "loss": 2.7031, "step": 1602 }, { "epoch": 0.16490290763382306, "grad_norm": 0.17226357758045197, "learning_rate": 0.006594083812654067, "loss": 2.6702, "step": 1605 }, { "epoch": 0.16521113736771809, "grad_norm": 0.21252015233039856, "learning_rate": 0.006606409202958094, "loss": 2.6893, "step": 1608 }, { "epoch": 0.16551936710161308, "grad_norm": 0.11433108150959015, "learning_rate": 0.00661873459326212, "loss": 2.6852, "step": 1611 }, { "epoch": 0.16582759683550807, "grad_norm": 0.15884144604206085, "learning_rate": 0.006631059983566147, "loss": 2.7164, "step": 1614 }, { "epoch": 0.16613582656940307, "grad_norm": 0.1429038643836975, "learning_rate": 0.006643385373870173, "loss": 2.6976, "step": 1617 }, { "epoch": 0.16644405630329806, "grad_norm": 0.09187953919172287, "learning_rate": 0.006655710764174198, "loss": 2.7134, "step": 1620 }, { "epoch": 0.16675228603719305, "grad_norm": 0.13670755922794342, "learning_rate": 0.006668036154478225, "loss": 2.6951, "step": 1623 }, { "epoch": 0.16706051577108805, "grad_norm": 0.17965632677078247, "learning_rate": 0.0066803615447822515, "loss": 2.6911, "step": 1626 }, { "epoch": 0.16736874550498304, "grad_norm": 0.21141032874584198, "learning_rate": 0.006692686935086277, "loss": 2.67, "step": 1629 }, { "epoch": 0.16767697523887803, "grad_norm": 0.30064719915390015, "learning_rate": 0.006705012325390304, "loss": 2.6837, "step": 1632 }, { "epoch": 0.16798520497277303, "grad_norm": 0.11874115467071533, "learning_rate": 0.0067173377156943305, "loss": 2.6968, "step": 1635 }, { "epoch": 0.16829343470666805, "grad_norm": 0.10265806317329407, "learning_rate": 0.006729663105998357, "loss": 2.6632, "step": 1638 }, { "epoch": 0.16860166444056304, "grad_norm": 0.10916320979595184, "learning_rate": 0.006741988496302383, "loss": 2.6749, "step": 1641 }, { "epoch": 0.16890989417445804, "grad_norm": 0.2549231946468353, "learning_rate": 0.0067543138866064095, "loss": 2.636, "step": 1644 }, { "epoch": 0.16921812390835303, "grad_norm": 0.15071339905261993, "learning_rate": 0.006766639276910436, "loss": 2.6933, "step": 1647 }, { "epoch": 0.16952635364224802, "grad_norm": 0.1088666021823883, "learning_rate": 0.006778964667214463, "loss": 2.6477, "step": 1650 }, { "epoch": 0.16983458337614302, "grad_norm": 0.0984036773443222, "learning_rate": 0.0067912900575184875, "loss": 2.6801, "step": 1653 }, { "epoch": 0.170142813110038, "grad_norm": 0.15402089059352875, "learning_rate": 0.006803615447822514, "loss": 2.6877, "step": 1656 }, { "epoch": 0.170451042843933, "grad_norm": 0.1299775093793869, "learning_rate": 0.006815940838126541, "loss": 2.6717, "step": 1659 }, { "epoch": 0.170759272577828, "grad_norm": 0.15615323185920715, "learning_rate": 0.006828266228430567, "loss": 2.6578, "step": 1662 }, { "epoch": 0.171067502311723, "grad_norm": 0.122567318379879, "learning_rate": 0.006840591618734593, "loss": 2.6959, "step": 1665 }, { "epoch": 0.171375732045618, "grad_norm": 0.1386043280363083, "learning_rate": 0.00685291700903862, "loss": 2.6491, "step": 1668 }, { "epoch": 0.171683961779513, "grad_norm": 0.1900375783443451, "learning_rate": 0.006865242399342646, "loss": 2.6643, "step": 1671 }, { "epoch": 0.171992191513408, "grad_norm": 0.1118064671754837, "learning_rate": 0.006877567789646673, "loss": 2.6496, "step": 1674 }, { "epoch": 0.172300421247303, "grad_norm": 0.1593448519706726, "learning_rate": 0.006889893179950698, "loss": 2.6833, "step": 1677 }, { "epoch": 0.172608650981198, "grad_norm": 0.17275281250476837, "learning_rate": 0.006902218570254724, "loss": 2.6909, "step": 1680 }, { "epoch": 0.17291688071509298, "grad_norm": 0.13396479189395905, "learning_rate": 0.006914543960558751, "loss": 2.692, "step": 1683 }, { "epoch": 0.17322511044898797, "grad_norm": 0.09812068939208984, "learning_rate": 0.006926869350862778, "loss": 2.6939, "step": 1686 }, { "epoch": 0.17353334018288297, "grad_norm": 0.08181022852659225, "learning_rate": 0.006939194741166803, "loss": 2.6408, "step": 1689 }, { "epoch": 0.17384156991677796, "grad_norm": 0.15573051571846008, "learning_rate": 0.00695152013147083, "loss": 2.6647, "step": 1692 }, { "epoch": 0.17414979965067298, "grad_norm": 0.2834240198135376, "learning_rate": 0.006963845521774857, "loss": 2.6585, "step": 1695 }, { "epoch": 0.17445802938456798, "grad_norm": 0.23794801533222198, "learning_rate": 0.006976170912078883, "loss": 2.6559, "step": 1698 }, { "epoch": 0.17476625911846297, "grad_norm": 0.1332167536020279, "learning_rate": 0.006988496302382908, "loss": 2.6695, "step": 1701 }, { "epoch": 0.17507448885235796, "grad_norm": 0.09555593878030777, "learning_rate": 0.007000821692686935, "loss": 2.6811, "step": 1704 }, { "epoch": 0.17538271858625296, "grad_norm": 0.10987939685583115, "learning_rate": 0.007013147082990961, "loss": 2.6524, "step": 1707 }, { "epoch": 0.17569094832014795, "grad_norm": 0.11458218097686768, "learning_rate": 0.007025472473294988, "loss": 2.6085, "step": 1710 }, { "epoch": 0.17599917805404294, "grad_norm": 0.12646709382534027, "learning_rate": 0.007037797863599014, "loss": 2.6561, "step": 1713 }, { "epoch": 0.17630740778793794, "grad_norm": 0.15338967740535736, "learning_rate": 0.00705012325390304, "loss": 2.6471, "step": 1716 }, { "epoch": 0.17661563752183293, "grad_norm": 0.14660318195819855, "learning_rate": 0.007062448644207067, "loss": 2.6532, "step": 1719 }, { "epoch": 0.17692386725572792, "grad_norm": 0.2730877995491028, "learning_rate": 0.0070747740345110935, "loss": 2.6565, "step": 1722 }, { "epoch": 0.17723209698962294, "grad_norm": 0.26743727922439575, "learning_rate": 0.007087099424815119, "loss": 2.6707, "step": 1725 }, { "epoch": 0.17754032672351794, "grad_norm": 0.13842618465423584, "learning_rate": 0.007099424815119146, "loss": 2.6652, "step": 1728 }, { "epoch": 0.17784855645741293, "grad_norm": 0.15871621668338776, "learning_rate": 0.0071117502054231715, "loss": 2.6464, "step": 1731 }, { "epoch": 0.17815678619130793, "grad_norm": 0.11526347696781158, "learning_rate": 0.007124075595727198, "loss": 2.662, "step": 1734 }, { "epoch": 0.17846501592520292, "grad_norm": 0.21620534360408783, "learning_rate": 0.007136400986031224, "loss": 2.6603, "step": 1737 }, { "epoch": 0.1787732456590979, "grad_norm": 0.0905444398522377, "learning_rate": 0.0071487263763352505, "loss": 2.6523, "step": 1740 }, { "epoch": 0.1790814753929929, "grad_norm": 0.28233054280281067, "learning_rate": 0.007161051766639277, "loss": 2.6597, "step": 1743 }, { "epoch": 0.1793897051268879, "grad_norm": 0.2363336831331253, "learning_rate": 0.007173377156943304, "loss": 2.6483, "step": 1746 }, { "epoch": 0.1796979348607829, "grad_norm": 0.11012139916419983, "learning_rate": 0.0071857025472473295, "loss": 2.6513, "step": 1749 }, { "epoch": 0.1800061645946779, "grad_norm": 0.09720948338508606, "learning_rate": 0.007198027937551356, "loss": 2.6511, "step": 1752 }, { "epoch": 0.1803143943285729, "grad_norm": 0.13130852580070496, "learning_rate": 0.007210353327855383, "loss": 2.6509, "step": 1755 }, { "epoch": 0.1806226240624679, "grad_norm": 0.14865098893642426, "learning_rate": 0.007222678718159409, "loss": 2.6253, "step": 1758 }, { "epoch": 0.1809308537963629, "grad_norm": 0.20482710003852844, "learning_rate": 0.007235004108463434, "loss": 2.6312, "step": 1761 }, { "epoch": 0.1812390835302579, "grad_norm": 0.12063097953796387, "learning_rate": 0.007247329498767461, "loss": 2.6007, "step": 1764 }, { "epoch": 0.18154731326415288, "grad_norm": 0.23084934055805206, "learning_rate": 0.007259654889071487, "loss": 2.6129, "step": 1767 }, { "epoch": 0.18185554299804788, "grad_norm": 0.10387217253446579, "learning_rate": 0.007271980279375514, "loss": 2.6309, "step": 1770 }, { "epoch": 0.18216377273194287, "grad_norm": 0.14229682087898254, "learning_rate": 0.00728430566967954, "loss": 2.6074, "step": 1773 }, { "epoch": 0.18247200246583786, "grad_norm": 0.12009115517139435, "learning_rate": 0.007296631059983566, "loss": 2.6407, "step": 1776 }, { "epoch": 0.18278023219973286, "grad_norm": 0.15677185356616974, "learning_rate": 0.007308956450287593, "loss": 2.6268, "step": 1779 }, { "epoch": 0.18308846193362788, "grad_norm": 0.13304303586483002, "learning_rate": 0.0073212818405916195, "loss": 2.6463, "step": 1782 }, { "epoch": 0.18339669166752287, "grad_norm": 0.15444768965244293, "learning_rate": 0.007333607230895644, "loss": 2.6218, "step": 1785 }, { "epoch": 0.18370492140141786, "grad_norm": 0.1738140732049942, "learning_rate": 0.007345932621199671, "loss": 2.6525, "step": 1788 }, { "epoch": 0.18401315113531286, "grad_norm": 0.13087227940559387, "learning_rate": 0.007358258011503698, "loss": 2.6266, "step": 1791 }, { "epoch": 0.18432138086920785, "grad_norm": 0.1026511862874031, "learning_rate": 0.007370583401807724, "loss": 2.6017, "step": 1794 }, { "epoch": 0.18462961060310285, "grad_norm": 0.11183813214302063, "learning_rate": 0.00738290879211175, "loss": 2.5966, "step": 1797 }, { "epoch": 0.18493784033699784, "grad_norm": 0.12239934504032135, "learning_rate": 0.007395234182415777, "loss": 2.6205, "step": 1800 }, { "epoch": 0.18524607007089283, "grad_norm": 0.2630854845046997, "learning_rate": 0.007407559572719803, "loss": 2.609, "step": 1803 }, { "epoch": 0.18555429980478783, "grad_norm": 0.24282613396644592, "learning_rate": 0.00741988496302383, "loss": 2.6405, "step": 1806 }, { "epoch": 0.18586252953868282, "grad_norm": 0.2825084328651428, "learning_rate": 0.007432210353327855, "loss": 2.5933, "step": 1809 }, { "epoch": 0.18617075927257784, "grad_norm": 0.26462721824645996, "learning_rate": 0.007444535743631881, "loss": 2.6021, "step": 1812 }, { "epoch": 0.18647898900647283, "grad_norm": 0.11797992140054703, "learning_rate": 0.007456861133935908, "loss": 2.6246, "step": 1815 }, { "epoch": 0.18678721874036783, "grad_norm": 0.14044708013534546, "learning_rate": 0.0074691865242399345, "loss": 2.6028, "step": 1818 }, { "epoch": 0.18709544847426282, "grad_norm": 0.1374548226594925, "learning_rate": 0.00748151191454396, "loss": 2.6092, "step": 1821 }, { "epoch": 0.18740367820815781, "grad_norm": 0.10084279626607895, "learning_rate": 0.007493837304847987, "loss": 2.6162, "step": 1824 }, { "epoch": 0.1877119079420528, "grad_norm": 0.1052001565694809, "learning_rate": 0.0075061626951520135, "loss": 2.5742, "step": 1827 }, { "epoch": 0.1880201376759478, "grad_norm": 0.11738535761833191, "learning_rate": 0.00751848808545604, "loss": 2.5715, "step": 1830 }, { "epoch": 0.1883283674098428, "grad_norm": 0.10453224182128906, "learning_rate": 0.007530813475760066, "loss": 2.5896, "step": 1833 }, { "epoch": 0.1886365971437378, "grad_norm": 0.10509374737739563, "learning_rate": 0.007543138866064092, "loss": 2.6047, "step": 1836 }, { "epoch": 0.18894482687763278, "grad_norm": 0.11291799694299698, "learning_rate": 0.007555464256368119, "loss": 2.6062, "step": 1839 }, { "epoch": 0.1892530566115278, "grad_norm": 0.11998583376407623, "learning_rate": 0.007567789646672146, "loss": 2.629, "step": 1842 }, { "epoch": 0.1895612863454228, "grad_norm": 0.21776226162910461, "learning_rate": 0.0075801150369761705, "loss": 2.5847, "step": 1845 }, { "epoch": 0.1898695160793178, "grad_norm": 0.210985466837883, "learning_rate": 0.007592440427280197, "loss": 2.5901, "step": 1848 }, { "epoch": 0.19017774581321278, "grad_norm": 0.11799308657646179, "learning_rate": 0.007604765817584224, "loss": 2.5893, "step": 1851 }, { "epoch": 0.19048597554710778, "grad_norm": 0.10019934922456741, "learning_rate": 0.00761709120788825, "loss": 2.6327, "step": 1854 }, { "epoch": 0.19079420528100277, "grad_norm": 0.07964596897363663, "learning_rate": 0.007629416598192276, "loss": 2.5921, "step": 1857 }, { "epoch": 0.19110243501489776, "grad_norm": 0.16393065452575684, "learning_rate": 0.007641741988496303, "loss": 2.5912, "step": 1860 }, { "epoch": 0.19141066474879276, "grad_norm": 0.324639230966568, "learning_rate": 0.007654067378800329, "loss": 2.5998, "step": 1863 }, { "epoch": 0.19171889448268775, "grad_norm": 0.14071421325206757, "learning_rate": 0.007666392769104356, "loss": 2.5803, "step": 1866 }, { "epoch": 0.19202712421658277, "grad_norm": 0.20063026249408722, "learning_rate": 0.007678718159408381, "loss": 2.6019, "step": 1869 }, { "epoch": 0.19233535395047777, "grad_norm": 0.11311519891023636, "learning_rate": 0.007691043549712407, "loss": 2.5645, "step": 1872 }, { "epoch": 0.19264358368437276, "grad_norm": 0.08542342483997345, "learning_rate": 0.007703368940016434, "loss": 2.6122, "step": 1875 }, { "epoch": 0.19295181341826775, "grad_norm": 0.08306868374347687, "learning_rate": 0.007715694330320461, "loss": 2.5859, "step": 1878 }, { "epoch": 0.19326004315216275, "grad_norm": 0.11635984480381012, "learning_rate": 0.007728019720624486, "loss": 2.5855, "step": 1881 }, { "epoch": 0.19356827288605774, "grad_norm": 0.08945252746343613, "learning_rate": 0.007740345110928513, "loss": 2.5509, "step": 1884 }, { "epoch": 0.19387650261995273, "grad_norm": 0.19044962525367737, "learning_rate": 0.0077526705012325395, "loss": 2.559, "step": 1887 }, { "epoch": 0.19418473235384773, "grad_norm": 0.1462780088186264, "learning_rate": 0.007764995891536566, "loss": 2.5749, "step": 1890 }, { "epoch": 0.19449296208774272, "grad_norm": 0.15944691002368927, "learning_rate": 0.007777321281840591, "loss": 2.5801, "step": 1893 }, { "epoch": 0.19480119182163771, "grad_norm": 0.10125305503606796, "learning_rate": 0.007789646672144618, "loss": 2.5821, "step": 1896 }, { "epoch": 0.19510942155553274, "grad_norm": 0.17344938218593597, "learning_rate": 0.007801972062448644, "loss": 2.5905, "step": 1899 }, { "epoch": 0.19541765128942773, "grad_norm": 0.16651591658592224, "learning_rate": 0.007814297452752672, "loss": 2.5668, "step": 1902 }, { "epoch": 0.19572588102332272, "grad_norm": 0.17417702078819275, "learning_rate": 0.007826622843056696, "loss": 2.568, "step": 1905 }, { "epoch": 0.19603411075721772, "grad_norm": 0.11182334274053574, "learning_rate": 0.007838948233360723, "loss": 2.5547, "step": 1908 }, { "epoch": 0.1963423404911127, "grad_norm": 0.23256631195545197, "learning_rate": 0.007851273623664749, "loss": 2.5722, "step": 1911 }, { "epoch": 0.1966505702250077, "grad_norm": 0.18180392682552338, "learning_rate": 0.007863599013968776, "loss": 2.558, "step": 1914 }, { "epoch": 0.1969587999589027, "grad_norm": 0.12168890237808228, "learning_rate": 0.007875924404272802, "loss": 2.5977, "step": 1917 }, { "epoch": 0.1972670296927977, "grad_norm": 0.11032187938690186, "learning_rate": 0.007888249794576828, "loss": 2.5846, "step": 1920 }, { "epoch": 0.19757525942669268, "grad_norm": 0.0740116760134697, "learning_rate": 0.007900575184880855, "loss": 2.5824, "step": 1923 }, { "epoch": 0.19788348916058768, "grad_norm": 0.05902474746108055, "learning_rate": 0.007912900575184881, "loss": 2.5497, "step": 1926 }, { "epoch": 0.1981917188944827, "grad_norm": 0.09003309905529022, "learning_rate": 0.007925225965488907, "loss": 2.5523, "step": 1929 }, { "epoch": 0.1984999486283777, "grad_norm": 0.4191035330295563, "learning_rate": 0.007937551355792934, "loss": 2.6223, "step": 1932 }, { "epoch": 0.1988081783622727, "grad_norm": 0.17093214392662048, "learning_rate": 0.00794987674609696, "loss": 2.5647, "step": 1935 }, { "epoch": 0.19911640809616768, "grad_norm": 0.0921127051115036, "learning_rate": 0.007962202136400986, "loss": 2.564, "step": 1938 }, { "epoch": 0.19942463783006267, "grad_norm": 0.14204134047031403, "learning_rate": 0.007974527526705012, "loss": 2.5972, "step": 1941 }, { "epoch": 0.19973286756395767, "grad_norm": 0.07556895911693573, "learning_rate": 0.007986852917009039, "loss": 2.5796, "step": 1944 }, { "epoch": 0.20004109729785266, "grad_norm": 0.07290320843458176, "learning_rate": 0.007999178307313065, "loss": 2.5564, "step": 1947 }, { "epoch": 0.20034932703174765, "grad_norm": 0.1624913364648819, "learning_rate": 0.008011503697617092, "loss": 2.5849, "step": 1950 }, { "epoch": 0.20065755676564265, "grad_norm": 0.11839967221021652, "learning_rate": 0.008023829087921118, "loss": 2.5611, "step": 1953 }, { "epoch": 0.20096578649953767, "grad_norm": 0.14280788600444794, "learning_rate": 0.008036154478225144, "loss": 2.5289, "step": 1956 }, { "epoch": 0.20127401623343266, "grad_norm": 0.11515247821807861, "learning_rate": 0.008048479868529171, "loss": 2.5678, "step": 1959 }, { "epoch": 0.20158224596732766, "grad_norm": 0.1147715225815773, "learning_rate": 0.008060805258833197, "loss": 2.5452, "step": 1962 }, { "epoch": 0.20189047570122265, "grad_norm": 0.09767001122236252, "learning_rate": 0.008073130649137223, "loss": 2.6023, "step": 1965 }, { "epoch": 0.20219870543511764, "grad_norm": 0.0866391509771347, "learning_rate": 0.008085456039441248, "loss": 2.5518, "step": 1968 }, { "epoch": 0.20250693516901264, "grad_norm": 0.1610632985830307, "learning_rate": 0.008097781429745276, "loss": 2.5271, "step": 1971 }, { "epoch": 0.20281516490290763, "grad_norm": 0.20238341391086578, "learning_rate": 0.008110106820049302, "loss": 2.5597, "step": 1974 }, { "epoch": 0.20312339463680262, "grad_norm": 0.11807162314653397, "learning_rate": 0.008122432210353327, "loss": 2.5663, "step": 1977 }, { "epoch": 0.20343162437069762, "grad_norm": 0.14654900133609772, "learning_rate": 0.008134757600657355, "loss": 2.5729, "step": 1980 }, { "epoch": 0.2037398541045926, "grad_norm": 0.17804567515850067, "learning_rate": 0.00814708299096138, "loss": 2.5658, "step": 1983 }, { "epoch": 0.20404808383848763, "grad_norm": 0.12376303225755692, "learning_rate": 0.008159408381265408, "loss": 2.5703, "step": 1986 }, { "epoch": 0.20435631357238263, "grad_norm": 0.1248418316245079, "learning_rate": 0.008171733771569432, "loss": 2.5328, "step": 1989 }, { "epoch": 0.20466454330627762, "grad_norm": 0.08159278333187103, "learning_rate": 0.00818405916187346, "loss": 2.5349, "step": 1992 }, { "epoch": 0.2049727730401726, "grad_norm": 0.11184779554605484, "learning_rate": 0.008196384552177485, "loss": 2.5557, "step": 1995 }, { "epoch": 0.2052810027740676, "grad_norm": 0.09568610787391663, "learning_rate": 0.008208709942481513, "loss": 2.5415, "step": 1998 }, { "epoch": 0.2055892325079626, "grad_norm": 0.08708583563566208, "learning_rate": 0.008221035332785539, "loss": 2.5369, "step": 2001 }, { "epoch": 0.2058974622418576, "grad_norm": 0.11849135160446167, "learning_rate": 0.008233360723089564, "loss": 2.5617, "step": 2004 }, { "epoch": 0.2062056919757526, "grad_norm": 0.1407340168952942, "learning_rate": 0.008245686113393592, "loss": 2.5374, "step": 2007 }, { "epoch": 0.20651392170964758, "grad_norm": 0.13198955357074738, "learning_rate": 0.008258011503697617, "loss": 2.57, "step": 2010 }, { "epoch": 0.20682215144354257, "grad_norm": 0.12408044934272766, "learning_rate": 0.008270336894001643, "loss": 2.5344, "step": 2013 }, { "epoch": 0.2071303811774376, "grad_norm": 0.149169921875, "learning_rate": 0.008282662284305669, "loss": 2.5357, "step": 2016 }, { "epoch": 0.2074386109113326, "grad_norm": 0.10010293871164322, "learning_rate": 0.008294987674609696, "loss": 2.5166, "step": 2019 }, { "epoch": 0.20774684064522758, "grad_norm": 0.17650344967842102, "learning_rate": 0.008307313064913722, "loss": 2.5664, "step": 2022 }, { "epoch": 0.20805507037912258, "grad_norm": 0.09946206212043762, "learning_rate": 0.008319638455217748, "loss": 2.5378, "step": 2025 }, { "epoch": 0.20836330011301757, "grad_norm": 0.07705225795507431, "learning_rate": 0.008331963845521775, "loss": 2.5088, "step": 2028 }, { "epoch": 0.20867152984691256, "grad_norm": 0.18174925446510315, "learning_rate": 0.008344289235825801, "loss": 2.5264, "step": 2031 }, { "epoch": 0.20897975958080756, "grad_norm": 0.14415894448757172, "learning_rate": 0.008356614626129829, "loss": 2.5549, "step": 2034 }, { "epoch": 0.20928798931470255, "grad_norm": 0.17721933126449585, "learning_rate": 0.008368940016433854, "loss": 2.5476, "step": 2037 }, { "epoch": 0.20959621904859754, "grad_norm": 0.1727544367313385, "learning_rate": 0.00838126540673788, "loss": 2.5809, "step": 2040 }, { "epoch": 0.20990444878249256, "grad_norm": 0.20624054968357086, "learning_rate": 0.008393590797041908, "loss": 2.5256, "step": 2043 }, { "epoch": 0.21021267851638756, "grad_norm": 0.08070924133062363, "learning_rate": 0.008405916187345933, "loss": 2.5537, "step": 2046 }, { "epoch": 0.21052090825028255, "grad_norm": 0.07868220657110214, "learning_rate": 0.008418241577649959, "loss": 2.5266, "step": 2049 }, { "epoch": 0.21082913798417754, "grad_norm": 0.19941876828670502, "learning_rate": 0.008430566967953985, "loss": 2.5344, "step": 2052 }, { "epoch": 0.21113736771807254, "grad_norm": 0.08758697658777237, "learning_rate": 0.008442892358258012, "loss": 2.5409, "step": 2055 }, { "epoch": 0.21144559745196753, "grad_norm": 0.11635969579219818, "learning_rate": 0.008455217748562038, "loss": 2.5497, "step": 2058 }, { "epoch": 0.21175382718586253, "grad_norm": 0.16910326480865479, "learning_rate": 0.008467543138866064, "loss": 2.5509, "step": 2061 }, { "epoch": 0.21206205691975752, "grad_norm": 0.14605827629566193, "learning_rate": 0.008479868529170091, "loss": 2.5589, "step": 2064 }, { "epoch": 0.2123702866536525, "grad_norm": 0.18890123069286346, "learning_rate": 0.008492193919474117, "loss": 2.5454, "step": 2067 }, { "epoch": 0.2126785163875475, "grad_norm": 0.09277717024087906, "learning_rate": 0.008504519309778144, "loss": 2.4984, "step": 2070 }, { "epoch": 0.21298674612144253, "grad_norm": 0.07268327474594116, "learning_rate": 0.008516844700082168, "loss": 2.5323, "step": 2073 }, { "epoch": 0.21329497585533752, "grad_norm": 0.0807403028011322, "learning_rate": 0.008529170090386196, "loss": 2.5083, "step": 2076 }, { "epoch": 0.21360320558923251, "grad_norm": 0.12681947648525238, "learning_rate": 0.008541495480690222, "loss": 2.5386, "step": 2079 }, { "epoch": 0.2139114353231275, "grad_norm": 0.25378334522247314, "learning_rate": 0.008553820870994249, "loss": 2.5188, "step": 2082 }, { "epoch": 0.2142196650570225, "grad_norm": 0.15101733803749084, "learning_rate": 0.008566146261298275, "loss": 2.5457, "step": 2085 }, { "epoch": 0.2145278947909175, "grad_norm": 0.17336703836917877, "learning_rate": 0.0085784716516023, "loss": 2.5206, "step": 2088 }, { "epoch": 0.2148361245248125, "grad_norm": 0.07735245674848557, "learning_rate": 0.008590797041906328, "loss": 2.5297, "step": 2091 }, { "epoch": 0.21514435425870748, "grad_norm": 0.15841136872768402, "learning_rate": 0.008603122432210354, "loss": 2.5086, "step": 2094 }, { "epoch": 0.21545258399260248, "grad_norm": 0.15941859781742096, "learning_rate": 0.00861544782251438, "loss": 2.5316, "step": 2097 }, { "epoch": 0.21576081372649747, "grad_norm": 0.13837756216526031, "learning_rate": 0.008627773212818405, "loss": 2.4818, "step": 2100 }, { "epoch": 0.2160690434603925, "grad_norm": 0.14743675291538239, "learning_rate": 0.008640098603122433, "loss": 2.5351, "step": 2103 }, { "epoch": 0.21637727319428748, "grad_norm": 0.15961112082004547, "learning_rate": 0.008652423993426459, "loss": 2.4916, "step": 2106 }, { "epoch": 0.21668550292818248, "grad_norm": 0.16091223061084747, "learning_rate": 0.008664749383730484, "loss": 2.5026, "step": 2109 }, { "epoch": 0.21699373266207747, "grad_norm": 0.1695915311574936, "learning_rate": 0.008677074774034512, "loss": 2.4994, "step": 2112 }, { "epoch": 0.21730196239597246, "grad_norm": 0.1457175761461258, "learning_rate": 0.008689400164338537, "loss": 2.5225, "step": 2115 }, { "epoch": 0.21761019212986746, "grad_norm": 0.0995342880487442, "learning_rate": 0.008701725554642563, "loss": 2.5373, "step": 2118 }, { "epoch": 0.21791842186376245, "grad_norm": 0.11527393013238907, "learning_rate": 0.00871405094494659, "loss": 2.5207, "step": 2121 }, { "epoch": 0.21822665159765745, "grad_norm": 0.07951527088880539, "learning_rate": 0.008726376335250616, "loss": 2.4868, "step": 2124 }, { "epoch": 0.21853488133155244, "grad_norm": 0.11319970339536667, "learning_rate": 0.008738701725554644, "loss": 2.4965, "step": 2127 }, { "epoch": 0.21884311106544746, "grad_norm": 0.14932893216609955, "learning_rate": 0.008751027115858668, "loss": 2.5164, "step": 2130 }, { "epoch": 0.21915134079934245, "grad_norm": 0.1703396886587143, "learning_rate": 0.008763352506162695, "loss": 2.5175, "step": 2133 }, { "epoch": 0.21945957053323745, "grad_norm": 0.2208787351846695, "learning_rate": 0.008775677896466721, "loss": 2.521, "step": 2136 }, { "epoch": 0.21976780026713244, "grad_norm": 0.0884699895977974, "learning_rate": 0.008788003286770749, "loss": 2.5356, "step": 2139 }, { "epoch": 0.22007603000102743, "grad_norm": 0.06739311665296555, "learning_rate": 0.008800328677074774, "loss": 2.5102, "step": 2142 }, { "epoch": 0.22038425973492243, "grad_norm": 0.09653139859437943, "learning_rate": 0.0088126540673788, "loss": 2.5047, "step": 2145 }, { "epoch": 0.22069248946881742, "grad_norm": 0.11972832679748535, "learning_rate": 0.008824979457682828, "loss": 2.5086, "step": 2148 }, { "epoch": 0.22100071920271241, "grad_norm": 0.13725396990776062, "learning_rate": 0.008837304847986853, "loss": 2.5034, "step": 2151 }, { "epoch": 0.2213089489366074, "grad_norm": 0.09293966740369797, "learning_rate": 0.008849630238290879, "loss": 2.5004, "step": 2154 }, { "epoch": 0.2216171786705024, "grad_norm": 0.07625159621238708, "learning_rate": 0.008861955628594905, "loss": 2.508, "step": 2157 }, { "epoch": 0.22192540840439742, "grad_norm": 0.08581928163766861, "learning_rate": 0.008874281018898932, "loss": 2.4973, "step": 2160 }, { "epoch": 0.22223363813829242, "grad_norm": 0.12700457870960236, "learning_rate": 0.008886606409202958, "loss": 2.5174, "step": 2163 }, { "epoch": 0.2225418678721874, "grad_norm": 0.17155064642429352, "learning_rate": 0.008898931799506984, "loss": 2.4969, "step": 2166 }, { "epoch": 0.2228500976060824, "grad_norm": 0.13356278836727142, "learning_rate": 0.008911257189811011, "loss": 2.4876, "step": 2169 }, { "epoch": 0.2231583273399774, "grad_norm": 0.07805536687374115, "learning_rate": 0.008923582580115037, "loss": 2.5151, "step": 2172 }, { "epoch": 0.2234665570738724, "grad_norm": 0.10661714524030685, "learning_rate": 0.008935907970419064, "loss": 2.4607, "step": 2175 }, { "epoch": 0.22377478680776738, "grad_norm": 0.15095242857933044, "learning_rate": 0.008948233360723088, "loss": 2.5358, "step": 2178 }, { "epoch": 0.22408301654166238, "grad_norm": 0.11287077516317368, "learning_rate": 0.008960558751027116, "loss": 2.5289, "step": 2181 }, { "epoch": 0.22439124627555737, "grad_norm": 0.16408318281173706, "learning_rate": 0.008972884141331142, "loss": 2.5256, "step": 2184 }, { "epoch": 0.22469947600945236, "grad_norm": 0.1227622851729393, "learning_rate": 0.008985209531635169, "loss": 2.5091, "step": 2187 }, { "epoch": 0.22500770574334739, "grad_norm": 0.06549924612045288, "learning_rate": 0.008997534921939195, "loss": 2.4908, "step": 2190 }, { "epoch": 0.22531593547724238, "grad_norm": 0.09310626983642578, "learning_rate": 0.00900986031224322, "loss": 2.4903, "step": 2193 }, { "epoch": 0.22562416521113737, "grad_norm": 0.12637357413768768, "learning_rate": 0.009022185702547248, "loss": 2.5089, "step": 2196 }, { "epoch": 0.22593239494503237, "grad_norm": 0.1691301167011261, "learning_rate": 0.009034511092851274, "loss": 2.4984, "step": 2199 }, { "epoch": 0.22624062467892736, "grad_norm": 0.18173068761825562, "learning_rate": 0.0090468364831553, "loss": 2.4552, "step": 2202 }, { "epoch": 0.22654885441282235, "grad_norm": 0.19549600780010223, "learning_rate": 0.009059161873459327, "loss": 2.4642, "step": 2205 }, { "epoch": 0.22685708414671735, "grad_norm": 0.09038446098566055, "learning_rate": 0.009071487263763353, "loss": 2.5017, "step": 2208 }, { "epoch": 0.22716531388061234, "grad_norm": 0.07959726452827454, "learning_rate": 0.009083812654067379, "loss": 2.4934, "step": 2211 }, { "epoch": 0.22747354361450733, "grad_norm": 0.07991699874401093, "learning_rate": 0.009096138044371404, "loss": 2.498, "step": 2214 }, { "epoch": 0.22778177334840236, "grad_norm": 0.09022307395935059, "learning_rate": 0.009108463434675432, "loss": 2.4832, "step": 2217 }, { "epoch": 0.22809000308229735, "grad_norm": 0.11399543285369873, "learning_rate": 0.009120788824979457, "loss": 2.4929, "step": 2220 }, { "epoch": 0.22839823281619234, "grad_norm": 0.10349836200475693, "learning_rate": 0.009133114215283485, "loss": 2.4622, "step": 2223 }, { "epoch": 0.22870646255008734, "grad_norm": 0.17096632719039917, "learning_rate": 0.00914543960558751, "loss": 2.5103, "step": 2226 }, { "epoch": 0.22901469228398233, "grad_norm": 0.13803228735923767, "learning_rate": 0.009157764995891536, "loss": 2.5034, "step": 2229 }, { "epoch": 0.22932292201787732, "grad_norm": 0.16332487761974335, "learning_rate": 0.009170090386195564, "loss": 2.5051, "step": 2232 }, { "epoch": 0.22963115175177232, "grad_norm": 0.12147244811058044, "learning_rate": 0.00918241577649959, "loss": 2.4794, "step": 2235 }, { "epoch": 0.2299393814856673, "grad_norm": 0.08943907916545868, "learning_rate": 0.009194741166803615, "loss": 2.5331, "step": 2238 }, { "epoch": 0.2302476112195623, "grad_norm": 0.08069117367267609, "learning_rate": 0.009207066557107641, "loss": 2.4807, "step": 2241 }, { "epoch": 0.2305558409534573, "grad_norm": 0.11125557869672775, "learning_rate": 0.009219391947411669, "loss": 2.4567, "step": 2244 }, { "epoch": 0.23086407068735232, "grad_norm": 0.2825096547603607, "learning_rate": 0.009231717337715694, "loss": 2.5101, "step": 2247 }, { "epoch": 0.2311723004212473, "grad_norm": 0.10534384101629257, "learning_rate": 0.00924404272801972, "loss": 2.5272, "step": 2250 }, { "epoch": 0.2314805301551423, "grad_norm": 0.07159514725208282, "learning_rate": 0.009256368118323748, "loss": 2.4707, "step": 2253 }, { "epoch": 0.2317887598890373, "grad_norm": 0.06435802578926086, "learning_rate": 0.009268693508627773, "loss": 2.4788, "step": 2256 }, { "epoch": 0.2320969896229323, "grad_norm": 0.09402693063020706, "learning_rate": 0.0092810188989318, "loss": 2.4639, "step": 2259 }, { "epoch": 0.2324052193568273, "grad_norm": 0.18836408853530884, "learning_rate": 0.009293344289235825, "loss": 2.4747, "step": 2262 }, { "epoch": 0.23271344909072228, "grad_norm": 0.09705471992492676, "learning_rate": 0.009305669679539852, "loss": 2.5041, "step": 2265 }, { "epoch": 0.23302167882461727, "grad_norm": 0.09185091406106949, "learning_rate": 0.009317995069843878, "loss": 2.4625, "step": 2268 }, { "epoch": 0.23332990855851227, "grad_norm": 0.0848812386393547, "learning_rate": 0.009330320460147905, "loss": 2.4876, "step": 2271 }, { "epoch": 0.2336381382924073, "grad_norm": 0.07989475131034851, "learning_rate": 0.009342645850451931, "loss": 2.4697, "step": 2274 }, { "epoch": 0.23394636802630228, "grad_norm": 0.09660454094409943, "learning_rate": 0.009354971240755957, "loss": 2.4917, "step": 2277 }, { "epoch": 0.23425459776019728, "grad_norm": 0.09550273418426514, "learning_rate": 0.009367296631059984, "loss": 2.4806, "step": 2280 }, { "epoch": 0.23456282749409227, "grad_norm": 0.16650651395320892, "learning_rate": 0.00937962202136401, "loss": 2.4424, "step": 2283 }, { "epoch": 0.23487105722798726, "grad_norm": 0.1455817073583603, "learning_rate": 0.009391947411668036, "loss": 2.4907, "step": 2286 }, { "epoch": 0.23517928696188226, "grad_norm": 0.075865738093853, "learning_rate": 0.009404272801972062, "loss": 2.5004, "step": 2289 }, { "epoch": 0.23548751669577725, "grad_norm": 0.188491553068161, "learning_rate": 0.009416598192276089, "loss": 2.5111, "step": 2292 }, { "epoch": 0.23579574642967224, "grad_norm": 0.07567702233791351, "learning_rate": 0.009428923582580115, "loss": 2.4966, "step": 2295 }, { "epoch": 0.23610397616356724, "grad_norm": 0.0682358667254448, "learning_rate": 0.00944124897288414, "loss": 2.4781, "step": 2298 }, { "epoch": 0.23641220589746223, "grad_norm": 0.173895925283432, "learning_rate": 0.009453574363188168, "loss": 2.4471, "step": 2301 }, { "epoch": 0.23672043563135725, "grad_norm": 0.15088587999343872, "learning_rate": 0.009465899753492194, "loss": 2.4783, "step": 2304 }, { "epoch": 0.23702866536525224, "grad_norm": 0.09947361797094345, "learning_rate": 0.009478225143796221, "loss": 2.4757, "step": 2307 }, { "epoch": 0.23733689509914724, "grad_norm": 0.0709480568766594, "learning_rate": 0.009490550534100247, "loss": 2.4617, "step": 2310 }, { "epoch": 0.23764512483304223, "grad_norm": 0.11335324496030807, "learning_rate": 0.009502875924404273, "loss": 2.4506, "step": 2313 }, { "epoch": 0.23795335456693723, "grad_norm": 0.10329569876194, "learning_rate": 0.0095152013147083, "loss": 2.4444, "step": 2316 }, { "epoch": 0.23826158430083222, "grad_norm": 0.18935157358646393, "learning_rate": 0.009527526705012326, "loss": 2.4739, "step": 2319 }, { "epoch": 0.2385698140347272, "grad_norm": 0.10977230221033096, "learning_rate": 0.009539852095316352, "loss": 2.4849, "step": 2322 }, { "epoch": 0.2388780437686222, "grad_norm": 0.1623351126909256, "learning_rate": 0.009552177485620377, "loss": 2.4856, "step": 2325 }, { "epoch": 0.2391862735025172, "grad_norm": 0.12067209929227829, "learning_rate": 0.009564502875924405, "loss": 2.427, "step": 2328 }, { "epoch": 0.2394945032364122, "grad_norm": 0.12578649818897247, "learning_rate": 0.00957682826622843, "loss": 2.4719, "step": 2331 }, { "epoch": 0.23980273297030721, "grad_norm": 0.09442924708127975, "learning_rate": 0.009589153656532456, "loss": 2.475, "step": 2334 }, { "epoch": 0.2401109627042022, "grad_norm": 0.06693053990602493, "learning_rate": 0.009601479046836484, "loss": 2.4949, "step": 2337 }, { "epoch": 0.2404191924380972, "grad_norm": 0.09371168911457062, "learning_rate": 0.00961380443714051, "loss": 2.4611, "step": 2340 }, { "epoch": 0.2407274221719922, "grad_norm": 0.11009377986192703, "learning_rate": 0.009626129827444537, "loss": 2.4998, "step": 2343 }, { "epoch": 0.2410356519058872, "grad_norm": 0.08789053559303284, "learning_rate": 0.009638455217748561, "loss": 2.4891, "step": 2346 }, { "epoch": 0.24134388163978218, "grad_norm": 0.2513992488384247, "learning_rate": 0.009650780608052589, "loss": 2.4613, "step": 2349 }, { "epoch": 0.24165211137367718, "grad_norm": 0.09223336726427078, "learning_rate": 0.009663105998356614, "loss": 2.4874, "step": 2352 }, { "epoch": 0.24196034110757217, "grad_norm": 0.08941586315631866, "learning_rate": 0.009675431388660642, "loss": 2.4777, "step": 2355 }, { "epoch": 0.24226857084146716, "grad_norm": 0.09664765000343323, "learning_rate": 0.009687756778964668, "loss": 2.4728, "step": 2358 }, { "epoch": 0.24257680057536218, "grad_norm": 0.08079587668180466, "learning_rate": 0.009700082169268693, "loss": 2.4621, "step": 2361 }, { "epoch": 0.24288503030925718, "grad_norm": 0.07663597911596298, "learning_rate": 0.00971240755957272, "loss": 2.487, "step": 2364 }, { "epoch": 0.24319326004315217, "grad_norm": 0.07564109563827515, "learning_rate": 0.009724732949876747, "loss": 2.4123, "step": 2367 }, { "epoch": 0.24350148977704716, "grad_norm": 0.1025756299495697, "learning_rate": 0.009737058340180772, "loss": 2.4669, "step": 2370 }, { "epoch": 0.24380971951094216, "grad_norm": 0.1370251476764679, "learning_rate": 0.009749383730484798, "loss": 2.4664, "step": 2373 }, { "epoch": 0.24411794924483715, "grad_norm": 0.11926325410604477, "learning_rate": 0.009761709120788825, "loss": 2.4483, "step": 2376 }, { "epoch": 0.24442617897873214, "grad_norm": 0.16847510635852814, "learning_rate": 0.009774034511092851, "loss": 2.4421, "step": 2379 }, { "epoch": 0.24473440871262714, "grad_norm": 0.14343461394309998, "learning_rate": 0.009786359901396877, "loss": 2.452, "step": 2382 }, { "epoch": 0.24504263844652213, "grad_norm": 0.0658588707447052, "learning_rate": 0.009798685291700904, "loss": 2.4717, "step": 2385 }, { "epoch": 0.24535086818041713, "grad_norm": 0.09394209086894989, "learning_rate": 0.00981101068200493, "loss": 2.4467, "step": 2388 }, { "epoch": 0.24565909791431215, "grad_norm": 0.0717134177684784, "learning_rate": 0.009823336072308958, "loss": 2.4505, "step": 2391 }, { "epoch": 0.24596732764820714, "grad_norm": 0.07518400996923447, "learning_rate": 0.009835661462612983, "loss": 2.431, "step": 2394 }, { "epoch": 0.24627555738210213, "grad_norm": 0.10242413729429245, "learning_rate": 0.00984798685291701, "loss": 2.451, "step": 2397 }, { "epoch": 0.24658378711599713, "grad_norm": 0.11668457090854645, "learning_rate": 0.009860312243221035, "loss": 2.4574, "step": 2400 }, { "epoch": 0.24689201684989212, "grad_norm": 0.1074887290596962, "learning_rate": 0.009872637633525062, "loss": 2.4688, "step": 2403 }, { "epoch": 0.24720024658378711, "grad_norm": 0.143118217587471, "learning_rate": 0.009884963023829088, "loss": 2.4614, "step": 2406 }, { "epoch": 0.2475084763176821, "grad_norm": 0.08865509182214737, "learning_rate": 0.009897288414133114, "loss": 2.4768, "step": 2409 }, { "epoch": 0.2478167060515771, "grad_norm": 0.10735021531581879, "learning_rate": 0.009909613804437141, "loss": 2.457, "step": 2412 }, { "epoch": 0.2481249357854721, "grad_norm": 0.11766096949577332, "learning_rate": 0.009921939194741167, "loss": 2.4661, "step": 2415 }, { "epoch": 0.2484331655193671, "grad_norm": 0.11476657539606094, "learning_rate": 0.009934264585045193, "loss": 2.4488, "step": 2418 }, { "epoch": 0.2487413952532621, "grad_norm": 0.05828983336687088, "learning_rate": 0.00994658997534922, "loss": 2.4167, "step": 2421 }, { "epoch": 0.2490496249871571, "grad_norm": 0.05311143398284912, "learning_rate": 0.009958915365653246, "loss": 2.451, "step": 2424 }, { "epoch": 0.2493578547210521, "grad_norm": 0.14447921514511108, "learning_rate": 0.009971240755957273, "loss": 2.4448, "step": 2427 }, { "epoch": 0.2496660844549471, "grad_norm": 0.178679421544075, "learning_rate": 0.009983566146261297, "loss": 2.4577, "step": 2430 }, { "epoch": 0.24997431418884208, "grad_norm": 0.18707922101020813, "learning_rate": 0.009995891536565325, "loss": 2.4544, "step": 2433 }, { "epoch": 0.2502825439227371, "grad_norm": 0.11012792587280273, "learning_rate": 0.01, "loss": 2.4636, "step": 2436 }, { "epoch": 0.25059077365663207, "grad_norm": 0.1133418157696724, "learning_rate": 0.01, "loss": 2.4694, "step": 2439 }, { "epoch": 0.25089900339052706, "grad_norm": 0.09263787418603897, "learning_rate": 0.01, "loss": 2.4174, "step": 2442 }, { "epoch": 0.25120723312442206, "grad_norm": 0.07637537270784378, "learning_rate": 0.01, "loss": 2.4546, "step": 2445 }, { "epoch": 0.25151546285831705, "grad_norm": 0.05083318054676056, "learning_rate": 0.01, "loss": 2.4517, "step": 2448 }, { "epoch": 0.25182369259221205, "grad_norm": 0.11429949849843979, "learning_rate": 0.01, "loss": 2.3998, "step": 2451 }, { "epoch": 0.25213192232610704, "grad_norm": 0.0740060955286026, "learning_rate": 0.01, "loss": 2.4572, "step": 2454 }, { "epoch": 0.25244015206000203, "grad_norm": 0.23151956498622894, "learning_rate": 0.01, "loss": 2.4507, "step": 2457 }, { "epoch": 0.252748381793897, "grad_norm": 0.09557089954614639, "learning_rate": 0.01, "loss": 2.438, "step": 2460 }, { "epoch": 0.2530566115277921, "grad_norm": 0.06453042477369308, "learning_rate": 0.01, "loss": 2.4444, "step": 2463 }, { "epoch": 0.25336484126168707, "grad_norm": 0.06805883347988129, "learning_rate": 0.01, "loss": 2.4333, "step": 2466 }, { "epoch": 0.25367307099558206, "grad_norm": 0.12063002586364746, "learning_rate": 0.01, "loss": 2.4349, "step": 2469 }, { "epoch": 0.25398130072947706, "grad_norm": 0.12683679163455963, "learning_rate": 0.01, "loss": 2.4615, "step": 2472 }, { "epoch": 0.25428953046337205, "grad_norm": 0.19388514757156372, "learning_rate": 0.01, "loss": 2.4251, "step": 2475 }, { "epoch": 0.25459776019726704, "grad_norm": 0.15118692815303802, "learning_rate": 0.01, "loss": 2.4493, "step": 2478 }, { "epoch": 0.25490598993116204, "grad_norm": 0.0716528594493866, "learning_rate": 0.01, "loss": 2.4177, "step": 2481 }, { "epoch": 0.25521421966505703, "grad_norm": 0.06410454958677292, "learning_rate": 0.01, "loss": 2.4472, "step": 2484 }, { "epoch": 0.255522449398952, "grad_norm": 0.0613977424800396, "learning_rate": 0.01, "loss": 2.4374, "step": 2487 }, { "epoch": 0.255830679132847, "grad_norm": 0.18522503972053528, "learning_rate": 0.01, "loss": 2.4237, "step": 2490 }, { "epoch": 0.256138908866742, "grad_norm": 0.10789433866739273, "learning_rate": 0.01, "loss": 2.4035, "step": 2493 }, { "epoch": 0.256447138600637, "grad_norm": 0.17734338343143463, "learning_rate": 0.01, "loss": 2.4809, "step": 2496 }, { "epoch": 0.256755368334532, "grad_norm": 0.09952409565448761, "learning_rate": 0.01, "loss": 2.4586, "step": 2499 }, { "epoch": 0.257063598068427, "grad_norm": 0.15578734874725342, "learning_rate": 0.01, "loss": 2.4451, "step": 2502 }, { "epoch": 0.257371827802322, "grad_norm": 0.11684698611497879, "learning_rate": 0.01, "loss": 2.4422, "step": 2505 }, { "epoch": 0.257680057536217, "grad_norm": 0.06539366394281387, "learning_rate": 0.01, "loss": 2.4369, "step": 2508 }, { "epoch": 0.25798828727011197, "grad_norm": 0.15363268554210663, "learning_rate": 0.01, "loss": 2.4307, "step": 2511 }, { "epoch": 0.25829651700400696, "grad_norm": 0.07657501846551895, "learning_rate": 0.01, "loss": 2.4287, "step": 2514 }, { "epoch": 0.25860474673790196, "grad_norm": 0.11238528788089752, "learning_rate": 0.01, "loss": 2.415, "step": 2517 }, { "epoch": 0.25891297647179695, "grad_norm": 0.08362044394016266, "learning_rate": 0.01, "loss": 2.4603, "step": 2520 }, { "epoch": 0.259221206205692, "grad_norm": 0.07373514771461487, "learning_rate": 0.01, "loss": 2.3937, "step": 2523 }, { "epoch": 0.259529435939587, "grad_norm": 0.062842458486557, "learning_rate": 0.01, "loss": 2.4096, "step": 2526 }, { "epoch": 0.259837665673482, "grad_norm": 0.12551096081733704, "learning_rate": 0.01, "loss": 2.4379, "step": 2529 }, { "epoch": 0.260145895407377, "grad_norm": 0.06409156322479248, "learning_rate": 0.01, "loss": 2.4212, "step": 2532 }, { "epoch": 0.260454125141272, "grad_norm": 0.10057753324508667, "learning_rate": 0.01, "loss": 2.4349, "step": 2535 }, { "epoch": 0.26076235487516697, "grad_norm": 0.1575561910867691, "learning_rate": 0.01, "loss": 2.44, "step": 2538 }, { "epoch": 0.26107058460906196, "grad_norm": 0.25684165954589844, "learning_rate": 0.01, "loss": 2.4308, "step": 2541 }, { "epoch": 0.26137881434295696, "grad_norm": 0.07472192496061325, "learning_rate": 0.01, "loss": 2.4065, "step": 2544 }, { "epoch": 0.26168704407685195, "grad_norm": 0.060896482318639755, "learning_rate": 0.01, "loss": 2.4347, "step": 2547 }, { "epoch": 0.26199527381074694, "grad_norm": 0.12883131206035614, "learning_rate": 0.01, "loss": 2.42, "step": 2550 }, { "epoch": 0.26230350354464194, "grad_norm": 0.10772990435361862, "learning_rate": 0.01, "loss": 2.3982, "step": 2553 }, { "epoch": 0.26261173327853693, "grad_norm": 0.20955395698547363, "learning_rate": 0.01, "loss": 2.4204, "step": 2556 }, { "epoch": 0.2629199630124319, "grad_norm": 0.08120223879814148, "learning_rate": 0.01, "loss": 2.4192, "step": 2559 }, { "epoch": 0.2632281927463269, "grad_norm": 0.059099119156599045, "learning_rate": 0.01, "loss": 2.4252, "step": 2562 }, { "epoch": 0.2635364224802219, "grad_norm": 0.08729352802038193, "learning_rate": 0.01, "loss": 2.4227, "step": 2565 }, { "epoch": 0.2638446522141169, "grad_norm": 0.1920178234577179, "learning_rate": 0.01, "loss": 2.4017, "step": 2568 }, { "epoch": 0.2641528819480119, "grad_norm": 0.15997105836868286, "learning_rate": 0.01, "loss": 2.444, "step": 2571 }, { "epoch": 0.2644611116819069, "grad_norm": 0.12249890714883804, "learning_rate": 0.01, "loss": 2.3957, "step": 2574 }, { "epoch": 0.2647693414158019, "grad_norm": 0.05974414199590683, "learning_rate": 0.01, "loss": 2.421, "step": 2577 }, { "epoch": 0.26507757114969693, "grad_norm": 0.13711535930633545, "learning_rate": 0.01, "loss": 2.4234, "step": 2580 }, { "epoch": 0.2653858008835919, "grad_norm": 0.15437988936901093, "learning_rate": 0.01, "loss": 2.4216, "step": 2583 }, { "epoch": 0.2656940306174869, "grad_norm": 0.10766157507896423, "learning_rate": 0.01, "loss": 2.4086, "step": 2586 }, { "epoch": 0.2660022603513819, "grad_norm": 0.0736764669418335, "learning_rate": 0.01, "loss": 2.4227, "step": 2589 }, { "epoch": 0.2663104900852769, "grad_norm": 0.06279190629720688, "learning_rate": 0.01, "loss": 2.4371, "step": 2592 }, { "epoch": 0.2666187198191719, "grad_norm": 0.11150863766670227, "learning_rate": 0.01, "loss": 2.3941, "step": 2595 }, { "epoch": 0.2669269495530669, "grad_norm": 0.1527506411075592, "learning_rate": 0.01, "loss": 2.4287, "step": 2598 }, { "epoch": 0.2672351792869619, "grad_norm": 0.13321219384670258, "learning_rate": 0.01, "loss": 2.3995, "step": 2601 }, { "epoch": 0.2675434090208569, "grad_norm": 0.1157502606511116, "learning_rate": 0.01, "loss": 2.4284, "step": 2604 }, { "epoch": 0.2678516387547519, "grad_norm": 0.10027257353067398, "learning_rate": 0.01, "loss": 2.3877, "step": 2607 }, { "epoch": 0.26815986848864687, "grad_norm": 0.10909545421600342, "learning_rate": 0.01, "loss": 2.4134, "step": 2610 }, { "epoch": 0.26846809822254186, "grad_norm": 0.09810952842235565, "learning_rate": 0.01, "loss": 2.4231, "step": 2613 }, { "epoch": 0.26877632795643686, "grad_norm": 0.06906435638666153, "learning_rate": 0.01, "loss": 2.3989, "step": 2616 }, { "epoch": 0.26908455769033185, "grad_norm": 0.10627961158752441, "learning_rate": 0.01, "loss": 2.4333, "step": 2619 }, { "epoch": 0.26939278742422684, "grad_norm": 0.10462147742509842, "learning_rate": 0.01, "loss": 2.394, "step": 2622 }, { "epoch": 0.26970101715812184, "grad_norm": 0.10885953158140182, "learning_rate": 0.01, "loss": 2.4172, "step": 2625 }, { "epoch": 0.27000924689201683, "grad_norm": 0.0981958881020546, "learning_rate": 0.01, "loss": 2.4112, "step": 2628 }, { "epoch": 0.2703174766259118, "grad_norm": 0.14177650213241577, "learning_rate": 0.01, "loss": 2.3748, "step": 2631 }, { "epoch": 0.2706257063598068, "grad_norm": 0.06374615430831909, "learning_rate": 0.01, "loss": 2.4054, "step": 2634 }, { "epoch": 0.27093393609370187, "grad_norm": 0.23363849520683289, "learning_rate": 0.01, "loss": 2.4194, "step": 2637 }, { "epoch": 0.27124216582759686, "grad_norm": 0.10294153541326523, "learning_rate": 0.01, "loss": 2.384, "step": 2640 }, { "epoch": 0.27155039556149185, "grad_norm": 0.17200984060764313, "learning_rate": 0.01, "loss": 2.4122, "step": 2643 }, { "epoch": 0.27185862529538685, "grad_norm": 0.06513970345258713, "learning_rate": 0.01, "loss": 2.4152, "step": 2646 }, { "epoch": 0.27216685502928184, "grad_norm": 0.08533628284931183, "learning_rate": 0.01, "loss": 2.4508, "step": 2649 }, { "epoch": 0.27247508476317683, "grad_norm": 0.07299966365098953, "learning_rate": 0.01, "loss": 2.4091, "step": 2652 }, { "epoch": 0.2727833144970718, "grad_norm": 0.06617329269647598, "learning_rate": 0.01, "loss": 2.4, "step": 2655 }, { "epoch": 0.2730915442309668, "grad_norm": 0.07062381505966187, "learning_rate": 0.01, "loss": 2.4002, "step": 2658 }, { "epoch": 0.2733997739648618, "grad_norm": 0.11162712424993515, "learning_rate": 0.01, "loss": 2.386, "step": 2661 }, { "epoch": 0.2737080036987568, "grad_norm": 0.07827174663543701, "learning_rate": 0.01, "loss": 2.4111, "step": 2664 }, { "epoch": 0.2740162334326518, "grad_norm": 0.07248109579086304, "learning_rate": 0.01, "loss": 2.3968, "step": 2667 }, { "epoch": 0.2743244631665468, "grad_norm": 0.1251075118780136, "learning_rate": 0.01, "loss": 2.4216, "step": 2670 }, { "epoch": 0.2746326929004418, "grad_norm": 0.1280512660741806, "learning_rate": 0.01, "loss": 2.4233, "step": 2673 }, { "epoch": 0.2749409226343368, "grad_norm": 0.06290891766548157, "learning_rate": 0.01, "loss": 2.412, "step": 2676 }, { "epoch": 0.2752491523682318, "grad_norm": 0.09324091672897339, "learning_rate": 0.01, "loss": 2.4025, "step": 2679 }, { "epoch": 0.27555738210212677, "grad_norm": 0.06253890693187714, "learning_rate": 0.01, "loss": 2.4197, "step": 2682 }, { "epoch": 0.27586561183602176, "grad_norm": 0.10279545187950134, "learning_rate": 0.01, "loss": 2.4099, "step": 2685 }, { "epoch": 0.27617384156991676, "grad_norm": 0.07942310720682144, "learning_rate": 0.01, "loss": 2.4052, "step": 2688 }, { "epoch": 0.27648207130381175, "grad_norm": 0.10373161733150482, "learning_rate": 0.01, "loss": 2.3899, "step": 2691 }, { "epoch": 0.2767903010377068, "grad_norm": 0.312575101852417, "learning_rate": 0.01, "loss": 2.4243, "step": 2694 }, { "epoch": 0.2770985307716018, "grad_norm": 0.07417728751897812, "learning_rate": 0.01, "loss": 2.3604, "step": 2697 }, { "epoch": 0.2774067605054968, "grad_norm": 0.09007294476032257, "learning_rate": 0.01, "loss": 2.3863, "step": 2700 }, { "epoch": 0.2777149902393918, "grad_norm": 0.10452757775783539, "learning_rate": 0.01, "loss": 2.41, "step": 2703 }, { "epoch": 0.2780232199732868, "grad_norm": 0.09276364743709564, "learning_rate": 0.01, "loss": 2.3878, "step": 2706 }, { "epoch": 0.27833144970718177, "grad_norm": 0.08949960023164749, "learning_rate": 0.01, "loss": 2.3823, "step": 2709 }, { "epoch": 0.27863967944107676, "grad_norm": 0.0589129813015461, "learning_rate": 0.01, "loss": 2.401, "step": 2712 }, { "epoch": 0.27894790917497175, "grad_norm": 0.08298425376415253, "learning_rate": 0.01, "loss": 2.4, "step": 2715 }, { "epoch": 0.27925613890886675, "grad_norm": 0.07719019800424576, "learning_rate": 0.01, "loss": 2.3726, "step": 2718 }, { "epoch": 0.27956436864276174, "grad_norm": 0.09369128197431564, "learning_rate": 0.01, "loss": 2.3893, "step": 2721 }, { "epoch": 0.27987259837665673, "grad_norm": 0.11461931467056274, "learning_rate": 0.01, "loss": 2.4017, "step": 2724 }, { "epoch": 0.2801808281105517, "grad_norm": 0.050078991800546646, "learning_rate": 0.01, "loss": 2.3852, "step": 2727 }, { "epoch": 0.2804890578444467, "grad_norm": 0.08188966661691666, "learning_rate": 0.01, "loss": 2.3469, "step": 2730 }, { "epoch": 0.2807972875783417, "grad_norm": 0.0805756077170372, "learning_rate": 0.01, "loss": 2.3632, "step": 2733 }, { "epoch": 0.2811055173122367, "grad_norm": 0.07377249747514725, "learning_rate": 0.01, "loss": 2.3852, "step": 2736 }, { "epoch": 0.2814137470461317, "grad_norm": 0.17040085792541504, "learning_rate": 0.01, "loss": 2.3904, "step": 2739 }, { "epoch": 0.2817219767800267, "grad_norm": 0.1419583261013031, "learning_rate": 0.01, "loss": 2.3735, "step": 2742 }, { "epoch": 0.2820302065139217, "grad_norm": 0.13182134926319122, "learning_rate": 0.01, "loss": 2.3904, "step": 2745 }, { "epoch": 0.2823384362478167, "grad_norm": 0.1058223620057106, "learning_rate": 0.01, "loss": 2.3922, "step": 2748 }, { "epoch": 0.2826466659817117, "grad_norm": 0.08037062734365463, "learning_rate": 0.01, "loss": 2.3692, "step": 2751 }, { "epoch": 0.2829548957156067, "grad_norm": 0.10247037559747696, "learning_rate": 0.01, "loss": 2.3712, "step": 2754 }, { "epoch": 0.2832631254495017, "grad_norm": 0.09925279021263123, "learning_rate": 0.01, "loss": 2.3632, "step": 2757 }, { "epoch": 0.2835713551833967, "grad_norm": 0.05111562833189964, "learning_rate": 0.01, "loss": 2.3622, "step": 2760 }, { "epoch": 0.2838795849172917, "grad_norm": 0.060480840504169464, "learning_rate": 0.01, "loss": 2.3592, "step": 2763 }, { "epoch": 0.2841878146511867, "grad_norm": 0.13488496840000153, "learning_rate": 0.01, "loss": 2.3822, "step": 2766 }, { "epoch": 0.2844960443850817, "grad_norm": 0.08369171619415283, "learning_rate": 0.01, "loss": 2.3922, "step": 2769 }, { "epoch": 0.2848042741189767, "grad_norm": 0.19474861025810242, "learning_rate": 0.01, "loss": 2.387, "step": 2772 }, { "epoch": 0.2851125038528717, "grad_norm": 0.17801512777805328, "learning_rate": 0.01, "loss": 2.3745, "step": 2775 }, { "epoch": 0.2854207335867667, "grad_norm": 0.0658038854598999, "learning_rate": 0.01, "loss": 2.3857, "step": 2778 }, { "epoch": 0.28572896332066167, "grad_norm": 0.0510118305683136, "learning_rate": 0.01, "loss": 2.3735, "step": 2781 }, { "epoch": 0.28603719305455666, "grad_norm": 0.0649714320898056, "learning_rate": 0.01, "loss": 2.4002, "step": 2784 }, { "epoch": 0.28634542278845165, "grad_norm": 0.11462211608886719, "learning_rate": 0.01, "loss": 2.3642, "step": 2787 }, { "epoch": 0.28665365252234665, "grad_norm": 0.0745900496840477, "learning_rate": 0.01, "loss": 2.4058, "step": 2790 }, { "epoch": 0.28696188225624164, "grad_norm": 0.2475040704011917, "learning_rate": 0.01, "loss": 2.3778, "step": 2793 }, { "epoch": 0.28727011199013663, "grad_norm": 0.08792129158973694, "learning_rate": 0.01, "loss": 2.3932, "step": 2796 }, { "epoch": 0.2875783417240316, "grad_norm": 0.04952983185648918, "learning_rate": 0.01, "loss": 2.3631, "step": 2799 }, { "epoch": 0.2878865714579266, "grad_norm": 0.053665559738874435, "learning_rate": 0.01, "loss": 2.3673, "step": 2802 }, { "epoch": 0.2881948011918216, "grad_norm": 0.0579262301325798, "learning_rate": 0.01, "loss": 2.3234, "step": 2805 }, { "epoch": 0.2885030309257166, "grad_norm": 0.13837358355522156, "learning_rate": 0.01, "loss": 2.3854, "step": 2808 }, { "epoch": 0.28881126065961166, "grad_norm": 0.09924750030040741, "learning_rate": 0.01, "loss": 2.3819, "step": 2811 }, { "epoch": 0.28911949039350665, "grad_norm": 0.14742402732372284, "learning_rate": 0.01, "loss": 2.3853, "step": 2814 }, { "epoch": 0.28942772012740164, "grad_norm": 0.11731177568435669, "learning_rate": 0.01, "loss": 2.4082, "step": 2817 }, { "epoch": 0.28973594986129664, "grad_norm": 0.16812686622142792, "learning_rate": 0.01, "loss": 2.3855, "step": 2820 }, { "epoch": 0.29004417959519163, "grad_norm": 0.06864415854215622, "learning_rate": 0.01, "loss": 2.3911, "step": 2823 }, { "epoch": 0.2903524093290866, "grad_norm": 0.050597239285707474, "learning_rate": 0.01, "loss": 2.3627, "step": 2826 }, { "epoch": 0.2906606390629816, "grad_norm": 0.06927742809057236, "learning_rate": 0.01, "loss": 2.3653, "step": 2829 }, { "epoch": 0.2909688687968766, "grad_norm": 0.049216922372579575, "learning_rate": 0.01, "loss": 2.3527, "step": 2832 }, { "epoch": 0.2912770985307716, "grad_norm": 0.06790090352296829, "learning_rate": 0.01, "loss": 2.4087, "step": 2835 }, { "epoch": 0.2915853282646666, "grad_norm": 0.14112398028373718, "learning_rate": 0.01, "loss": 2.3777, "step": 2838 }, { "epoch": 0.2918935579985616, "grad_norm": 0.07459170371294022, "learning_rate": 0.01, "loss": 2.3776, "step": 2841 }, { "epoch": 0.2922017877324566, "grad_norm": 0.05480146035552025, "learning_rate": 0.01, "loss": 2.3831, "step": 2844 }, { "epoch": 0.2925100174663516, "grad_norm": 0.11372058093547821, "learning_rate": 0.01, "loss": 2.3667, "step": 2847 }, { "epoch": 0.2928182472002466, "grad_norm": 0.05589181184768677, "learning_rate": 0.01, "loss": 2.3744, "step": 2850 }, { "epoch": 0.29312647693414157, "grad_norm": 0.10505107790231705, "learning_rate": 0.01, "loss": 2.3461, "step": 2853 }, { "epoch": 0.29343470666803656, "grad_norm": 0.06241190806031227, "learning_rate": 0.01, "loss": 2.3616, "step": 2856 }, { "epoch": 0.29374293640193155, "grad_norm": 0.2687353193759918, "learning_rate": 0.01, "loss": 2.3728, "step": 2859 }, { "epoch": 0.29405116613582655, "grad_norm": 0.13569511473178864, "learning_rate": 0.01, "loss": 2.3758, "step": 2862 }, { "epoch": 0.29435939586972154, "grad_norm": 0.08852502703666687, "learning_rate": 0.01, "loss": 2.3805, "step": 2865 }, { "epoch": 0.2946676256036166, "grad_norm": 0.0690246969461441, "learning_rate": 0.01, "loss": 2.3694, "step": 2868 }, { "epoch": 0.2949758553375116, "grad_norm": 0.13508114218711853, "learning_rate": 0.01, "loss": 2.3626, "step": 2871 }, { "epoch": 0.2952840850714066, "grad_norm": 0.06574945896863937, "learning_rate": 0.01, "loss": 2.3661, "step": 2874 }, { "epoch": 0.29559231480530157, "grad_norm": 0.08492054790258408, "learning_rate": 0.01, "loss": 2.3737, "step": 2877 }, { "epoch": 0.29590054453919656, "grad_norm": 0.11930177360773087, "learning_rate": 0.01, "loss": 2.3684, "step": 2880 }, { "epoch": 0.29620877427309156, "grad_norm": 0.06913982331752777, "learning_rate": 0.01, "loss": 2.3597, "step": 2883 }, { "epoch": 0.29651700400698655, "grad_norm": 0.1508978009223938, "learning_rate": 0.01, "loss": 2.3807, "step": 2886 }, { "epoch": 0.29682523374088154, "grad_norm": 0.059416841715574265, "learning_rate": 0.01, "loss": 2.3672, "step": 2889 }, { "epoch": 0.29713346347477654, "grad_norm": 0.07864934206008911, "learning_rate": 0.01, "loss": 2.3542, "step": 2892 }, { "epoch": 0.29744169320867153, "grad_norm": 0.15172207355499268, "learning_rate": 0.01, "loss": 2.3816, "step": 2895 }, { "epoch": 0.2977499229425665, "grad_norm": 0.08946362882852554, "learning_rate": 0.01, "loss": 2.3854, "step": 2898 }, { "epoch": 0.2980581526764615, "grad_norm": 0.06231836602091789, "learning_rate": 0.01, "loss": 2.3803, "step": 2901 }, { "epoch": 0.2983663824103565, "grad_norm": 0.06673764437437057, "learning_rate": 0.01, "loss": 2.3506, "step": 2904 }, { "epoch": 0.2986746121442515, "grad_norm": 0.11514609307050705, "learning_rate": 0.01, "loss": 2.3345, "step": 2907 }, { "epoch": 0.2989828418781465, "grad_norm": 0.05702753737568855, "learning_rate": 0.01, "loss": 2.353, "step": 2910 }, { "epoch": 0.2992910716120415, "grad_norm": 0.09202984720468521, "learning_rate": 0.01, "loss": 2.3978, "step": 2913 }, { "epoch": 0.2995993013459365, "grad_norm": 0.09088042378425598, "learning_rate": 0.01, "loss": 2.3508, "step": 2916 }, { "epoch": 0.2999075310798315, "grad_norm": 0.09106214344501495, "learning_rate": 0.01, "loss": 2.3695, "step": 2919 }, { "epoch": 0.3002157608137265, "grad_norm": 0.12793834507465363, "learning_rate": 0.01, "loss": 2.3585, "step": 2922 }, { "epoch": 0.30052399054762147, "grad_norm": 0.16437458992004395, "learning_rate": 0.01, "loss": 2.3708, "step": 2925 }, { "epoch": 0.3008322202815165, "grad_norm": 0.10168170928955078, "learning_rate": 0.01, "loss": 2.3839, "step": 2928 }, { "epoch": 0.3011404500154115, "grad_norm": 0.12716282904148102, "learning_rate": 0.01, "loss": 2.3653, "step": 2931 }, { "epoch": 0.3014486797493065, "grad_norm": 0.05094976723194122, "learning_rate": 0.01, "loss": 2.3315, "step": 2934 }, { "epoch": 0.3017569094832015, "grad_norm": 0.11750750988721848, "learning_rate": 0.01, "loss": 2.3544, "step": 2937 }, { "epoch": 0.3020651392170965, "grad_norm": 0.0688977912068367, "learning_rate": 0.01, "loss": 2.3485, "step": 2940 }, { "epoch": 0.3023733689509915, "grad_norm": 0.09537909924983978, "learning_rate": 0.01, "loss": 2.3357, "step": 2943 }, { "epoch": 0.3026815986848865, "grad_norm": 0.15028056502342224, "learning_rate": 0.01, "loss": 2.3029, "step": 2946 }, { "epoch": 0.30298982841878147, "grad_norm": 0.2069140523672104, "learning_rate": 0.01, "loss": 2.3658, "step": 2949 }, { "epoch": 0.30329805815267646, "grad_norm": 0.04774792492389679, "learning_rate": 0.01, "loss": 2.3488, "step": 2952 }, { "epoch": 0.30360628788657146, "grad_norm": 0.04033259302377701, "learning_rate": 0.01, "loss": 2.3536, "step": 2955 }, { "epoch": 0.30391451762046645, "grad_norm": 0.04587483033537865, "learning_rate": 0.01, "loss": 2.3377, "step": 2958 }, { "epoch": 0.30422274735436144, "grad_norm": 0.08392881602048874, "learning_rate": 0.01, "loss": 2.3323, "step": 2961 }, { "epoch": 0.30453097708825644, "grad_norm": 0.16665025055408478, "learning_rate": 0.01, "loss": 2.3763, "step": 2964 }, { "epoch": 0.30483920682215143, "grad_norm": 0.19268077611923218, "learning_rate": 0.01, "loss": 2.3545, "step": 2967 }, { "epoch": 0.3051474365560464, "grad_norm": 0.14428319036960602, "learning_rate": 0.01, "loss": 2.3481, "step": 2970 }, { "epoch": 0.3054556662899414, "grad_norm": 0.08958342671394348, "learning_rate": 0.01, "loss": 2.3704, "step": 2973 }, { "epoch": 0.3057638960238364, "grad_norm": 0.06964152306318283, "learning_rate": 0.01, "loss": 2.3649, "step": 2976 }, { "epoch": 0.3060721257577314, "grad_norm": 0.1336866170167923, "learning_rate": 0.01, "loss": 2.3426, "step": 2979 }, { "epoch": 0.3063803554916264, "grad_norm": 0.06913724541664124, "learning_rate": 0.01, "loss": 2.363, "step": 2982 }, { "epoch": 0.30668858522552145, "grad_norm": 0.0705854743719101, "learning_rate": 0.01, "loss": 2.327, "step": 2985 }, { "epoch": 0.30699681495941644, "grad_norm": 0.06596222519874573, "learning_rate": 0.01, "loss": 2.3669, "step": 2988 }, { "epoch": 0.30730504469331144, "grad_norm": 0.12716993689537048, "learning_rate": 0.01, "loss": 2.3395, "step": 2991 }, { "epoch": 0.30761327442720643, "grad_norm": 0.09933049976825714, "learning_rate": 0.01, "loss": 2.3532, "step": 2994 }, { "epoch": 0.3079215041611014, "grad_norm": 0.19280697405338287, "learning_rate": 0.01, "loss": 2.3513, "step": 2997 }, { "epoch": 0.3082297338949964, "grad_norm": 0.08448618650436401, "learning_rate": 0.01, "loss": 2.3828, "step": 3000 }, { "epoch": 0.3085379636288914, "grad_norm": 0.14882349967956543, "learning_rate": 0.01, "loss": 2.3398, "step": 3003 }, { "epoch": 0.3088461933627864, "grad_norm": 0.08360068500041962, "learning_rate": 0.01, "loss": 2.3414, "step": 3006 }, { "epoch": 0.3091544230966814, "grad_norm": 0.1378074288368225, "learning_rate": 0.01, "loss": 2.3461, "step": 3009 }, { "epoch": 0.3094626528305764, "grad_norm": 0.13160692155361176, "learning_rate": 0.01, "loss": 2.3517, "step": 3012 }, { "epoch": 0.3097708825644714, "grad_norm": 0.0702040046453476, "learning_rate": 0.01, "loss": 2.3524, "step": 3015 }, { "epoch": 0.3100791122983664, "grad_norm": 0.06959223002195358, "learning_rate": 0.01, "loss": 2.3398, "step": 3018 }, { "epoch": 0.31038734203226137, "grad_norm": 0.10830830782651901, "learning_rate": 0.01, "loss": 2.3437, "step": 3021 }, { "epoch": 0.31069557176615636, "grad_norm": 0.09298605471849442, "learning_rate": 0.01, "loss": 2.3473, "step": 3024 }, { "epoch": 0.31100380150005136, "grad_norm": 0.06620427966117859, "learning_rate": 0.01, "loss": 2.3341, "step": 3027 }, { "epoch": 0.31131203123394635, "grad_norm": 0.21722812950611115, "learning_rate": 0.01, "loss": 2.3281, "step": 3030 }, { "epoch": 0.31162026096784134, "grad_norm": 0.1475544422864914, "learning_rate": 0.01, "loss": 2.3383, "step": 3033 }, { "epoch": 0.31192849070173634, "grad_norm": 0.13449987769126892, "learning_rate": 0.01, "loss": 2.314, "step": 3036 }, { "epoch": 0.31223672043563133, "grad_norm": 0.06219559907913208, "learning_rate": 0.01, "loss": 2.3526, "step": 3039 }, { "epoch": 0.3125449501695264, "grad_norm": 0.05337538942694664, "learning_rate": 0.01, "loss": 2.3386, "step": 3042 }, { "epoch": 0.3128531799034214, "grad_norm": 0.11457488685846329, "learning_rate": 0.01, "loss": 2.3261, "step": 3045 }, { "epoch": 0.31316140963731637, "grad_norm": 0.23809069395065308, "learning_rate": 0.01, "loss": 2.3411, "step": 3048 }, { "epoch": 0.31346963937121136, "grad_norm": 0.11100046336650848, "learning_rate": 0.01, "loss": 2.3269, "step": 3051 }, { "epoch": 0.31377786910510636, "grad_norm": 0.05229029804468155, "learning_rate": 0.01, "loss": 2.3339, "step": 3054 }, { "epoch": 0.31408609883900135, "grad_norm": 0.05956039950251579, "learning_rate": 0.01, "loss": 2.3566, "step": 3057 }, { "epoch": 0.31439432857289634, "grad_norm": 0.13084881007671356, "learning_rate": 0.01, "loss": 2.332, "step": 3060 }, { "epoch": 0.31470255830679134, "grad_norm": 0.13889305293560028, "learning_rate": 0.01, "loss": 2.3118, "step": 3063 }, { "epoch": 0.31501078804068633, "grad_norm": 0.10443049669265747, "learning_rate": 0.01, "loss": 2.3246, "step": 3066 }, { "epoch": 0.3153190177745813, "grad_norm": 0.04321267828345299, "learning_rate": 0.01, "loss": 2.3353, "step": 3069 }, { "epoch": 0.3156272475084763, "grad_norm": 0.046873319894075394, "learning_rate": 0.01, "loss": 2.3144, "step": 3072 }, { "epoch": 0.3159354772423713, "grad_norm": 0.06548158824443817, "learning_rate": 0.01, "loss": 2.3285, "step": 3075 }, { "epoch": 0.3162437069762663, "grad_norm": 0.19105824828147888, "learning_rate": 0.01, "loss": 2.349, "step": 3078 }, { "epoch": 0.3165519367101613, "grad_norm": 0.14477142691612244, "learning_rate": 0.01, "loss": 2.3074, "step": 3081 }, { "epoch": 0.3168601664440563, "grad_norm": 0.08536936342716217, "learning_rate": 0.01, "loss": 2.3462, "step": 3084 }, { "epoch": 0.3171683961779513, "grad_norm": 0.0595535933971405, "learning_rate": 0.01, "loss": 2.3522, "step": 3087 }, { "epoch": 0.3174766259118463, "grad_norm": 0.058548733592033386, "learning_rate": 0.01, "loss": 2.3314, "step": 3090 }, { "epoch": 0.31778485564574127, "grad_norm": 0.04651311784982681, "learning_rate": 0.01, "loss": 2.3058, "step": 3093 }, { "epoch": 0.31809308537963626, "grad_norm": 0.0516805462539196, "learning_rate": 0.01, "loss": 2.3243, "step": 3096 }, { "epoch": 0.31840131511353126, "grad_norm": 0.16851970553398132, "learning_rate": 0.01, "loss": 2.3315, "step": 3099 }, { "epoch": 0.3187095448474263, "grad_norm": 0.08350600302219391, "learning_rate": 0.01, "loss": 2.315, "step": 3102 }, { "epoch": 0.3190177745813213, "grad_norm": 0.08899964392185211, "learning_rate": 0.01, "loss": 2.3218, "step": 3105 }, { "epoch": 0.3193260043152163, "grad_norm": 0.2424800843000412, "learning_rate": 0.01, "loss": 2.3207, "step": 3108 }, { "epoch": 0.3196342340491113, "grad_norm": 0.213782399892807, "learning_rate": 0.01, "loss": 2.3728, "step": 3111 }, { "epoch": 0.3199424637830063, "grad_norm": 0.15629780292510986, "learning_rate": 0.01, "loss": 2.3453, "step": 3114 }, { "epoch": 0.3202506935169013, "grad_norm": 0.06920924782752991, "learning_rate": 0.01, "loss": 2.3111, "step": 3117 }, { "epoch": 0.32055892325079627, "grad_norm": 0.04514181613922119, "learning_rate": 0.01, "loss": 2.33, "step": 3120 }, { "epoch": 0.32086715298469126, "grad_norm": 0.05500979721546173, "learning_rate": 0.01, "loss": 2.3078, "step": 3123 }, { "epoch": 0.32117538271858626, "grad_norm": 0.09148071706295013, "learning_rate": 0.01, "loss": 2.3457, "step": 3126 }, { "epoch": 0.32148361245248125, "grad_norm": 0.10582035779953003, "learning_rate": 0.01, "loss": 2.3114, "step": 3129 }, { "epoch": 0.32179184218637624, "grad_norm": 0.1557345986366272, "learning_rate": 0.01, "loss": 2.3334, "step": 3132 }, { "epoch": 0.32210007192027124, "grad_norm": 0.11304829269647598, "learning_rate": 0.01, "loss": 2.2797, "step": 3135 }, { "epoch": 0.32240830165416623, "grad_norm": 0.08236223459243774, "learning_rate": 0.01, "loss": 2.3357, "step": 3138 }, { "epoch": 0.3227165313880612, "grad_norm": 0.09718946367502213, "learning_rate": 0.01, "loss": 2.3096, "step": 3141 }, { "epoch": 0.3230247611219562, "grad_norm": 0.07455772161483765, "learning_rate": 0.01, "loss": 2.3127, "step": 3144 }, { "epoch": 0.3233329908558512, "grad_norm": 0.0556890033185482, "learning_rate": 0.01, "loss": 2.3088, "step": 3147 }, { "epoch": 0.3236412205897462, "grad_norm": 0.07595494389533997, "learning_rate": 0.01, "loss": 2.3159, "step": 3150 }, { "epoch": 0.3239494503236412, "grad_norm": 0.07064896821975708, "learning_rate": 0.01, "loss": 2.3336, "step": 3153 }, { "epoch": 0.3242576800575362, "grad_norm": 0.06646276265382767, "learning_rate": 0.01, "loss": 2.324, "step": 3156 }, { "epoch": 0.32456590979143124, "grad_norm": 0.08837945014238358, "learning_rate": 0.01, "loss": 2.3191, "step": 3159 }, { "epoch": 0.32487413952532623, "grad_norm": 0.13228796422481537, "learning_rate": 0.01, "loss": 2.3231, "step": 3162 }, { "epoch": 0.3251823692592212, "grad_norm": 0.1080455407500267, "learning_rate": 0.01, "loss": 2.3341, "step": 3165 }, { "epoch": 0.3254905989931162, "grad_norm": 0.1073957234621048, "learning_rate": 0.01, "loss": 2.3237, "step": 3168 }, { "epoch": 0.3257988287270112, "grad_norm": 0.12472347915172577, "learning_rate": 0.01, "loss": 2.3315, "step": 3171 }, { "epoch": 0.3261070584609062, "grad_norm": 0.09123571217060089, "learning_rate": 0.01, "loss": 2.3588, "step": 3174 }, { "epoch": 0.3264152881948012, "grad_norm": 0.07830306142568588, "learning_rate": 0.01, "loss": 2.3273, "step": 3177 }, { "epoch": 0.3267235179286962, "grad_norm": 0.11552650481462479, "learning_rate": 0.01, "loss": 2.3407, "step": 3180 }, { "epoch": 0.3270317476625912, "grad_norm": 0.13251489400863647, "learning_rate": 0.01, "loss": 2.3241, "step": 3183 }, { "epoch": 0.3273399773964862, "grad_norm": 0.12775808572769165, "learning_rate": 0.01, "loss": 2.331, "step": 3186 }, { "epoch": 0.3276482071303812, "grad_norm": 0.12069859355688095, "learning_rate": 0.01, "loss": 2.3486, "step": 3189 }, { "epoch": 0.32795643686427617, "grad_norm": 0.059109434485435486, "learning_rate": 0.01, "loss": 2.2969, "step": 3192 }, { "epoch": 0.32826466659817116, "grad_norm": 0.12731850147247314, "learning_rate": 0.01, "loss": 2.3269, "step": 3195 }, { "epoch": 0.32857289633206616, "grad_norm": 0.15247757732868195, "learning_rate": 0.01, "loss": 2.3312, "step": 3198 }, { "epoch": 0.32888112606596115, "grad_norm": 0.128463476896286, "learning_rate": 0.01, "loss": 2.3275, "step": 3201 }, { "epoch": 0.32918935579985614, "grad_norm": 0.09406638145446777, "learning_rate": 0.01, "loss": 2.3205, "step": 3204 }, { "epoch": 0.32949758553375114, "grad_norm": 0.10524141043424606, "learning_rate": 0.01, "loss": 2.3423, "step": 3207 }, { "epoch": 0.32980581526764613, "grad_norm": 0.11357913911342621, "learning_rate": 0.01, "loss": 2.3071, "step": 3210 }, { "epoch": 0.3301140450015411, "grad_norm": 0.06979521363973618, "learning_rate": 0.01, "loss": 2.3319, "step": 3213 }, { "epoch": 0.33042227473543617, "grad_norm": 0.07000034302473068, "learning_rate": 0.01, "loss": 2.3523, "step": 3216 }, { "epoch": 0.33073050446933117, "grad_norm": 0.07495003193616867, "learning_rate": 0.01, "loss": 2.305, "step": 3219 }, { "epoch": 0.33103873420322616, "grad_norm": 0.07131810486316681, "learning_rate": 0.01, "loss": 2.2896, "step": 3222 }, { "epoch": 0.33134696393712115, "grad_norm": 0.051389019936323166, "learning_rate": 0.01, "loss": 2.2974, "step": 3225 }, { "epoch": 0.33165519367101615, "grad_norm": 0.05159701779484749, "learning_rate": 0.01, "loss": 2.3344, "step": 3228 }, { "epoch": 0.33196342340491114, "grad_norm": 0.07632975280284882, "learning_rate": 0.01, "loss": 2.3091, "step": 3231 }, { "epoch": 0.33227165313880613, "grad_norm": 0.08053800463676453, "learning_rate": 0.01, "loss": 2.298, "step": 3234 }, { "epoch": 0.3325798828727011, "grad_norm": 0.1371622234582901, "learning_rate": 0.01, "loss": 2.3095, "step": 3237 }, { "epoch": 0.3328881126065961, "grad_norm": 0.11367069184780121, "learning_rate": 0.01, "loss": 2.3212, "step": 3240 }, { "epoch": 0.3331963423404911, "grad_norm": 0.13252900540828705, "learning_rate": 0.01, "loss": 2.3238, "step": 3243 }, { "epoch": 0.3335045720743861, "grad_norm": 0.15517258644104004, "learning_rate": 0.01, "loss": 2.3263, "step": 3246 }, { "epoch": 0.3338128018082811, "grad_norm": 0.14029370248317719, "learning_rate": 0.01, "loss": 2.3457, "step": 3249 }, { "epoch": 0.3341210315421761, "grad_norm": 0.105759397149086, "learning_rate": 0.01, "loss": 2.3008, "step": 3252 }, { "epoch": 0.3344292612760711, "grad_norm": 0.04762979596853256, "learning_rate": 0.01, "loss": 2.3306, "step": 3255 }, { "epoch": 0.3347374910099661, "grad_norm": 0.12065446376800537, "learning_rate": 0.01, "loss": 2.2904, "step": 3258 }, { "epoch": 0.3350457207438611, "grad_norm": 0.08886688947677612, "learning_rate": 0.01, "loss": 2.3243, "step": 3261 }, { "epoch": 0.33535395047775607, "grad_norm": 0.08021339774131775, "learning_rate": 0.01, "loss": 2.3313, "step": 3264 }, { "epoch": 0.33566218021165106, "grad_norm": 0.04490290582180023, "learning_rate": 0.01, "loss": 2.2888, "step": 3267 }, { "epoch": 0.33597040994554606, "grad_norm": 0.061480812728405, "learning_rate": 0.01, "loss": 2.2898, "step": 3270 }, { "epoch": 0.33627863967944105, "grad_norm": 0.04230419546365738, "learning_rate": 0.01, "loss": 2.3062, "step": 3273 }, { "epoch": 0.3365868694133361, "grad_norm": 0.12344948202371597, "learning_rate": 0.01, "loss": 2.3105, "step": 3276 }, { "epoch": 0.3368950991472311, "grad_norm": 0.13087160885334015, "learning_rate": 0.01, "loss": 2.3388, "step": 3279 }, { "epoch": 0.3372033288811261, "grad_norm": 0.06671308726072311, "learning_rate": 0.01, "loss": 2.3062, "step": 3282 }, { "epoch": 0.3375115586150211, "grad_norm": 0.055828843265771866, "learning_rate": 0.01, "loss": 2.3227, "step": 3285 }, { "epoch": 0.3378197883489161, "grad_norm": 0.07760481536388397, "learning_rate": 0.01, "loss": 2.307, "step": 3288 }, { "epoch": 0.33812801808281107, "grad_norm": 0.08074722439050674, "learning_rate": 0.01, "loss": 2.3363, "step": 3291 }, { "epoch": 0.33843624781670606, "grad_norm": 0.046514566987752914, "learning_rate": 0.01, "loss": 2.3152, "step": 3294 }, { "epoch": 0.33874447755060105, "grad_norm": 0.15358585119247437, "learning_rate": 0.01, "loss": 2.3114, "step": 3297 }, { "epoch": 0.33905270728449605, "grad_norm": 0.09048300981521606, "learning_rate": 0.01, "loss": 2.3218, "step": 3300 }, { "epoch": 0.33936093701839104, "grad_norm": 0.08199465274810791, "learning_rate": 0.01, "loss": 2.3133, "step": 3303 }, { "epoch": 0.33966916675228603, "grad_norm": 0.13738159835338593, "learning_rate": 0.01, "loss": 2.3108, "step": 3306 }, { "epoch": 0.339977396486181, "grad_norm": 0.11493804305791855, "learning_rate": 0.01, "loss": 2.2996, "step": 3309 }, { "epoch": 0.340285626220076, "grad_norm": 0.06872740387916565, "learning_rate": 0.01, "loss": 2.306, "step": 3312 }, { "epoch": 0.340593855953971, "grad_norm": 0.055139992386102676, "learning_rate": 0.01, "loss": 2.3129, "step": 3315 }, { "epoch": 0.340902085687866, "grad_norm": 0.16477546095848083, "learning_rate": 0.01, "loss": 2.3138, "step": 3318 }, { "epoch": 0.341210315421761, "grad_norm": 0.06387230008840561, "learning_rate": 0.01, "loss": 2.3025, "step": 3321 }, { "epoch": 0.341518545155656, "grad_norm": 0.1657593995332718, "learning_rate": 0.01, "loss": 2.3255, "step": 3324 }, { "epoch": 0.341826774889551, "grad_norm": 0.08980764448642731, "learning_rate": 0.01, "loss": 2.3024, "step": 3327 }, { "epoch": 0.342135004623446, "grad_norm": 0.05479981005191803, "learning_rate": 0.01, "loss": 2.2955, "step": 3330 }, { "epoch": 0.34244323435734103, "grad_norm": 0.05986113101243973, "learning_rate": 0.01, "loss": 2.3078, "step": 3333 }, { "epoch": 0.342751464091236, "grad_norm": 0.1339874267578125, "learning_rate": 0.01, "loss": 2.2974, "step": 3336 }, { "epoch": 0.343059693825131, "grad_norm": 0.11250229179859161, "learning_rate": 0.01, "loss": 2.3162, "step": 3339 }, { "epoch": 0.343367923559026, "grad_norm": 0.12179972976446152, "learning_rate": 0.01, "loss": 2.2746, "step": 3342 }, { "epoch": 0.343676153292921, "grad_norm": 0.10306143760681152, "learning_rate": 0.01, "loss": 2.3008, "step": 3345 }, { "epoch": 0.343984383026816, "grad_norm": 0.08372616767883301, "learning_rate": 0.01, "loss": 2.2962, "step": 3348 }, { "epoch": 0.344292612760711, "grad_norm": 0.05286876857280731, "learning_rate": 0.01, "loss": 2.3067, "step": 3351 }, { "epoch": 0.344600842494606, "grad_norm": 0.06248036026954651, "learning_rate": 0.01, "loss": 2.3432, "step": 3354 }, { "epoch": 0.344909072228501, "grad_norm": 0.1287723332643509, "learning_rate": 0.01, "loss": 2.3064, "step": 3357 }, { "epoch": 0.345217301962396, "grad_norm": 0.08843682706356049, "learning_rate": 0.01, "loss": 2.3059, "step": 3360 }, { "epoch": 0.34552553169629097, "grad_norm": 0.07060680538415909, "learning_rate": 0.01, "loss": 2.2627, "step": 3363 }, { "epoch": 0.34583376143018596, "grad_norm": 0.10443838685750961, "learning_rate": 0.01, "loss": 2.2876, "step": 3366 }, { "epoch": 0.34614199116408095, "grad_norm": 0.06748315691947937, "learning_rate": 0.01, "loss": 2.3182, "step": 3369 }, { "epoch": 0.34645022089797595, "grad_norm": 0.06599223613739014, "learning_rate": 0.01, "loss": 2.2997, "step": 3372 }, { "epoch": 0.34675845063187094, "grad_norm": 0.08530016988515854, "learning_rate": 0.01, "loss": 2.2959, "step": 3375 }, { "epoch": 0.34706668036576593, "grad_norm": 0.10694181174039841, "learning_rate": 0.01, "loss": 2.3248, "step": 3378 }, { "epoch": 0.3473749100996609, "grad_norm": 0.06598237156867981, "learning_rate": 0.01, "loss": 2.2837, "step": 3381 }, { "epoch": 0.3476831398335559, "grad_norm": 0.0782204419374466, "learning_rate": 0.01, "loss": 2.2926, "step": 3384 }, { "epoch": 0.3479913695674509, "grad_norm": 0.09585436433553696, "learning_rate": 0.01, "loss": 2.2984, "step": 3387 }, { "epoch": 0.34829959930134596, "grad_norm": 0.061477720737457275, "learning_rate": 0.01, "loss": 2.2693, "step": 3390 }, { "epoch": 0.34860782903524096, "grad_norm": 0.104725681245327, "learning_rate": 0.01, "loss": 2.2887, "step": 3393 }, { "epoch": 0.34891605876913595, "grad_norm": 0.12205322831869125, "learning_rate": 0.01, "loss": 2.3052, "step": 3396 }, { "epoch": 0.34922428850303094, "grad_norm": 0.16279913485050201, "learning_rate": 0.01, "loss": 2.2771, "step": 3399 }, { "epoch": 0.34953251823692594, "grad_norm": 0.059565551578998566, "learning_rate": 0.01, "loss": 2.3027, "step": 3402 }, { "epoch": 0.34984074797082093, "grad_norm": 0.06318376958370209, "learning_rate": 0.01, "loss": 2.3131, "step": 3405 }, { "epoch": 0.3501489777047159, "grad_norm": 0.05476443096995354, "learning_rate": 0.01, "loss": 2.2953, "step": 3408 }, { "epoch": 0.3504572074386109, "grad_norm": 0.07989142090082169, "learning_rate": 0.01, "loss": 2.31, "step": 3411 }, { "epoch": 0.3507654371725059, "grad_norm": 0.15566086769104004, "learning_rate": 0.01, "loss": 2.2839, "step": 3414 }, { "epoch": 0.3510736669064009, "grad_norm": 0.060441337525844574, "learning_rate": 0.01, "loss": 2.2952, "step": 3417 }, { "epoch": 0.3513818966402959, "grad_norm": 0.06277213245630264, "learning_rate": 0.01, "loss": 2.32, "step": 3420 }, { "epoch": 0.3516901263741909, "grad_norm": 0.04959907755255699, "learning_rate": 0.01, "loss": 2.3116, "step": 3423 }, { "epoch": 0.3519983561080859, "grad_norm": 0.06766139715909958, "learning_rate": 0.01, "loss": 2.3201, "step": 3426 }, { "epoch": 0.3523065858419809, "grad_norm": 0.053323931992053986, "learning_rate": 0.01, "loss": 2.3271, "step": 3429 }, { "epoch": 0.3526148155758759, "grad_norm": 0.06396596878767014, "learning_rate": 0.01, "loss": 2.2929, "step": 3432 }, { "epoch": 0.35292304530977087, "grad_norm": 0.07360636442899704, "learning_rate": 0.01, "loss": 2.2918, "step": 3435 }, { "epoch": 0.35323127504366586, "grad_norm": 0.10262563079595566, "learning_rate": 0.01, "loss": 2.2871, "step": 3438 }, { "epoch": 0.35353950477756085, "grad_norm": 0.09783780574798584, "learning_rate": 0.01, "loss": 2.3229, "step": 3441 }, { "epoch": 0.35384773451145585, "grad_norm": 0.08542583137750626, "learning_rate": 0.01, "loss": 2.2887, "step": 3444 }, { "epoch": 0.35415596424535084, "grad_norm": 0.11864805966615677, "learning_rate": 0.01, "loss": 2.2848, "step": 3447 }, { "epoch": 0.3544641939792459, "grad_norm": 0.10997387021780014, "learning_rate": 0.01, "loss": 2.2897, "step": 3450 }, { "epoch": 0.3547724237131409, "grad_norm": 0.10915081202983856, "learning_rate": 0.01, "loss": 2.3114, "step": 3453 }, { "epoch": 0.3550806534470359, "grad_norm": 0.15109725296497345, "learning_rate": 0.01, "loss": 2.2933, "step": 3456 }, { "epoch": 0.35538888318093087, "grad_norm": 0.04911811649799347, "learning_rate": 0.01, "loss": 2.3035, "step": 3459 }, { "epoch": 0.35569711291482586, "grad_norm": 0.12352598458528519, "learning_rate": 0.01, "loss": 2.2897, "step": 3462 }, { "epoch": 0.35600534264872086, "grad_norm": 0.10834213346242905, "learning_rate": 0.01, "loss": 2.2879, "step": 3465 }, { "epoch": 0.35631357238261585, "grad_norm": 0.10665787756443024, "learning_rate": 0.01, "loss": 2.2614, "step": 3468 }, { "epoch": 0.35662180211651084, "grad_norm": 0.0898185670375824, "learning_rate": 0.01, "loss": 2.2943, "step": 3471 }, { "epoch": 0.35693003185040584, "grad_norm": 0.07015782594680786, "learning_rate": 0.01, "loss": 2.298, "step": 3474 }, { "epoch": 0.35723826158430083, "grad_norm": 0.1292288452386856, "learning_rate": 0.01, "loss": 2.3122, "step": 3477 }, { "epoch": 0.3575464913181958, "grad_norm": 0.09300121665000916, "learning_rate": 0.01, "loss": 2.2769, "step": 3480 }, { "epoch": 0.3578547210520908, "grad_norm": 0.0449809767305851, "learning_rate": 0.01, "loss": 2.2564, "step": 3483 }, { "epoch": 0.3581629507859858, "grad_norm": 0.051362160593271255, "learning_rate": 0.01, "loss": 2.2739, "step": 3486 }, { "epoch": 0.3584711805198808, "grad_norm": 0.12473469972610474, "learning_rate": 0.01, "loss": 2.2844, "step": 3489 }, { "epoch": 0.3587794102537758, "grad_norm": 0.0925057902932167, "learning_rate": 0.01, "loss": 2.2618, "step": 3492 }, { "epoch": 0.3590876399876708, "grad_norm": 0.1026608943939209, "learning_rate": 0.01, "loss": 2.2814, "step": 3495 }, { "epoch": 0.3593958697215658, "grad_norm": 0.0995681881904602, "learning_rate": 0.01, "loss": 2.2861, "step": 3498 }, { "epoch": 0.3597040994554608, "grad_norm": 0.06513385474681854, "learning_rate": 0.01, "loss": 2.2827, "step": 3501 }, { "epoch": 0.3600123291893558, "grad_norm": 0.06724824756383896, "learning_rate": 0.01, "loss": 2.2799, "step": 3504 }, { "epoch": 0.3603205589232508, "grad_norm": 0.06367610394954681, "learning_rate": 0.01, "loss": 2.2846, "step": 3507 }, { "epoch": 0.3606287886571458, "grad_norm": 0.07489916682243347, "learning_rate": 0.01, "loss": 2.2816, "step": 3510 }, { "epoch": 0.3609370183910408, "grad_norm": 0.11221667379140854, "learning_rate": 0.01, "loss": 2.2869, "step": 3513 }, { "epoch": 0.3612452481249358, "grad_norm": 0.09854032099246979, "learning_rate": 0.01, "loss": 2.2646, "step": 3516 }, { "epoch": 0.3615534778588308, "grad_norm": 0.09218656271696091, "learning_rate": 0.01, "loss": 2.2844, "step": 3519 }, { "epoch": 0.3618617075927258, "grad_norm": 0.1531379073858261, "learning_rate": 0.01, "loss": 2.279, "step": 3522 }, { "epoch": 0.3621699373266208, "grad_norm": 0.07070820778608322, "learning_rate": 0.01, "loss": 2.2747, "step": 3525 }, { "epoch": 0.3624781670605158, "grad_norm": 0.1057102233171463, "learning_rate": 0.01, "loss": 2.275, "step": 3528 }, { "epoch": 0.36278639679441077, "grad_norm": 0.049471016973257065, "learning_rate": 0.01, "loss": 2.3013, "step": 3531 }, { "epoch": 0.36309462652830576, "grad_norm": 0.08196526020765305, "learning_rate": 0.01, "loss": 2.2571, "step": 3534 }, { "epoch": 0.36340285626220076, "grad_norm": 0.09507983922958374, "learning_rate": 0.01, "loss": 2.3196, "step": 3537 }, { "epoch": 0.36371108599609575, "grad_norm": 0.089228555560112, "learning_rate": 0.01, "loss": 2.2539, "step": 3540 }, { "epoch": 0.36401931572999074, "grad_norm": 0.0866270586848259, "learning_rate": 0.01, "loss": 2.284, "step": 3543 }, { "epoch": 0.36432754546388574, "grad_norm": 0.13805072009563446, "learning_rate": 0.01, "loss": 2.2723, "step": 3546 }, { "epoch": 0.36463577519778073, "grad_norm": 0.09308724105358124, "learning_rate": 0.01, "loss": 2.2969, "step": 3549 }, { "epoch": 0.3649440049316757, "grad_norm": 0.07004178315401077, "learning_rate": 0.01, "loss": 2.2959, "step": 3552 }, { "epoch": 0.3652522346655707, "grad_norm": 0.09345975518226624, "learning_rate": 0.01, "loss": 2.2656, "step": 3555 }, { "epoch": 0.3655604643994657, "grad_norm": 0.07694482058286667, "learning_rate": 0.01, "loss": 2.2921, "step": 3558 }, { "epoch": 0.3658686941333607, "grad_norm": 0.05591150000691414, "learning_rate": 0.01, "loss": 2.2869, "step": 3561 }, { "epoch": 0.36617692386725575, "grad_norm": 0.06863993406295776, "learning_rate": 0.01, "loss": 2.2897, "step": 3564 }, { "epoch": 0.36648515360115075, "grad_norm": 0.06258527934551239, "learning_rate": 0.01, "loss": 2.2994, "step": 3567 }, { "epoch": 0.36679338333504574, "grad_norm": 0.1049329936504364, "learning_rate": 0.01, "loss": 2.2794, "step": 3570 }, { "epoch": 0.36710161306894074, "grad_norm": 0.1229025200009346, "learning_rate": 0.01, "loss": 2.2949, "step": 3573 }, { "epoch": 0.36740984280283573, "grad_norm": 0.13274389505386353, "learning_rate": 0.01, "loss": 2.2791, "step": 3576 }, { "epoch": 0.3677180725367307, "grad_norm": 0.09388844668865204, "learning_rate": 0.01, "loss": 2.3067, "step": 3579 }, { "epoch": 0.3680263022706257, "grad_norm": 0.05375714227557182, "learning_rate": 0.01, "loss": 2.2946, "step": 3582 }, { "epoch": 0.3683345320045207, "grad_norm": 0.059105634689331055, "learning_rate": 0.01, "loss": 2.2821, "step": 3585 }, { "epoch": 0.3686427617384157, "grad_norm": 0.055578552186489105, "learning_rate": 0.01, "loss": 2.2694, "step": 3588 }, { "epoch": 0.3689509914723107, "grad_norm": 0.08778764307498932, "learning_rate": 0.01, "loss": 2.2712, "step": 3591 }, { "epoch": 0.3692592212062057, "grad_norm": 0.1044803187251091, "learning_rate": 0.01, "loss": 2.2797, "step": 3594 }, { "epoch": 0.3695674509401007, "grad_norm": 0.15398399531841278, "learning_rate": 0.01, "loss": 2.3042, "step": 3597 }, { "epoch": 0.3698756806739957, "grad_norm": 0.11562564969062805, "learning_rate": 0.01, "loss": 2.2609, "step": 3600 }, { "epoch": 0.37018391040789067, "grad_norm": 0.060630831867456436, "learning_rate": 0.01, "loss": 2.2663, "step": 3603 }, { "epoch": 0.37049214014178566, "grad_norm": 0.0576477013528347, "learning_rate": 0.01, "loss": 2.2974, "step": 3606 }, { "epoch": 0.37080036987568066, "grad_norm": 0.059915438294410706, "learning_rate": 0.01, "loss": 2.3031, "step": 3609 }, { "epoch": 0.37110859960957565, "grad_norm": 0.10807155817747116, "learning_rate": 0.01, "loss": 2.2739, "step": 3612 }, { "epoch": 0.37141682934347064, "grad_norm": 0.09196165949106216, "learning_rate": 0.01, "loss": 2.3054, "step": 3615 }, { "epoch": 0.37172505907736564, "grad_norm": 0.07379795610904694, "learning_rate": 0.01, "loss": 2.2805, "step": 3618 }, { "epoch": 0.3720332888112607, "grad_norm": 0.06034912168979645, "learning_rate": 0.01, "loss": 2.2549, "step": 3621 }, { "epoch": 0.3723415185451557, "grad_norm": 0.13983361423015594, "learning_rate": 0.01, "loss": 2.269, "step": 3624 }, { "epoch": 0.3726497482790507, "grad_norm": 0.11592069268226624, "learning_rate": 0.01, "loss": 2.2903, "step": 3627 }, { "epoch": 0.37295797801294567, "grad_norm": 0.15428505837917328, "learning_rate": 0.01, "loss": 2.2918, "step": 3630 }, { "epoch": 0.37326620774684066, "grad_norm": 0.19936774671077728, "learning_rate": 0.01, "loss": 2.2782, "step": 3633 }, { "epoch": 0.37357443748073565, "grad_norm": 0.15364627540111542, "learning_rate": 0.01, "loss": 2.2736, "step": 3636 }, { "epoch": 0.37388266721463065, "grad_norm": 0.047554273158311844, "learning_rate": 0.01, "loss": 2.3172, "step": 3639 }, { "epoch": 0.37419089694852564, "grad_norm": 0.0555570051074028, "learning_rate": 0.01, "loss": 2.2731, "step": 3642 }, { "epoch": 0.37449912668242064, "grad_norm": 0.052204012870788574, "learning_rate": 0.01, "loss": 2.281, "step": 3645 }, { "epoch": 0.37480735641631563, "grad_norm": 0.09206510335206985, "learning_rate": 0.01, "loss": 2.2639, "step": 3648 }, { "epoch": 0.3751155861502106, "grad_norm": 0.1199311912059784, "learning_rate": 0.01, "loss": 2.2873, "step": 3651 }, { "epoch": 0.3754238158841056, "grad_norm": 0.08949270099401474, "learning_rate": 0.01, "loss": 2.2668, "step": 3654 }, { "epoch": 0.3757320456180006, "grad_norm": 0.08521883934736252, "learning_rate": 0.01, "loss": 2.247, "step": 3657 }, { "epoch": 0.3760402753518956, "grad_norm": 0.07689694315195084, "learning_rate": 0.01, "loss": 2.2904, "step": 3660 }, { "epoch": 0.3763485050857906, "grad_norm": 0.08761987835168839, "learning_rate": 0.01, "loss": 2.2761, "step": 3663 }, { "epoch": 0.3766567348196856, "grad_norm": 0.056420013308525085, "learning_rate": 0.01, "loss": 2.259, "step": 3666 }, { "epoch": 0.3769649645535806, "grad_norm": 0.06192856654524803, "learning_rate": 0.01, "loss": 2.2294, "step": 3669 }, { "epoch": 0.3772731942874756, "grad_norm": 0.1021333709359169, "learning_rate": 0.01, "loss": 2.2649, "step": 3672 }, { "epoch": 0.37758142402137057, "grad_norm": 0.10071670264005661, "learning_rate": 0.01, "loss": 2.2584, "step": 3675 }, { "epoch": 0.37788965375526556, "grad_norm": 0.05968625843524933, "learning_rate": 0.01, "loss": 2.2699, "step": 3678 }, { "epoch": 0.3781978834891606, "grad_norm": 0.07489661872386932, "learning_rate": 0.01, "loss": 2.2663, "step": 3681 }, { "epoch": 0.3785061132230556, "grad_norm": 0.07880943268537521, "learning_rate": 0.01, "loss": 2.2709, "step": 3684 }, { "epoch": 0.3788143429569506, "grad_norm": 0.055632054805755615, "learning_rate": 0.01, "loss": 2.272, "step": 3687 }, { "epoch": 0.3791225726908456, "grad_norm": 0.05365302786231041, "learning_rate": 0.01, "loss": 2.2268, "step": 3690 }, { "epoch": 0.3794308024247406, "grad_norm": 0.0802481397986412, "learning_rate": 0.01, "loss": 2.2631, "step": 3693 }, { "epoch": 0.3797390321586356, "grad_norm": 0.1312764585018158, "learning_rate": 0.01, "loss": 2.2985, "step": 3696 }, { "epoch": 0.3800472618925306, "grad_norm": 0.14543971419334412, "learning_rate": 0.01, "loss": 2.25, "step": 3699 }, { "epoch": 0.38035549162642557, "grad_norm": 0.05727002024650574, "learning_rate": 0.01, "loss": 2.2556, "step": 3702 }, { "epoch": 0.38066372136032056, "grad_norm": 0.07309607416391373, "learning_rate": 0.01, "loss": 2.2574, "step": 3705 }, { "epoch": 0.38097195109421556, "grad_norm": 0.03849095106124878, "learning_rate": 0.01, "loss": 2.2501, "step": 3708 }, { "epoch": 0.38128018082811055, "grad_norm": 0.0623021237552166, "learning_rate": 0.01, "loss": 2.2672, "step": 3711 }, { "epoch": 0.38158841056200554, "grad_norm": 0.08916610479354858, "learning_rate": 0.01, "loss": 2.2683, "step": 3714 }, { "epoch": 0.38189664029590054, "grad_norm": 0.08126388490200043, "learning_rate": 0.01, "loss": 2.2574, "step": 3717 }, { "epoch": 0.38220487002979553, "grad_norm": 0.07121114432811737, "learning_rate": 0.01, "loss": 2.2358, "step": 3720 }, { "epoch": 0.3825130997636905, "grad_norm": 0.07406505942344666, "learning_rate": 0.01, "loss": 2.2736, "step": 3723 }, { "epoch": 0.3828213294975855, "grad_norm": 0.13355331122875214, "learning_rate": 0.01, "loss": 2.2685, "step": 3726 }, { "epoch": 0.3831295592314805, "grad_norm": 0.05672430619597435, "learning_rate": 0.01, "loss": 2.2913, "step": 3729 }, { "epoch": 0.3834377889653755, "grad_norm": 0.047647468745708466, "learning_rate": 0.01, "loss": 2.2533, "step": 3732 }, { "epoch": 0.3837460186992705, "grad_norm": 0.059008341282606125, "learning_rate": 0.01, "loss": 2.2867, "step": 3735 }, { "epoch": 0.38405424843316555, "grad_norm": 0.06551840156316757, "learning_rate": 0.01, "loss": 2.2742, "step": 3738 }, { "epoch": 0.38436247816706054, "grad_norm": 0.08781883865594864, "learning_rate": 0.01, "loss": 2.2427, "step": 3741 }, { "epoch": 0.38467070790095553, "grad_norm": 0.06808102875947952, "learning_rate": 0.01, "loss": 2.2493, "step": 3744 }, { "epoch": 0.3849789376348505, "grad_norm": 0.06570697575807571, "learning_rate": 0.01, "loss": 2.2445, "step": 3747 }, { "epoch": 0.3852871673687455, "grad_norm": 0.08742080628871918, "learning_rate": 0.01, "loss": 2.2576, "step": 3750 }, { "epoch": 0.3855953971026405, "grad_norm": 0.1518019735813141, "learning_rate": 0.01, "loss": 2.2819, "step": 3753 }, { "epoch": 0.3859036268365355, "grad_norm": 0.10349754244089127, "learning_rate": 0.01, "loss": 2.2465, "step": 3756 }, { "epoch": 0.3862118565704305, "grad_norm": 0.06008581072092056, "learning_rate": 0.01, "loss": 2.2817, "step": 3759 }, { "epoch": 0.3865200863043255, "grad_norm": 0.0450257770717144, "learning_rate": 0.01, "loss": 2.2585, "step": 3762 }, { "epoch": 0.3868283160382205, "grad_norm": 0.04145176708698273, "learning_rate": 0.01, "loss": 2.2634, "step": 3765 }, { "epoch": 0.3871365457721155, "grad_norm": 0.17084141075611115, "learning_rate": 0.01, "loss": 2.2355, "step": 3768 }, { "epoch": 0.3874447755060105, "grad_norm": 0.06679602712392807, "learning_rate": 0.01, "loss": 2.2737, "step": 3771 }, { "epoch": 0.38775300523990547, "grad_norm": 0.05363382026553154, "learning_rate": 0.01, "loss": 2.244, "step": 3774 }, { "epoch": 0.38806123497380046, "grad_norm": 0.05722133815288544, "learning_rate": 0.01, "loss": 2.2515, "step": 3777 }, { "epoch": 0.38836946470769546, "grad_norm": 0.06288215517997742, "learning_rate": 0.01, "loss": 2.2625, "step": 3780 }, { "epoch": 0.38867769444159045, "grad_norm": 0.05087801814079285, "learning_rate": 0.01, "loss": 2.2883, "step": 3783 }, { "epoch": 0.38898592417548544, "grad_norm": 0.08160998672246933, "learning_rate": 0.01, "loss": 2.2462, "step": 3786 }, { "epoch": 0.38929415390938044, "grad_norm": 0.22291240096092224, "learning_rate": 0.01, "loss": 2.2613, "step": 3789 }, { "epoch": 0.38960238364327543, "grad_norm": 0.11482773721218109, "learning_rate": 0.01, "loss": 2.2633, "step": 3792 }, { "epoch": 0.3899106133771705, "grad_norm": 0.056299589574337006, "learning_rate": 0.01, "loss": 2.2896, "step": 3795 }, { "epoch": 0.39021884311106547, "grad_norm": 0.04524017125368118, "learning_rate": 0.01, "loss": 2.2543, "step": 3798 }, { "epoch": 0.39052707284496047, "grad_norm": 0.0903107225894928, "learning_rate": 0.01, "loss": 2.2801, "step": 3801 }, { "epoch": 0.39083530257885546, "grad_norm": 0.0645504966378212, "learning_rate": 0.01, "loss": 2.2628, "step": 3804 }, { "epoch": 0.39114353231275045, "grad_norm": 0.06752094626426697, "learning_rate": 0.01, "loss": 2.2732, "step": 3807 }, { "epoch": 0.39145176204664545, "grad_norm": 0.04459339380264282, "learning_rate": 0.01, "loss": 2.2601, "step": 3810 }, { "epoch": 0.39175999178054044, "grad_norm": 0.07300913333892822, "learning_rate": 0.01, "loss": 2.2437, "step": 3813 }, { "epoch": 0.39206822151443543, "grad_norm": 0.16804097592830658, "learning_rate": 0.01, "loss": 2.26, "step": 3816 }, { "epoch": 0.3923764512483304, "grad_norm": 0.10682248324155807, "learning_rate": 0.01, "loss": 2.2764, "step": 3819 }, { "epoch": 0.3926846809822254, "grad_norm": 0.046895258128643036, "learning_rate": 0.01, "loss": 2.2654, "step": 3822 }, { "epoch": 0.3929929107161204, "grad_norm": 0.05799179524183273, "learning_rate": 0.01, "loss": 2.2254, "step": 3825 }, { "epoch": 0.3933011404500154, "grad_norm": 0.0474528968334198, "learning_rate": 0.01, "loss": 2.2604, "step": 3828 }, { "epoch": 0.3936093701839104, "grad_norm": 0.1437537968158722, "learning_rate": 0.01, "loss": 2.2532, "step": 3831 }, { "epoch": 0.3939175999178054, "grad_norm": 0.06202014535665512, "learning_rate": 0.01, "loss": 2.2486, "step": 3834 }, { "epoch": 0.3942258296517004, "grad_norm": 0.09379147738218307, "learning_rate": 0.01, "loss": 2.2602, "step": 3837 }, { "epoch": 0.3945340593855954, "grad_norm": 0.07898830622434616, "learning_rate": 0.01, "loss": 2.2605, "step": 3840 }, { "epoch": 0.3948422891194904, "grad_norm": 0.10186600685119629, "learning_rate": 0.01, "loss": 2.2807, "step": 3843 }, { "epoch": 0.39515051885338537, "grad_norm": 0.08611535280942917, "learning_rate": 0.01, "loss": 2.2571, "step": 3846 }, { "epoch": 0.39545874858728036, "grad_norm": 0.10435480624437332, "learning_rate": 0.01, "loss": 2.2721, "step": 3849 }, { "epoch": 0.39576697832117536, "grad_norm": 0.11543019860982895, "learning_rate": 0.01, "loss": 2.2598, "step": 3852 }, { "epoch": 0.3960752080550704, "grad_norm": 0.11996404081583023, "learning_rate": 0.01, "loss": 2.2536, "step": 3855 }, { "epoch": 0.3963834377889654, "grad_norm": 0.05615765228867531, "learning_rate": 0.01, "loss": 2.2637, "step": 3858 }, { "epoch": 0.3966916675228604, "grad_norm": 0.06568838655948639, "learning_rate": 0.01, "loss": 2.2756, "step": 3861 }, { "epoch": 0.3969998972567554, "grad_norm": 0.07747132331132889, "learning_rate": 0.01, "loss": 2.2816, "step": 3864 }, { "epoch": 0.3973081269906504, "grad_norm": 0.057373497635126114, "learning_rate": 0.01, "loss": 2.255, "step": 3867 }, { "epoch": 0.3976163567245454, "grad_norm": 0.11501277983188629, "learning_rate": 0.01, "loss": 2.2494, "step": 3870 }, { "epoch": 0.39792458645844037, "grad_norm": 0.07761958241462708, "learning_rate": 0.01, "loss": 2.2459, "step": 3873 }, { "epoch": 0.39823281619233536, "grad_norm": 0.06263428926467896, "learning_rate": 0.01, "loss": 2.2649, "step": 3876 }, { "epoch": 0.39854104592623035, "grad_norm": 0.04552373290061951, "learning_rate": 0.01, "loss": 2.2578, "step": 3879 }, { "epoch": 0.39884927566012535, "grad_norm": 0.0631655901670456, "learning_rate": 0.01, "loss": 2.2648, "step": 3882 }, { "epoch": 0.39915750539402034, "grad_norm": 0.06519417464733124, "learning_rate": 0.01, "loss": 2.2438, "step": 3885 }, { "epoch": 0.39946573512791533, "grad_norm": 0.10446424037218094, "learning_rate": 0.01, "loss": 2.2815, "step": 3888 }, { "epoch": 0.3997739648618103, "grad_norm": 0.07533372938632965, "learning_rate": 0.01, "loss": 2.272, "step": 3891 }, { "epoch": 0.4000821945957053, "grad_norm": 0.05748215690255165, "learning_rate": 0.01, "loss": 2.2971, "step": 3894 }, { "epoch": 0.4003904243296003, "grad_norm": 0.051343973726034164, "learning_rate": 0.01, "loss": 2.2316, "step": 3897 }, { "epoch": 0.4006986540634953, "grad_norm": 0.04799075797200203, "learning_rate": 0.01, "loss": 2.2333, "step": 3900 }, { "epoch": 0.4010068837973903, "grad_norm": 0.12885436415672302, "learning_rate": 0.01, "loss": 2.247, "step": 3903 }, { "epoch": 0.4013151135312853, "grad_norm": 0.07175249606370926, "learning_rate": 0.01, "loss": 2.2407, "step": 3906 }, { "epoch": 0.4016233432651803, "grad_norm": 0.10784266144037247, "learning_rate": 0.01, "loss": 2.2458, "step": 3909 }, { "epoch": 0.40193157299907534, "grad_norm": 0.08646712452173233, "learning_rate": 0.01, "loss": 2.2571, "step": 3912 }, { "epoch": 0.40223980273297033, "grad_norm": 0.05365338176488876, "learning_rate": 0.01, "loss": 2.2585, "step": 3915 }, { "epoch": 0.4025480324668653, "grad_norm": 0.07037780433893204, "learning_rate": 0.01, "loss": 2.2277, "step": 3918 }, { "epoch": 0.4028562622007603, "grad_norm": 0.040290024131536484, "learning_rate": 0.01, "loss": 2.2508, "step": 3921 }, { "epoch": 0.4031644919346553, "grad_norm": 0.050338853150606155, "learning_rate": 0.01, "loss": 2.2356, "step": 3924 }, { "epoch": 0.4034727216685503, "grad_norm": 0.1420246660709381, "learning_rate": 0.01, "loss": 2.2531, "step": 3927 }, { "epoch": 0.4037809514024453, "grad_norm": 0.07432923465967178, "learning_rate": 0.01, "loss": 2.2766, "step": 3930 }, { "epoch": 0.4040891811363403, "grad_norm": 0.04954257979989052, "learning_rate": 0.01, "loss": 2.2825, "step": 3933 }, { "epoch": 0.4043974108702353, "grad_norm": 0.05988876149058342, "learning_rate": 0.01, "loss": 2.2342, "step": 3936 }, { "epoch": 0.4047056406041303, "grad_norm": 0.09800540655851364, "learning_rate": 0.01, "loss": 2.2268, "step": 3939 }, { "epoch": 0.4050138703380253, "grad_norm": 0.09171874821186066, "learning_rate": 0.01, "loss": 2.2648, "step": 3942 }, { "epoch": 0.40532210007192027, "grad_norm": 0.07430606335401535, "learning_rate": 0.01, "loss": 2.2523, "step": 3945 }, { "epoch": 0.40563032980581526, "grad_norm": 0.043649185448884964, "learning_rate": 0.01, "loss": 2.2303, "step": 3948 }, { "epoch": 0.40593855953971025, "grad_norm": 0.04120480641722679, "learning_rate": 0.01, "loss": 2.2299, "step": 3951 }, { "epoch": 0.40624678927360525, "grad_norm": 0.0692945346236229, "learning_rate": 0.01, "loss": 2.2466, "step": 3954 }, { "epoch": 0.40655501900750024, "grad_norm": 0.08884318917989731, "learning_rate": 0.01, "loss": 2.2802, "step": 3957 }, { "epoch": 0.40686324874139523, "grad_norm": 0.05542384088039398, "learning_rate": 0.01, "loss": 2.2303, "step": 3960 }, { "epoch": 0.4071714784752902, "grad_norm": 0.08013599365949631, "learning_rate": 0.01, "loss": 2.2361, "step": 3963 }, { "epoch": 0.4074797082091852, "grad_norm": 0.15963242948055267, "learning_rate": 0.01, "loss": 2.2608, "step": 3966 }, { "epoch": 0.40778793794308027, "grad_norm": 0.05428241938352585, "learning_rate": 0.01, "loss": 2.2415, "step": 3969 }, { "epoch": 0.40809616767697526, "grad_norm": 0.09297880530357361, "learning_rate": 0.01, "loss": 2.2804, "step": 3972 }, { "epoch": 0.40840439741087026, "grad_norm": 0.11259882897138596, "learning_rate": 0.01, "loss": 2.2562, "step": 3975 }, { "epoch": 0.40871262714476525, "grad_norm": 0.0546397790312767, "learning_rate": 0.01, "loss": 2.2423, "step": 3978 }, { "epoch": 0.40902085687866024, "grad_norm": 0.13870957493782043, "learning_rate": 0.01, "loss": 2.2431, "step": 3981 }, { "epoch": 0.40932908661255524, "grad_norm": 0.05527504161000252, "learning_rate": 0.01, "loss": 2.2649, "step": 3984 }, { "epoch": 0.40963731634645023, "grad_norm": 0.08060980588197708, "learning_rate": 0.01, "loss": 2.2708, "step": 3987 }, { "epoch": 0.4099455460803452, "grad_norm": 0.05611690506339073, "learning_rate": 0.01, "loss": 2.2683, "step": 3990 }, { "epoch": 0.4102537758142402, "grad_norm": 0.08760816603899002, "learning_rate": 0.01, "loss": 2.2392, "step": 3993 }, { "epoch": 0.4105620055481352, "grad_norm": 0.07327746599912643, "learning_rate": 0.01, "loss": 2.2587, "step": 3996 }, { "epoch": 0.4108702352820302, "grad_norm": 0.05924748629331589, "learning_rate": 0.01, "loss": 2.2435, "step": 3999 }, { "epoch": 0.4111784650159252, "grad_norm": 0.08269370347261429, "learning_rate": 0.01, "loss": 2.2365, "step": 4002 }, { "epoch": 0.4114866947498202, "grad_norm": 0.06834371387958527, "learning_rate": 0.01, "loss": 2.2579, "step": 4005 }, { "epoch": 0.4117949244837152, "grad_norm": 0.06737885624170303, "learning_rate": 0.01, "loss": 2.2585, "step": 4008 }, { "epoch": 0.4121031542176102, "grad_norm": 0.0919148176908493, "learning_rate": 0.01, "loss": 2.2524, "step": 4011 }, { "epoch": 0.4124113839515052, "grad_norm": 0.0744348093867302, "learning_rate": 0.01, "loss": 2.2328, "step": 4014 }, { "epoch": 0.41271961368540017, "grad_norm": 0.08952994644641876, "learning_rate": 0.01, "loss": 2.2556, "step": 4017 }, { "epoch": 0.41302784341929516, "grad_norm": 0.054230738431215286, "learning_rate": 0.01, "loss": 2.2559, "step": 4020 }, { "epoch": 0.41333607315319015, "grad_norm": 0.11185753345489502, "learning_rate": 0.01, "loss": 2.2599, "step": 4023 }, { "epoch": 0.41364430288708515, "grad_norm": 0.11211541295051575, "learning_rate": 0.01, "loss": 2.2456, "step": 4026 }, { "epoch": 0.4139525326209802, "grad_norm": 0.08211257308721542, "learning_rate": 0.01, "loss": 2.2636, "step": 4029 }, { "epoch": 0.4142607623548752, "grad_norm": 0.07233046740293503, "learning_rate": 0.01, "loss": 2.2148, "step": 4032 }, { "epoch": 0.4145689920887702, "grad_norm": 0.1062379851937294, "learning_rate": 0.01, "loss": 2.2382, "step": 4035 }, { "epoch": 0.4148772218226652, "grad_norm": 0.07079877704381943, "learning_rate": 0.01, "loss": 2.2462, "step": 4038 }, { "epoch": 0.41518545155656017, "grad_norm": 0.04237307608127594, "learning_rate": 0.01, "loss": 2.2523, "step": 4041 }, { "epoch": 0.41549368129045516, "grad_norm": 0.12513239681720734, "learning_rate": 0.01, "loss": 2.2614, "step": 4044 }, { "epoch": 0.41580191102435016, "grad_norm": 0.07134360820055008, "learning_rate": 0.01, "loss": 2.2533, "step": 4047 }, { "epoch": 0.41611014075824515, "grad_norm": 0.07371515780687332, "learning_rate": 0.01, "loss": 2.2333, "step": 4050 }, { "epoch": 0.41641837049214014, "grad_norm": 0.05744464695453644, "learning_rate": 0.01, "loss": 2.2292, "step": 4053 }, { "epoch": 0.41672660022603514, "grad_norm": 0.0790088102221489, "learning_rate": 0.01, "loss": 2.2217, "step": 4056 }, { "epoch": 0.41703482995993013, "grad_norm": 0.12540112435817719, "learning_rate": 0.01, "loss": 2.2367, "step": 4059 }, { "epoch": 0.4173430596938251, "grad_norm": 0.06895852833986282, "learning_rate": 0.01, "loss": 2.2354, "step": 4062 }, { "epoch": 0.4176512894277201, "grad_norm": 0.09068478643894196, "learning_rate": 0.01, "loss": 2.2605, "step": 4065 }, { "epoch": 0.4179595191616151, "grad_norm": 0.051881443709135056, "learning_rate": 0.01, "loss": 2.2501, "step": 4068 }, { "epoch": 0.4182677488955101, "grad_norm": 0.20433951914310455, "learning_rate": 0.01, "loss": 2.2582, "step": 4071 }, { "epoch": 0.4185759786294051, "grad_norm": 0.08301309496164322, "learning_rate": 0.01, "loss": 2.2424, "step": 4074 }, { "epoch": 0.4188842083633001, "grad_norm": 0.07062964886426926, "learning_rate": 0.01, "loss": 2.2345, "step": 4077 }, { "epoch": 0.4191924380971951, "grad_norm": 0.09770773351192474, "learning_rate": 0.01, "loss": 2.264, "step": 4080 }, { "epoch": 0.4195006678310901, "grad_norm": 0.0847458690404892, "learning_rate": 0.01, "loss": 2.2329, "step": 4083 }, { "epoch": 0.41980889756498513, "grad_norm": 0.06491915881633759, "learning_rate": 0.01, "loss": 2.2174, "step": 4086 }, { "epoch": 0.4201171272988801, "grad_norm": 0.11355047672986984, "learning_rate": 0.01, "loss": 2.2653, "step": 4089 }, { "epoch": 0.4204253570327751, "grad_norm": 0.10509520024061203, "learning_rate": 0.01, "loss": 2.2435, "step": 4092 }, { "epoch": 0.4207335867666701, "grad_norm": 0.07456620037555695, "learning_rate": 0.01, "loss": 2.2348, "step": 4095 }, { "epoch": 0.4210418165005651, "grad_norm": 0.07531027495861053, "learning_rate": 0.01, "loss": 2.2524, "step": 4098 }, { "epoch": 0.4213500462344601, "grad_norm": 0.06129564717411995, "learning_rate": 0.01, "loss": 2.2577, "step": 4101 }, { "epoch": 0.4216582759683551, "grad_norm": 0.03984616696834564, "learning_rate": 0.01, "loss": 2.2354, "step": 4104 }, { "epoch": 0.4219665057022501, "grad_norm": 0.1273418813943863, "learning_rate": 0.01, "loss": 2.2478, "step": 4107 }, { "epoch": 0.4222747354361451, "grad_norm": 0.08859774470329285, "learning_rate": 0.01, "loss": 2.2504, "step": 4110 }, { "epoch": 0.42258296517004007, "grad_norm": 0.10512147098779678, "learning_rate": 0.01, "loss": 2.2435, "step": 4113 }, { "epoch": 0.42289119490393506, "grad_norm": 0.11181578040122986, "learning_rate": 0.01, "loss": 2.2396, "step": 4116 }, { "epoch": 0.42319942463783006, "grad_norm": 0.07474307715892792, "learning_rate": 0.01, "loss": 2.2518, "step": 4119 }, { "epoch": 0.42350765437172505, "grad_norm": 0.07233690470457077, "learning_rate": 0.01, "loss": 2.2283, "step": 4122 }, { "epoch": 0.42381588410562004, "grad_norm": 0.06051602587103844, "learning_rate": 0.01, "loss": 2.2429, "step": 4125 }, { "epoch": 0.42412411383951504, "grad_norm": 0.0492120198905468, "learning_rate": 0.01, "loss": 2.2312, "step": 4128 }, { "epoch": 0.42443234357341003, "grad_norm": 0.07249493151903152, "learning_rate": 0.01, "loss": 2.244, "step": 4131 }, { "epoch": 0.424740573307305, "grad_norm": 0.0993468165397644, "learning_rate": 0.01, "loss": 2.2441, "step": 4134 }, { "epoch": 0.4250488030412, "grad_norm": 0.07051920145750046, "learning_rate": 0.01, "loss": 2.2188, "step": 4137 }, { "epoch": 0.425357032775095, "grad_norm": 0.08267249912023544, "learning_rate": 0.01, "loss": 2.2472, "step": 4140 }, { "epoch": 0.42566526250899006, "grad_norm": 0.1307336390018463, "learning_rate": 0.01, "loss": 2.2359, "step": 4143 }, { "epoch": 0.42597349224288505, "grad_norm": 0.09383214265108109, "learning_rate": 0.01, "loss": 2.2519, "step": 4146 }, { "epoch": 0.42628172197678005, "grad_norm": 0.08928582817316055, "learning_rate": 0.01, "loss": 2.2322, "step": 4149 }, { "epoch": 0.42658995171067504, "grad_norm": 0.10554556548595428, "learning_rate": 0.01, "loss": 2.2219, "step": 4152 }, { "epoch": 0.42689818144457004, "grad_norm": 0.06501816213130951, "learning_rate": 0.01, "loss": 2.2351, "step": 4155 }, { "epoch": 0.42720641117846503, "grad_norm": 0.10736589878797531, "learning_rate": 0.01, "loss": 2.2327, "step": 4158 }, { "epoch": 0.42751464091236, "grad_norm": 0.11834681034088135, "learning_rate": 0.01, "loss": 2.2617, "step": 4161 }, { "epoch": 0.427822870646255, "grad_norm": 0.07011161744594574, "learning_rate": 0.01, "loss": 2.2218, "step": 4164 }, { "epoch": 0.42813110038015, "grad_norm": 0.0653071179986, "learning_rate": 0.01, "loss": 2.2115, "step": 4167 }, { "epoch": 0.428439330114045, "grad_norm": 0.057517893612384796, "learning_rate": 0.01, "loss": 2.2533, "step": 4170 }, { "epoch": 0.42874755984794, "grad_norm": 0.060261376202106476, "learning_rate": 0.01, "loss": 2.2199, "step": 4173 }, { "epoch": 0.429055789581835, "grad_norm": 0.12384762614965439, "learning_rate": 0.01, "loss": 2.2124, "step": 4176 }, { "epoch": 0.42936401931573, "grad_norm": 0.06436473876237869, "learning_rate": 0.01, "loss": 2.2558, "step": 4179 }, { "epoch": 0.429672249049625, "grad_norm": 0.049704987555742264, "learning_rate": 0.01, "loss": 2.2434, "step": 4182 }, { "epoch": 0.42998047878351997, "grad_norm": 0.0809103325009346, "learning_rate": 0.01, "loss": 2.2461, "step": 4185 }, { "epoch": 0.43028870851741496, "grad_norm": 0.04888701066374779, "learning_rate": 0.01, "loss": 2.2342, "step": 4188 }, { "epoch": 0.43059693825130996, "grad_norm": 0.04951067641377449, "learning_rate": 0.01, "loss": 2.2292, "step": 4191 }, { "epoch": 0.43090516798520495, "grad_norm": 0.13740333914756775, "learning_rate": 0.01, "loss": 2.2243, "step": 4194 }, { "epoch": 0.43121339771909994, "grad_norm": 0.09912848472595215, "learning_rate": 0.01, "loss": 2.2065, "step": 4197 }, { "epoch": 0.43152162745299494, "grad_norm": 0.1031954362988472, "learning_rate": 0.01, "loss": 2.2247, "step": 4200 }, { "epoch": 0.43182985718689, "grad_norm": 0.04378229379653931, "learning_rate": 0.01, "loss": 2.2485, "step": 4203 }, { "epoch": 0.432138086920785, "grad_norm": 0.05430865287780762, "learning_rate": 0.01, "loss": 2.2178, "step": 4206 }, { "epoch": 0.43244631665468, "grad_norm": 0.05675321817398071, "learning_rate": 0.01, "loss": 2.2568, "step": 4209 }, { "epoch": 0.43275454638857497, "grad_norm": 0.07637004554271698, "learning_rate": 0.01, "loss": 2.2567, "step": 4212 }, { "epoch": 0.43306277612246996, "grad_norm": 0.06263475120067596, "learning_rate": 0.01, "loss": 2.2597, "step": 4215 }, { "epoch": 0.43337100585636495, "grad_norm": 0.09689760208129883, "learning_rate": 0.01, "loss": 2.2376, "step": 4218 }, { "epoch": 0.43367923559025995, "grad_norm": 0.13923399150371552, "learning_rate": 0.01, "loss": 2.2394, "step": 4221 }, { "epoch": 0.43398746532415494, "grad_norm": 0.0607299767434597, "learning_rate": 0.01, "loss": 2.2366, "step": 4224 }, { "epoch": 0.43429569505804994, "grad_norm": 0.05221550166606903, "learning_rate": 0.01, "loss": 2.2587, "step": 4227 }, { "epoch": 0.43460392479194493, "grad_norm": 0.05556831881403923, "learning_rate": 0.01, "loss": 2.2422, "step": 4230 }, { "epoch": 0.4349121545258399, "grad_norm": 0.0843261182308197, "learning_rate": 0.01, "loss": 2.2399, "step": 4233 }, { "epoch": 0.4352203842597349, "grad_norm": 0.08864692598581314, "learning_rate": 0.01, "loss": 2.2155, "step": 4236 }, { "epoch": 0.4355286139936299, "grad_norm": 0.11530198156833649, "learning_rate": 0.01, "loss": 2.2612, "step": 4239 }, { "epoch": 0.4358368437275249, "grad_norm": 0.11549337208271027, "learning_rate": 0.01, "loss": 2.2233, "step": 4242 }, { "epoch": 0.4361450734614199, "grad_norm": 0.11105350404977798, "learning_rate": 0.01, "loss": 2.2426, "step": 4245 }, { "epoch": 0.4364533031953149, "grad_norm": 0.1190980076789856, "learning_rate": 0.01, "loss": 2.2353, "step": 4248 }, { "epoch": 0.4367615329292099, "grad_norm": 0.08560021221637726, "learning_rate": 0.01, "loss": 2.2542, "step": 4251 }, { "epoch": 0.4370697626631049, "grad_norm": 0.05514337494969368, "learning_rate": 0.01, "loss": 2.2171, "step": 4254 }, { "epoch": 0.43737799239699987, "grad_norm": 0.06764981150627136, "learning_rate": 0.01, "loss": 2.2363, "step": 4257 }, { "epoch": 0.4376862221308949, "grad_norm": 0.04801105335354805, "learning_rate": 0.01, "loss": 2.2352, "step": 4260 }, { "epoch": 0.4379944518647899, "grad_norm": 0.04782482981681824, "learning_rate": 0.01, "loss": 2.2458, "step": 4263 }, { "epoch": 0.4383026815986849, "grad_norm": 0.12880820035934448, "learning_rate": 0.01, "loss": 2.2384, "step": 4266 }, { "epoch": 0.4386109113325799, "grad_norm": 0.06714754551649094, "learning_rate": 0.01, "loss": 2.2214, "step": 4269 }, { "epoch": 0.4389191410664749, "grad_norm": 0.08878037333488464, "learning_rate": 0.01, "loss": 2.2597, "step": 4272 }, { "epoch": 0.4392273708003699, "grad_norm": 0.051335882395505905, "learning_rate": 0.01, "loss": 2.2065, "step": 4275 }, { "epoch": 0.4395356005342649, "grad_norm": 0.058174654841423035, "learning_rate": 0.01, "loss": 2.246, "step": 4278 }, { "epoch": 0.4398438302681599, "grad_norm": 0.053695593029260635, "learning_rate": 0.01, "loss": 2.2406, "step": 4281 }, { "epoch": 0.44015206000205487, "grad_norm": 0.07685926556587219, "learning_rate": 0.01, "loss": 2.2212, "step": 4284 }, { "epoch": 0.44046028973594986, "grad_norm": 0.13495223224163055, "learning_rate": 0.01, "loss": 2.2486, "step": 4287 }, { "epoch": 0.44076851946984485, "grad_norm": 0.0707453116774559, "learning_rate": 0.01, "loss": 2.247, "step": 4290 }, { "epoch": 0.44107674920373985, "grad_norm": 0.04909240081906319, "learning_rate": 0.01, "loss": 2.2528, "step": 4293 }, { "epoch": 0.44138497893763484, "grad_norm": 0.06148238107562065, "learning_rate": 0.01, "loss": 2.2462, "step": 4296 }, { "epoch": 0.44169320867152984, "grad_norm": 0.07306285202503204, "learning_rate": 0.01, "loss": 2.199, "step": 4299 }, { "epoch": 0.44200143840542483, "grad_norm": 0.12965865433216095, "learning_rate": 0.01, "loss": 2.2156, "step": 4302 }, { "epoch": 0.4423096681393198, "grad_norm": 0.059606775641441345, "learning_rate": 0.01, "loss": 2.2209, "step": 4305 }, { "epoch": 0.4426178978732148, "grad_norm": 0.06866457313299179, "learning_rate": 0.01, "loss": 2.2508, "step": 4308 }, { "epoch": 0.4429261276071098, "grad_norm": 0.08940677344799042, "learning_rate": 0.01, "loss": 2.2244, "step": 4311 }, { "epoch": 0.4432343573410048, "grad_norm": 0.10428988933563232, "learning_rate": 0.01, "loss": 2.2106, "step": 4314 }, { "epoch": 0.44354258707489985, "grad_norm": 0.1565064787864685, "learning_rate": 0.01, "loss": 2.2745, "step": 4317 }, { "epoch": 0.44385081680879485, "grad_norm": 0.11433500796556473, "learning_rate": 0.01, "loss": 2.2655, "step": 4320 }, { "epoch": 0.44415904654268984, "grad_norm": 0.07315809279680252, "learning_rate": 0.01, "loss": 2.2523, "step": 4323 }, { "epoch": 0.44446727627658483, "grad_norm": 0.048583708703517914, "learning_rate": 0.01, "loss": 2.2345, "step": 4326 }, { "epoch": 0.4447755060104798, "grad_norm": 0.03422848507761955, "learning_rate": 0.01, "loss": 2.1883, "step": 4329 }, { "epoch": 0.4450837357443748, "grad_norm": 0.05057518929243088, "learning_rate": 0.01, "loss": 2.2288, "step": 4332 }, { "epoch": 0.4453919654782698, "grad_norm": 0.10407044738531113, "learning_rate": 0.01, "loss": 2.1974, "step": 4335 }, { "epoch": 0.4457001952121648, "grad_norm": 0.06545260548591614, "learning_rate": 0.01, "loss": 2.2121, "step": 4338 }, { "epoch": 0.4460084249460598, "grad_norm": 0.09442485123872757, "learning_rate": 0.01, "loss": 2.2145, "step": 4341 }, { "epoch": 0.4463166546799548, "grad_norm": 0.11353209614753723, "learning_rate": 0.01, "loss": 2.227, "step": 4344 }, { "epoch": 0.4466248844138498, "grad_norm": 0.11243279278278351, "learning_rate": 0.01, "loss": 2.242, "step": 4347 }, { "epoch": 0.4469331141477448, "grad_norm": 0.14264856278896332, "learning_rate": 0.01, "loss": 2.2405, "step": 4350 }, { "epoch": 0.4472413438816398, "grad_norm": 0.048186566680669785, "learning_rate": 0.01, "loss": 2.1921, "step": 4353 }, { "epoch": 0.44754957361553477, "grad_norm": 0.0693448930978775, "learning_rate": 0.01, "loss": 2.2404, "step": 4356 }, { "epoch": 0.44785780334942976, "grad_norm": 0.04426461458206177, "learning_rate": 0.01, "loss": 2.2114, "step": 4359 }, { "epoch": 0.44816603308332476, "grad_norm": 0.06392990797758102, "learning_rate": 0.01, "loss": 2.224, "step": 4362 }, { "epoch": 0.44847426281721975, "grad_norm": 0.16224262118339539, "learning_rate": 0.01, "loss": 2.261, "step": 4365 }, { "epoch": 0.44878249255111474, "grad_norm": 0.06382444500923157, "learning_rate": 0.01, "loss": 2.2067, "step": 4368 }, { "epoch": 0.44909072228500974, "grad_norm": 0.09267281740903854, "learning_rate": 0.01, "loss": 2.2403, "step": 4371 }, { "epoch": 0.44939895201890473, "grad_norm": 0.09785914421081543, "learning_rate": 0.01, "loss": 2.2276, "step": 4374 }, { "epoch": 0.4497071817527998, "grad_norm": 0.06673259288072586, "learning_rate": 0.01, "loss": 2.214, "step": 4377 }, { "epoch": 0.45001541148669477, "grad_norm": 0.05463524907827377, "learning_rate": 0.01, "loss": 2.2048, "step": 4380 }, { "epoch": 0.45032364122058977, "grad_norm": 0.05466567724943161, "learning_rate": 0.01, "loss": 2.2062, "step": 4383 }, { "epoch": 0.45063187095448476, "grad_norm": 0.07413290441036224, "learning_rate": 0.01, "loss": 2.2178, "step": 4386 }, { "epoch": 0.45094010068837975, "grad_norm": 0.06564678996801376, "learning_rate": 0.01, "loss": 2.2304, "step": 4389 }, { "epoch": 0.45124833042227475, "grad_norm": 0.12468644231557846, "learning_rate": 0.01, "loss": 2.2301, "step": 4392 }, { "epoch": 0.45155656015616974, "grad_norm": 0.06898069381713867, "learning_rate": 0.01, "loss": 2.2255, "step": 4395 }, { "epoch": 0.45186478989006473, "grad_norm": 0.13579058647155762, "learning_rate": 0.01, "loss": 2.2021, "step": 4398 }, { "epoch": 0.4521730196239597, "grad_norm": 0.07980421930551529, "learning_rate": 0.01, "loss": 2.2598, "step": 4401 }, { "epoch": 0.4524812493578547, "grad_norm": 0.07771994173526764, "learning_rate": 0.01, "loss": 2.2166, "step": 4404 }, { "epoch": 0.4527894790917497, "grad_norm": 0.08967602998018265, "learning_rate": 0.01, "loss": 2.2095, "step": 4407 }, { "epoch": 0.4530977088256447, "grad_norm": 0.10909977555274963, "learning_rate": 0.01, "loss": 2.2064, "step": 4410 }, { "epoch": 0.4534059385595397, "grad_norm": 0.11167363077402115, "learning_rate": 0.01, "loss": 2.2021, "step": 4413 }, { "epoch": 0.4537141682934347, "grad_norm": 0.10310694575309753, "learning_rate": 0.01, "loss": 2.2582, "step": 4416 }, { "epoch": 0.4540223980273297, "grad_norm": 0.06411474943161011, "learning_rate": 0.01, "loss": 2.2203, "step": 4419 }, { "epoch": 0.4543306277612247, "grad_norm": 0.11141805350780487, "learning_rate": 0.01, "loss": 2.2163, "step": 4422 }, { "epoch": 0.4546388574951197, "grad_norm": 0.09054200351238251, "learning_rate": 0.01, "loss": 2.2054, "step": 4425 }, { "epoch": 0.45494708722901467, "grad_norm": 0.06952405720949173, "learning_rate": 0.01, "loss": 2.2488, "step": 4428 }, { "epoch": 0.45525531696290966, "grad_norm": 0.08597440272569656, "learning_rate": 0.01, "loss": 2.2044, "step": 4431 }, { "epoch": 0.4555635466968047, "grad_norm": 0.06718187034130096, "learning_rate": 0.01, "loss": 2.2419, "step": 4434 }, { "epoch": 0.4558717764306997, "grad_norm": 0.0558515265583992, "learning_rate": 0.01, "loss": 2.2102, "step": 4437 }, { "epoch": 0.4561800061645947, "grad_norm": 0.0560682937502861, "learning_rate": 0.01, "loss": 2.2324, "step": 4440 }, { "epoch": 0.4564882358984897, "grad_norm": 0.058881547302007675, "learning_rate": 0.01, "loss": 2.1966, "step": 4443 }, { "epoch": 0.4567964656323847, "grad_norm": 0.07034582644701004, "learning_rate": 0.01, "loss": 2.2021, "step": 4446 }, { "epoch": 0.4571046953662797, "grad_norm": 0.09703799337148666, "learning_rate": 0.01, "loss": 2.21, "step": 4449 }, { "epoch": 0.45741292510017467, "grad_norm": 0.06268820911645889, "learning_rate": 0.01, "loss": 2.2237, "step": 4452 }, { "epoch": 0.45772115483406967, "grad_norm": 0.123359814286232, "learning_rate": 0.01, "loss": 2.2063, "step": 4455 }, { "epoch": 0.45802938456796466, "grad_norm": 0.0536644384264946, "learning_rate": 0.01, "loss": 2.2002, "step": 4458 }, { "epoch": 0.45833761430185965, "grad_norm": 0.0957527682185173, "learning_rate": 0.01, "loss": 2.2484, "step": 4461 }, { "epoch": 0.45864584403575465, "grad_norm": 0.12607458233833313, "learning_rate": 0.01, "loss": 2.2241, "step": 4464 }, { "epoch": 0.45895407376964964, "grad_norm": 0.07415255159139633, "learning_rate": 0.01, "loss": 2.2083, "step": 4467 }, { "epoch": 0.45926230350354463, "grad_norm": 0.10248073190450668, "learning_rate": 0.01, "loss": 2.2253, "step": 4470 }, { "epoch": 0.4595705332374396, "grad_norm": 0.05264243111014366, "learning_rate": 0.01, "loss": 2.2166, "step": 4473 }, { "epoch": 0.4598787629713346, "grad_norm": 0.0557783767580986, "learning_rate": 0.01, "loss": 2.2213, "step": 4476 }, { "epoch": 0.4601869927052296, "grad_norm": 0.06835830211639404, "learning_rate": 0.01, "loss": 2.2255, "step": 4479 }, { "epoch": 0.4604952224391246, "grad_norm": 0.12045460939407349, "learning_rate": 0.01, "loss": 2.2331, "step": 4482 }, { "epoch": 0.4608034521730196, "grad_norm": 0.11495090276002884, "learning_rate": 0.01, "loss": 2.2191, "step": 4485 }, { "epoch": 0.4611116819069146, "grad_norm": 0.07859046757221222, "learning_rate": 0.01, "loss": 2.2282, "step": 4488 }, { "epoch": 0.46141991164080964, "grad_norm": 0.03789819777011871, "learning_rate": 0.01, "loss": 2.2188, "step": 4491 }, { "epoch": 0.46172814137470464, "grad_norm": 0.03617655113339424, "learning_rate": 0.01, "loss": 2.2496, "step": 4494 }, { "epoch": 0.46203637110859963, "grad_norm": 0.06894705444574356, "learning_rate": 0.01, "loss": 2.2007, "step": 4497 }, { "epoch": 0.4623446008424946, "grad_norm": 0.1143706887960434, "learning_rate": 0.01, "loss": 2.2247, "step": 4500 }, { "epoch": 0.4626528305763896, "grad_norm": 0.10069230943918228, "learning_rate": 0.01, "loss": 2.2114, "step": 4503 }, { "epoch": 0.4629610603102846, "grad_norm": 0.10068007558584213, "learning_rate": 0.01, "loss": 2.2438, "step": 4506 }, { "epoch": 0.4632692900441796, "grad_norm": 0.05319290608167648, "learning_rate": 0.01, "loss": 2.2422, "step": 4509 }, { "epoch": 0.4635775197780746, "grad_norm": 0.06933122128248215, "learning_rate": 0.01, "loss": 2.2059, "step": 4512 }, { "epoch": 0.4638857495119696, "grad_norm": 0.11921056360006332, "learning_rate": 0.01, "loss": 2.2137, "step": 4515 }, { "epoch": 0.4641939792458646, "grad_norm": 0.06092121824622154, "learning_rate": 0.01, "loss": 2.1941, "step": 4518 }, { "epoch": 0.4645022089797596, "grad_norm": 0.06017937511205673, "learning_rate": 0.01, "loss": 2.2539, "step": 4521 }, { "epoch": 0.4648104387136546, "grad_norm": 0.05721915140748024, "learning_rate": 0.01, "loss": 2.2348, "step": 4524 }, { "epoch": 0.46511866844754957, "grad_norm": 0.07706714421510696, "learning_rate": 0.01, "loss": 2.2169, "step": 4527 }, { "epoch": 0.46542689818144456, "grad_norm": 0.07279779762029648, "learning_rate": 0.01, "loss": 2.2163, "step": 4530 }, { "epoch": 0.46573512791533955, "grad_norm": 0.06781268864870071, "learning_rate": 0.01, "loss": 2.1682, "step": 4533 }, { "epoch": 0.46604335764923455, "grad_norm": 0.0807657316327095, "learning_rate": 0.01, "loss": 2.2123, "step": 4536 }, { "epoch": 0.46635158738312954, "grad_norm": 0.06467099487781525, "learning_rate": 0.01, "loss": 2.2152, "step": 4539 }, { "epoch": 0.46665981711702453, "grad_norm": 0.10680168867111206, "learning_rate": 0.01, "loss": 2.2062, "step": 4542 }, { "epoch": 0.4669680468509195, "grad_norm": 0.11668167263269424, "learning_rate": 0.01, "loss": 2.206, "step": 4545 }, { "epoch": 0.4672762765848146, "grad_norm": 0.06468226760625839, "learning_rate": 0.01, "loss": 2.2011, "step": 4548 }, { "epoch": 0.46758450631870957, "grad_norm": 0.07668601721525192, "learning_rate": 0.01, "loss": 2.2128, "step": 4551 }, { "epoch": 0.46789273605260456, "grad_norm": 0.05631673336029053, "learning_rate": 0.01, "loss": 2.1812, "step": 4554 }, { "epoch": 0.46820096578649956, "grad_norm": 0.12898530066013336, "learning_rate": 0.01, "loss": 2.2312, "step": 4557 }, { "epoch": 0.46850919552039455, "grad_norm": 0.07105603069067001, "learning_rate": 0.01, "loss": 2.1949, "step": 4560 }, { "epoch": 0.46881742525428954, "grad_norm": 0.07172367721796036, "learning_rate": 0.01, "loss": 2.2509, "step": 4563 }, { "epoch": 0.46912565498818454, "grad_norm": 0.1219574511051178, "learning_rate": 0.01, "loss": 2.2147, "step": 4566 }, { "epoch": 0.46943388472207953, "grad_norm": 0.05777307227253914, "learning_rate": 0.01, "loss": 2.2071, "step": 4569 }, { "epoch": 0.4697421144559745, "grad_norm": 0.12805253267288208, "learning_rate": 0.01, "loss": 2.2166, "step": 4572 }, { "epoch": 0.4700503441898695, "grad_norm": 0.11360877752304077, "learning_rate": 0.01, "loss": 2.1827, "step": 4575 }, { "epoch": 0.4703585739237645, "grad_norm": 0.07203348726034164, "learning_rate": 0.01, "loss": 2.2378, "step": 4578 }, { "epoch": 0.4706668036576595, "grad_norm": 0.05645303055644035, "learning_rate": 0.01, "loss": 2.2044, "step": 4581 }, { "epoch": 0.4709750333915545, "grad_norm": 0.06103040650486946, "learning_rate": 0.01, "loss": 2.2302, "step": 4584 }, { "epoch": 0.4712832631254495, "grad_norm": 0.0621771402657032, "learning_rate": 0.01, "loss": 2.2147, "step": 4587 }, { "epoch": 0.4715914928593445, "grad_norm": 0.08458666503429413, "learning_rate": 0.01, "loss": 2.1781, "step": 4590 }, { "epoch": 0.4718997225932395, "grad_norm": 0.092729851603508, "learning_rate": 0.01, "loss": 2.2326, "step": 4593 }, { "epoch": 0.4722079523271345, "grad_norm": 0.09255766123533249, "learning_rate": 0.01, "loss": 2.2082, "step": 4596 }, { "epoch": 0.47251618206102947, "grad_norm": 0.11929985135793686, "learning_rate": 0.01, "loss": 2.2064, "step": 4599 }, { "epoch": 0.47282441179492446, "grad_norm": 0.12234004586935043, "learning_rate": 0.01, "loss": 2.1513, "step": 4602 }, { "epoch": 0.47313264152881945, "grad_norm": 0.07648742944002151, "learning_rate": 0.01, "loss": 2.2376, "step": 4605 }, { "epoch": 0.4734408712627145, "grad_norm": 0.05717691034078598, "learning_rate": 0.01, "loss": 2.231, "step": 4608 }, { "epoch": 0.4737491009966095, "grad_norm": 0.048224568367004395, "learning_rate": 0.01, "loss": 2.2126, "step": 4611 }, { "epoch": 0.4740573307305045, "grad_norm": 0.07530826330184937, "learning_rate": 0.01, "loss": 2.2155, "step": 4614 }, { "epoch": 0.4743655604643995, "grad_norm": 0.08617862313985825, "learning_rate": 0.01, "loss": 2.2286, "step": 4617 }, { "epoch": 0.4746737901982945, "grad_norm": 0.10041820257902145, "learning_rate": 0.01, "loss": 2.1917, "step": 4620 }, { "epoch": 0.47498201993218947, "grad_norm": 0.04470205307006836, "learning_rate": 0.01, "loss": 2.2188, "step": 4623 }, { "epoch": 0.47529024966608446, "grad_norm": 0.060269374400377274, "learning_rate": 0.01, "loss": 2.2267, "step": 4626 }, { "epoch": 0.47559847939997946, "grad_norm": 0.06320520490407944, "learning_rate": 0.01, "loss": 2.2054, "step": 4629 }, { "epoch": 0.47590670913387445, "grad_norm": 0.05642838776111603, "learning_rate": 0.01, "loss": 2.2062, "step": 4632 }, { "epoch": 0.47621493886776944, "grad_norm": 0.064301997423172, "learning_rate": 0.01, "loss": 2.2296, "step": 4635 }, { "epoch": 0.47652316860166444, "grad_norm": 0.07448214292526245, "learning_rate": 0.01, "loss": 2.197, "step": 4638 }, { "epoch": 0.47683139833555943, "grad_norm": 0.08586326986551285, "learning_rate": 0.01, "loss": 2.1743, "step": 4641 }, { "epoch": 0.4771396280694544, "grad_norm": 0.13179326057434082, "learning_rate": 0.01, "loss": 2.2299, "step": 4644 }, { "epoch": 0.4774478578033494, "grad_norm": 0.1163720041513443, "learning_rate": 0.01, "loss": 2.2089, "step": 4647 }, { "epoch": 0.4777560875372444, "grad_norm": 0.04846031963825226, "learning_rate": 0.01, "loss": 2.1564, "step": 4650 }, { "epoch": 0.4780643172711394, "grad_norm": 0.13724131882190704, "learning_rate": 0.01, "loss": 2.2078, "step": 4653 }, { "epoch": 0.4783725470050344, "grad_norm": 0.062840536236763, "learning_rate": 0.01, "loss": 2.2252, "step": 4656 }, { "epoch": 0.4786807767389294, "grad_norm": 0.06721820682287216, "learning_rate": 0.01, "loss": 2.1781, "step": 4659 }, { "epoch": 0.4789890064728244, "grad_norm": 0.09086044877767563, "learning_rate": 0.01, "loss": 2.2179, "step": 4662 }, { "epoch": 0.47929723620671943, "grad_norm": 0.07732655107975006, "learning_rate": 0.01, "loss": 2.2334, "step": 4665 }, { "epoch": 0.47960546594061443, "grad_norm": 0.04763714596629143, "learning_rate": 0.01, "loss": 2.2262, "step": 4668 }, { "epoch": 0.4799136956745094, "grad_norm": 0.09649144858121872, "learning_rate": 0.01, "loss": 2.2141, "step": 4671 }, { "epoch": 0.4802219254084044, "grad_norm": 0.05458167567849159, "learning_rate": 0.01, "loss": 2.1967, "step": 4674 }, { "epoch": 0.4805301551422994, "grad_norm": 0.08577650040388107, "learning_rate": 0.01, "loss": 2.2183, "step": 4677 }, { "epoch": 0.4808383848761944, "grad_norm": 0.0733698159456253, "learning_rate": 0.01, "loss": 2.2185, "step": 4680 }, { "epoch": 0.4811466146100894, "grad_norm": 0.06648692488670349, "learning_rate": 0.01, "loss": 2.1904, "step": 4683 }, { "epoch": 0.4814548443439844, "grad_norm": 0.08376996219158173, "learning_rate": 0.01, "loss": 2.2097, "step": 4686 }, { "epoch": 0.4817630740778794, "grad_norm": 0.05270134285092354, "learning_rate": 0.01, "loss": 2.2304, "step": 4689 }, { "epoch": 0.4820713038117744, "grad_norm": 0.05531509965658188, "learning_rate": 0.01, "loss": 2.2039, "step": 4692 }, { "epoch": 0.48237953354566937, "grad_norm": 0.05848492309451103, "learning_rate": 0.01, "loss": 2.2113, "step": 4695 }, { "epoch": 0.48268776327956436, "grad_norm": 0.06692120432853699, "learning_rate": 0.01, "loss": 2.1972, "step": 4698 }, { "epoch": 0.48299599301345936, "grad_norm": 0.07243851572275162, "learning_rate": 0.01, "loss": 2.223, "step": 4701 }, { "epoch": 0.48330422274735435, "grad_norm": 0.06565523892641068, "learning_rate": 0.01, "loss": 2.1913, "step": 4704 }, { "epoch": 0.48361245248124934, "grad_norm": 0.04595122113823891, "learning_rate": 0.01, "loss": 2.1782, "step": 4707 }, { "epoch": 0.48392068221514434, "grad_norm": 0.06658844649791718, "learning_rate": 0.01, "loss": 2.224, "step": 4710 }, { "epoch": 0.48422891194903933, "grad_norm": 0.0807071253657341, "learning_rate": 0.01, "loss": 2.217, "step": 4713 }, { "epoch": 0.4845371416829343, "grad_norm": 0.0562782846391201, "learning_rate": 0.01, "loss": 2.2033, "step": 4716 }, { "epoch": 0.4848453714168293, "grad_norm": 0.07851718366146088, "learning_rate": 0.01, "loss": 2.1847, "step": 4719 }, { "epoch": 0.48515360115072437, "grad_norm": 0.07649900764226913, "learning_rate": 0.01, "loss": 2.2222, "step": 4722 }, { "epoch": 0.48546183088461936, "grad_norm": 0.07279150187969208, "learning_rate": 0.01, "loss": 2.1951, "step": 4725 }, { "epoch": 0.48577006061851435, "grad_norm": 0.053628645837306976, "learning_rate": 0.01, "loss": 2.1681, "step": 4728 }, { "epoch": 0.48607829035240935, "grad_norm": 0.09401357173919678, "learning_rate": 0.01, "loss": 2.1943, "step": 4731 }, { "epoch": 0.48638652008630434, "grad_norm": 0.1156088337302208, "learning_rate": 0.01, "loss": 2.2317, "step": 4734 }, { "epoch": 0.48669474982019933, "grad_norm": 0.12672138214111328, "learning_rate": 0.01, "loss": 2.2085, "step": 4737 }, { "epoch": 0.48700297955409433, "grad_norm": 0.06799574196338654, "learning_rate": 0.01, "loss": 2.2161, "step": 4740 }, { "epoch": 0.4873112092879893, "grad_norm": 0.06479325145483017, "learning_rate": 0.01, "loss": 2.1663, "step": 4743 }, { "epoch": 0.4876194390218843, "grad_norm": 0.09143824130296707, "learning_rate": 0.01, "loss": 2.2193, "step": 4746 }, { "epoch": 0.4879276687557793, "grad_norm": 0.09262688457965851, "learning_rate": 0.01, "loss": 2.218, "step": 4749 }, { "epoch": 0.4882358984896743, "grad_norm": 0.11519678682088852, "learning_rate": 0.01, "loss": 2.1937, "step": 4752 }, { "epoch": 0.4885441282235693, "grad_norm": 0.07646415382623672, "learning_rate": 0.01, "loss": 2.2133, "step": 4755 }, { "epoch": 0.4888523579574643, "grad_norm": 0.08090809732675552, "learning_rate": 0.01, "loss": 2.193, "step": 4758 }, { "epoch": 0.4891605876913593, "grad_norm": 0.08812209218740463, "learning_rate": 0.01, "loss": 2.2215, "step": 4761 }, { "epoch": 0.4894688174252543, "grad_norm": 0.14427846670150757, "learning_rate": 0.01, "loss": 2.2115, "step": 4764 }, { "epoch": 0.48977704715914927, "grad_norm": 0.08065719902515411, "learning_rate": 0.01, "loss": 2.1861, "step": 4767 }, { "epoch": 0.49008527689304426, "grad_norm": 0.04888691008090973, "learning_rate": 0.01, "loss": 2.1911, "step": 4770 }, { "epoch": 0.49039350662693926, "grad_norm": 0.04742259159684181, "learning_rate": 0.01, "loss": 2.2152, "step": 4773 }, { "epoch": 0.49070173636083425, "grad_norm": 0.061714138835668564, "learning_rate": 0.01, "loss": 2.2009, "step": 4776 }, { "epoch": 0.49100996609472924, "grad_norm": 0.07582443952560425, "learning_rate": 0.01, "loss": 2.2189, "step": 4779 }, { "epoch": 0.4913181958286243, "grad_norm": 0.1390780359506607, "learning_rate": 0.01, "loss": 2.211, "step": 4782 }, { "epoch": 0.4916264255625193, "grad_norm": 0.03784565255045891, "learning_rate": 0.01, "loss": 2.2011, "step": 4785 }, { "epoch": 0.4919346552964143, "grad_norm": 0.07413594424724579, "learning_rate": 0.01, "loss": 2.2103, "step": 4788 }, { "epoch": 0.4922428850303093, "grad_norm": 0.09402404725551605, "learning_rate": 0.01, "loss": 2.1912, "step": 4791 }, { "epoch": 0.49255111476420427, "grad_norm": 0.0717400312423706, "learning_rate": 0.01, "loss": 2.1868, "step": 4794 }, { "epoch": 0.49285934449809926, "grad_norm": 0.05179424583911896, "learning_rate": 0.01, "loss": 2.2298, "step": 4797 }, { "epoch": 0.49316757423199425, "grad_norm": 0.12123433500528336, "learning_rate": 0.01, "loss": 2.2005, "step": 4800 }, { "epoch": 0.49347580396588925, "grad_norm": 0.04941033944487572, "learning_rate": 0.01, "loss": 2.2113, "step": 4803 }, { "epoch": 0.49378403369978424, "grad_norm": 0.10987304151058197, "learning_rate": 0.01, "loss": 2.209, "step": 4806 }, { "epoch": 0.49409226343367924, "grad_norm": 0.09235193580389023, "learning_rate": 0.01, "loss": 2.1967, "step": 4809 }, { "epoch": 0.49440049316757423, "grad_norm": 0.057354703545570374, "learning_rate": 0.01, "loss": 2.219, "step": 4812 }, { "epoch": 0.4947087229014692, "grad_norm": 0.04692654311656952, "learning_rate": 0.01, "loss": 2.173, "step": 4815 }, { "epoch": 0.4950169526353642, "grad_norm": 0.09447453171014786, "learning_rate": 0.01, "loss": 2.1806, "step": 4818 }, { "epoch": 0.4953251823692592, "grad_norm": 0.09967079013586044, "learning_rate": 0.01, "loss": 2.1809, "step": 4821 }, { "epoch": 0.4956334121031542, "grad_norm": 0.06462189555168152, "learning_rate": 0.01, "loss": 2.1922, "step": 4824 }, { "epoch": 0.4959416418370492, "grad_norm": 0.038030870258808136, "learning_rate": 0.01, "loss": 2.2239, "step": 4827 }, { "epoch": 0.4962498715709442, "grad_norm": 0.06828872114419937, "learning_rate": 0.01, "loss": 2.1881, "step": 4830 }, { "epoch": 0.4965581013048392, "grad_norm": 0.10087070614099503, "learning_rate": 0.01, "loss": 2.22, "step": 4833 }, { "epoch": 0.4968663310387342, "grad_norm": 0.07630455493927002, "learning_rate": 0.01, "loss": 2.188, "step": 4836 }, { "epoch": 0.4971745607726292, "grad_norm": 0.05040668696165085, "learning_rate": 0.01, "loss": 2.2012, "step": 4839 }, { "epoch": 0.4974827905065242, "grad_norm": 0.05160282924771309, "learning_rate": 0.01, "loss": 2.2119, "step": 4842 }, { "epoch": 0.4977910202404192, "grad_norm": 0.04949258640408516, "learning_rate": 0.01, "loss": 2.1959, "step": 4845 }, { "epoch": 0.4980992499743142, "grad_norm": 0.07766029983758926, "learning_rate": 0.01, "loss": 2.1896, "step": 4848 }, { "epoch": 0.4984074797082092, "grad_norm": 0.06274580955505371, "learning_rate": 0.01, "loss": 2.2014, "step": 4851 }, { "epoch": 0.4987157094421042, "grad_norm": 0.1071280762553215, "learning_rate": 0.01, "loss": 2.2045, "step": 4854 }, { "epoch": 0.4990239391759992, "grad_norm": 0.10645020008087158, "learning_rate": 0.01, "loss": 2.1895, "step": 4857 }, { "epoch": 0.4993321689098942, "grad_norm": 0.1151091679930687, "learning_rate": 0.01, "loss": 2.1954, "step": 4860 }, { "epoch": 0.4996403986437892, "grad_norm": 0.09699530899524689, "learning_rate": 0.01, "loss": 2.1833, "step": 4863 }, { "epoch": 0.49994862837768417, "grad_norm": 0.06568959355354309, "learning_rate": 0.01, "loss": 2.1862, "step": 4866 }, { "epoch": 0.5002568581115792, "grad_norm": 0.0421447716653347, "learning_rate": 0.01, "loss": 2.1819, "step": 4869 }, { "epoch": 0.5005650878454742, "grad_norm": 0.04529868811368942, "learning_rate": 0.01, "loss": 2.1852, "step": 4872 }, { "epoch": 0.5008733175793691, "grad_norm": 0.059541650116443634, "learning_rate": 0.01, "loss": 2.1955, "step": 4875 }, { "epoch": 0.5011815473132641, "grad_norm": 0.061823770403862, "learning_rate": 0.01, "loss": 2.2039, "step": 4878 }, { "epoch": 0.5014897770471591, "grad_norm": 0.05892050638794899, "learning_rate": 0.01, "loss": 2.178, "step": 4881 }, { "epoch": 0.5017980067810541, "grad_norm": 0.04842402786016464, "learning_rate": 0.01, "loss": 2.1948, "step": 4884 }, { "epoch": 0.5021062365149491, "grad_norm": 0.05962050333619118, "learning_rate": 0.01, "loss": 2.1932, "step": 4887 }, { "epoch": 0.5024144662488441, "grad_norm": 0.056295089423656464, "learning_rate": 0.01, "loss": 2.1757, "step": 4890 }, { "epoch": 0.5027226959827391, "grad_norm": 0.07448049634695053, "learning_rate": 0.01, "loss": 2.2181, "step": 4893 }, { "epoch": 0.5030309257166341, "grad_norm": 0.07998815923929214, "learning_rate": 0.01, "loss": 2.1714, "step": 4896 }, { "epoch": 0.5033391554505291, "grad_norm": 0.08058517426252365, "learning_rate": 0.01, "loss": 2.2131, "step": 4899 }, { "epoch": 0.5036473851844241, "grad_norm": 0.07899410277605057, "learning_rate": 0.01, "loss": 2.1727, "step": 4902 }, { "epoch": 0.5039556149183191, "grad_norm": 0.05830831080675125, "learning_rate": 0.01, "loss": 2.176, "step": 4905 }, { "epoch": 0.5042638446522141, "grad_norm": 0.05831579118967056, "learning_rate": 0.01, "loss": 2.1754, "step": 4908 }, { "epoch": 0.5045720743861091, "grad_norm": 0.052614904940128326, "learning_rate": 0.01, "loss": 2.1935, "step": 4911 }, { "epoch": 0.5048803041200041, "grad_norm": 0.0830332413315773, "learning_rate": 0.01, "loss": 2.2274, "step": 4914 }, { "epoch": 0.5051885338538991, "grad_norm": 0.1138230562210083, "learning_rate": 0.01, "loss": 2.1976, "step": 4917 }, { "epoch": 0.505496763587794, "grad_norm": 0.07024016976356506, "learning_rate": 0.01, "loss": 2.1969, "step": 4920 }, { "epoch": 0.505804993321689, "grad_norm": 0.07235170155763626, "learning_rate": 0.01, "loss": 2.2163, "step": 4923 }, { "epoch": 0.5061132230555841, "grad_norm": 0.06894835084676743, "learning_rate": 0.01, "loss": 2.2232, "step": 4926 }, { "epoch": 0.5064214527894791, "grad_norm": 0.0825890600681305, "learning_rate": 0.01, "loss": 2.1896, "step": 4929 }, { "epoch": 0.5067296825233741, "grad_norm": 0.05901159718632698, "learning_rate": 0.01, "loss": 2.1988, "step": 4932 }, { "epoch": 0.5070379122572691, "grad_norm": 0.048157334327697754, "learning_rate": 0.01, "loss": 2.1904, "step": 4935 }, { "epoch": 0.5073461419911641, "grad_norm": 0.10036749392747879, "learning_rate": 0.01, "loss": 2.1977, "step": 4938 }, { "epoch": 0.5076543717250591, "grad_norm": 0.10984963923692703, "learning_rate": 0.01, "loss": 2.1957, "step": 4941 }, { "epoch": 0.5079626014589541, "grad_norm": 0.09587367624044418, "learning_rate": 0.01, "loss": 2.2, "step": 4944 }, { "epoch": 0.5082708311928491, "grad_norm": 0.06347552686929703, "learning_rate": 0.01, "loss": 2.1918, "step": 4947 }, { "epoch": 0.5085790609267441, "grad_norm": 0.0658629834651947, "learning_rate": 0.01, "loss": 2.1945, "step": 4950 }, { "epoch": 0.5088872906606391, "grad_norm": 0.045971643179655075, "learning_rate": 0.01, "loss": 2.2114, "step": 4953 }, { "epoch": 0.5091955203945341, "grad_norm": 0.04058291018009186, "learning_rate": 0.01, "loss": 2.2066, "step": 4956 }, { "epoch": 0.5095037501284291, "grad_norm": 0.052851296961307526, "learning_rate": 0.01, "loss": 2.1884, "step": 4959 }, { "epoch": 0.5098119798623241, "grad_norm": 0.033158350735902786, "learning_rate": 0.01, "loss": 2.2078, "step": 4962 }, { "epoch": 0.5101202095962191, "grad_norm": 0.05409036949276924, "learning_rate": 0.01, "loss": 2.181, "step": 4965 }, { "epoch": 0.5104284393301141, "grad_norm": 0.0731736570596695, "learning_rate": 0.01, "loss": 2.1825, "step": 4968 }, { "epoch": 0.510736669064009, "grad_norm": 0.05854470282793045, "learning_rate": 0.01, "loss": 2.2119, "step": 4971 }, { "epoch": 0.511044898797904, "grad_norm": 0.05071520060300827, "learning_rate": 0.01, "loss": 2.1886, "step": 4974 }, { "epoch": 0.511353128531799, "grad_norm": 0.060792725533246994, "learning_rate": 0.01, "loss": 2.2066, "step": 4977 }, { "epoch": 0.511661358265694, "grad_norm": 0.0910191684961319, "learning_rate": 0.01, "loss": 2.209, "step": 4980 }, { "epoch": 0.511969587999589, "grad_norm": 0.12366749346256256, "learning_rate": 0.01, "loss": 2.2263, "step": 4983 }, { "epoch": 0.512277817733484, "grad_norm": 0.11254429817199707, "learning_rate": 0.01, "loss": 2.175, "step": 4986 }, { "epoch": 0.512586047467379, "grad_norm": 0.11091643571853638, "learning_rate": 0.01, "loss": 2.2046, "step": 4989 }, { "epoch": 0.512894277201274, "grad_norm": 0.045611754059791565, "learning_rate": 0.01, "loss": 2.1861, "step": 4992 }, { "epoch": 0.513202506935169, "grad_norm": 0.09836157411336899, "learning_rate": 0.01, "loss": 2.1931, "step": 4995 }, { "epoch": 0.513510736669064, "grad_norm": 0.11932815611362457, "learning_rate": 0.01, "loss": 2.2087, "step": 4998 }, { "epoch": 0.513818966402959, "grad_norm": 0.10955359041690826, "learning_rate": 0.01, "loss": 2.19, "step": 5001 }, { "epoch": 0.514127196136854, "grad_norm": 0.09804633259773254, "learning_rate": 0.01, "loss": 2.1572, "step": 5004 }, { "epoch": 0.514435425870749, "grad_norm": 0.04835839942097664, "learning_rate": 0.01, "loss": 2.2115, "step": 5007 }, { "epoch": 0.514743655604644, "grad_norm": 0.04645110294222832, "learning_rate": 0.01, "loss": 2.2012, "step": 5010 }, { "epoch": 0.515051885338539, "grad_norm": 0.05947386845946312, "learning_rate": 0.01, "loss": 2.2039, "step": 5013 }, { "epoch": 0.515360115072434, "grad_norm": 0.05693971738219261, "learning_rate": 0.01, "loss": 2.1733, "step": 5016 }, { "epoch": 0.515668344806329, "grad_norm": 0.0724320039153099, "learning_rate": 0.01, "loss": 2.1944, "step": 5019 }, { "epoch": 0.5159765745402239, "grad_norm": 0.06627337634563446, "learning_rate": 0.01, "loss": 2.1932, "step": 5022 }, { "epoch": 0.5162848042741189, "grad_norm": 0.10879958420991898, "learning_rate": 0.01, "loss": 2.2024, "step": 5025 }, { "epoch": 0.5165930340080139, "grad_norm": 0.12266898900270462, "learning_rate": 0.01, "loss": 2.1938, "step": 5028 }, { "epoch": 0.5169012637419089, "grad_norm": 0.06240540370345116, "learning_rate": 0.01, "loss": 2.18, "step": 5031 }, { "epoch": 0.5172094934758039, "grad_norm": 0.05043266713619232, "learning_rate": 0.01, "loss": 2.1936, "step": 5034 }, { "epoch": 0.5175177232096989, "grad_norm": 0.052652738988399506, "learning_rate": 0.01, "loss": 2.1631, "step": 5037 }, { "epoch": 0.5178259529435939, "grad_norm": 0.04598904401063919, "learning_rate": 0.01, "loss": 2.2067, "step": 5040 }, { "epoch": 0.518134182677489, "grad_norm": 0.07040087133646011, "learning_rate": 0.01, "loss": 2.1737, "step": 5043 }, { "epoch": 0.518442412411384, "grad_norm": 0.04827702417969704, "learning_rate": 0.01, "loss": 2.2128, "step": 5046 }, { "epoch": 0.518750642145279, "grad_norm": 0.09803622215986252, "learning_rate": 0.01, "loss": 2.1851, "step": 5049 }, { "epoch": 0.519058871879174, "grad_norm": 0.1019926443696022, "learning_rate": 0.01, "loss": 2.181, "step": 5052 }, { "epoch": 0.519367101613069, "grad_norm": 0.08847504109144211, "learning_rate": 0.01, "loss": 2.1874, "step": 5055 }, { "epoch": 0.519675331346964, "grad_norm": 0.06151921674609184, "learning_rate": 0.01, "loss": 2.1785, "step": 5058 }, { "epoch": 0.519983561080859, "grad_norm": 0.04823022335767746, "learning_rate": 0.01, "loss": 2.1662, "step": 5061 }, { "epoch": 0.520291790814754, "grad_norm": 0.12454935908317566, "learning_rate": 0.01, "loss": 2.1864, "step": 5064 }, { "epoch": 0.520600020548649, "grad_norm": 0.0716002956032753, "learning_rate": 0.01, "loss": 2.1866, "step": 5067 }, { "epoch": 0.520908250282544, "grad_norm": 0.055079616606235504, "learning_rate": 0.01, "loss": 2.2137, "step": 5070 }, { "epoch": 0.5212164800164389, "grad_norm": 0.05969909206032753, "learning_rate": 0.01, "loss": 2.1972, "step": 5073 }, { "epoch": 0.5215247097503339, "grad_norm": 0.07373122125864029, "learning_rate": 0.01, "loss": 2.2312, "step": 5076 }, { "epoch": 0.5218329394842289, "grad_norm": 0.1899929642677307, "learning_rate": 0.01, "loss": 2.2141, "step": 5079 }, { "epoch": 0.5221411692181239, "grad_norm": 0.05221979692578316, "learning_rate": 0.01, "loss": 2.1899, "step": 5082 }, { "epoch": 0.5224493989520189, "grad_norm": 0.04537337273359299, "learning_rate": 0.01, "loss": 2.1571, "step": 5085 }, { "epoch": 0.5227576286859139, "grad_norm": 0.05490431934595108, "learning_rate": 0.01, "loss": 2.1705, "step": 5088 }, { "epoch": 0.5230658584198089, "grad_norm": 0.03813198208808899, "learning_rate": 0.01, "loss": 2.1773, "step": 5091 }, { "epoch": 0.5233740881537039, "grad_norm": 0.045411352068185806, "learning_rate": 0.01, "loss": 2.2065, "step": 5094 }, { "epoch": 0.5236823178875989, "grad_norm": 0.05433456227183342, "learning_rate": 0.01, "loss": 2.1901, "step": 5097 }, { "epoch": 0.5239905476214939, "grad_norm": 0.10771681368350983, "learning_rate": 0.01, "loss": 2.171, "step": 5100 }, { "epoch": 0.5242987773553889, "grad_norm": 0.06446761637926102, "learning_rate": 0.01, "loss": 2.2033, "step": 5103 }, { "epoch": 0.5246070070892839, "grad_norm": 0.06428392231464386, "learning_rate": 0.01, "loss": 2.1877, "step": 5106 }, { "epoch": 0.5249152368231789, "grad_norm": 0.0525304451584816, "learning_rate": 0.01, "loss": 2.206, "step": 5109 }, { "epoch": 0.5252234665570739, "grad_norm": 0.07332491129636765, "learning_rate": 0.01, "loss": 2.1992, "step": 5112 }, { "epoch": 0.5255316962909689, "grad_norm": 0.1738174557685852, "learning_rate": 0.01, "loss": 2.176, "step": 5115 }, { "epoch": 0.5258399260248638, "grad_norm": 0.08102334290742874, "learning_rate": 0.01, "loss": 2.2067, "step": 5118 }, { "epoch": 0.5261481557587588, "grad_norm": 0.06945500522851944, "learning_rate": 0.01, "loss": 2.206, "step": 5121 }, { "epoch": 0.5264563854926538, "grad_norm": 0.07017000019550323, "learning_rate": 0.01, "loss": 2.2002, "step": 5124 }, { "epoch": 0.5267646152265488, "grad_norm": 0.03883346915245056, "learning_rate": 0.01, "loss": 2.1608, "step": 5127 }, { "epoch": 0.5270728449604438, "grad_norm": 0.050974566489458084, "learning_rate": 0.01, "loss": 2.1973, "step": 5130 }, { "epoch": 0.5273810746943388, "grad_norm": 0.0665312334895134, "learning_rate": 0.01, "loss": 2.2064, "step": 5133 }, { "epoch": 0.5276893044282338, "grad_norm": 0.03946761414408684, "learning_rate": 0.01, "loss": 2.1794, "step": 5136 }, { "epoch": 0.5279975341621288, "grad_norm": 0.046160902827978134, "learning_rate": 0.01, "loss": 2.1919, "step": 5139 }, { "epoch": 0.5283057638960238, "grad_norm": 0.046186063438653946, "learning_rate": 0.01, "loss": 2.1687, "step": 5142 }, { "epoch": 0.5286139936299188, "grad_norm": 0.07073774188756943, "learning_rate": 0.01, "loss": 2.2154, "step": 5145 }, { "epoch": 0.5289222233638138, "grad_norm": 0.08319617807865143, "learning_rate": 0.01, "loss": 2.2104, "step": 5148 }, { "epoch": 0.5292304530977088, "grad_norm": 0.08551453799009323, "learning_rate": 0.01, "loss": 2.1743, "step": 5151 }, { "epoch": 0.5295386828316038, "grad_norm": 0.06613323837518692, "learning_rate": 0.01, "loss": 2.1774, "step": 5154 }, { "epoch": 0.5298469125654989, "grad_norm": 0.0601351298391819, "learning_rate": 0.01, "loss": 2.1831, "step": 5157 }, { "epoch": 0.5301551422993939, "grad_norm": 0.04963411018252373, "learning_rate": 0.01, "loss": 2.1915, "step": 5160 }, { "epoch": 0.5304633720332889, "grad_norm": 0.06755329668521881, "learning_rate": 0.01, "loss": 2.163, "step": 5163 }, { "epoch": 0.5307716017671839, "grad_norm": 0.04755258187651634, "learning_rate": 0.01, "loss": 2.1518, "step": 5166 }, { "epoch": 0.5310798315010788, "grad_norm": 0.15916316211223602, "learning_rate": 0.01, "loss": 2.1835, "step": 5169 }, { "epoch": 0.5313880612349738, "grad_norm": 0.0807122215628624, "learning_rate": 0.01, "loss": 2.2193, "step": 5172 }, { "epoch": 0.5316962909688688, "grad_norm": 0.05207689106464386, "learning_rate": 0.01, "loss": 2.1754, "step": 5175 }, { "epoch": 0.5320045207027638, "grad_norm": 0.045082803815603256, "learning_rate": 0.01, "loss": 2.1525, "step": 5178 }, { "epoch": 0.5323127504366588, "grad_norm": 0.07747700810432434, "learning_rate": 0.01, "loss": 2.1438, "step": 5181 }, { "epoch": 0.5326209801705538, "grad_norm": 0.13366450369358063, "learning_rate": 0.01, "loss": 2.1904, "step": 5184 }, { "epoch": 0.5329292099044488, "grad_norm": 0.06902889162302017, "learning_rate": 0.01, "loss": 2.1786, "step": 5187 }, { "epoch": 0.5332374396383438, "grad_norm": 0.04604712501168251, "learning_rate": 0.01, "loss": 2.1848, "step": 5190 }, { "epoch": 0.5335456693722388, "grad_norm": 0.08803047984838486, "learning_rate": 0.01, "loss": 2.1798, "step": 5193 }, { "epoch": 0.5338538991061338, "grad_norm": 0.08366485685110092, "learning_rate": 0.01, "loss": 2.2008, "step": 5196 }, { "epoch": 0.5341621288400288, "grad_norm": 0.06176333501935005, "learning_rate": 0.01, "loss": 2.1722, "step": 5199 }, { "epoch": 0.5344703585739238, "grad_norm": 0.0837249681353569, "learning_rate": 0.01, "loss": 2.1783, "step": 5202 }, { "epoch": 0.5347785883078188, "grad_norm": 0.1322035938501358, "learning_rate": 0.01, "loss": 2.1948, "step": 5205 }, { "epoch": 0.5350868180417138, "grad_norm": 0.11094444990158081, "learning_rate": 0.01, "loss": 2.1937, "step": 5208 }, { "epoch": 0.5353950477756088, "grad_norm": 0.05182232707738876, "learning_rate": 0.01, "loss": 2.1945, "step": 5211 }, { "epoch": 0.5357032775095038, "grad_norm": 0.08261944353580475, "learning_rate": 0.01, "loss": 2.1855, "step": 5214 }, { "epoch": 0.5360115072433987, "grad_norm": 0.11097295582294464, "learning_rate": 0.01, "loss": 2.1902, "step": 5217 }, { "epoch": 0.5363197369772937, "grad_norm": 0.05825675278902054, "learning_rate": 0.01, "loss": 2.1984, "step": 5220 }, { "epoch": 0.5366279667111887, "grad_norm": 0.11612821370363235, "learning_rate": 0.01, "loss": 2.1679, "step": 5223 }, { "epoch": 0.5369361964450837, "grad_norm": 0.09120064228773117, "learning_rate": 0.01, "loss": 2.1586, "step": 5226 }, { "epoch": 0.5372444261789787, "grad_norm": 0.055181995034217834, "learning_rate": 0.01, "loss": 2.2169, "step": 5229 }, { "epoch": 0.5375526559128737, "grad_norm": 0.055780068039894104, "learning_rate": 0.01, "loss": 2.1748, "step": 5232 }, { "epoch": 0.5378608856467687, "grad_norm": 0.06303024291992188, "learning_rate": 0.01, "loss": 2.1782, "step": 5235 }, { "epoch": 0.5381691153806637, "grad_norm": 0.10456321388483047, "learning_rate": 0.01, "loss": 2.1975, "step": 5238 }, { "epoch": 0.5384773451145587, "grad_norm": 0.054177962243556976, "learning_rate": 0.01, "loss": 2.1641, "step": 5241 }, { "epoch": 0.5387855748484537, "grad_norm": 0.06265738606452942, "learning_rate": 0.01, "loss": 2.183, "step": 5244 }, { "epoch": 0.5390938045823487, "grad_norm": 0.13720418512821198, "learning_rate": 0.01, "loss": 2.1698, "step": 5247 }, { "epoch": 0.5394020343162437, "grad_norm": 0.04917861148715019, "learning_rate": 0.01, "loss": 2.1692, "step": 5250 }, { "epoch": 0.5397102640501387, "grad_norm": 0.04919945448637009, "learning_rate": 0.01, "loss": 2.1652, "step": 5253 }, { "epoch": 0.5400184937840337, "grad_norm": 0.06462734192609787, "learning_rate": 0.01, "loss": 2.1987, "step": 5256 }, { "epoch": 0.5403267235179287, "grad_norm": 0.05275480076670647, "learning_rate": 0.01, "loss": 2.1955, "step": 5259 }, { "epoch": 0.5406349532518236, "grad_norm": 0.12235717475414276, "learning_rate": 0.01, "loss": 2.1937, "step": 5262 }, { "epoch": 0.5409431829857186, "grad_norm": 0.05300014466047287, "learning_rate": 0.01, "loss": 2.1589, "step": 5265 }, { "epoch": 0.5412514127196136, "grad_norm": 0.0429493710398674, "learning_rate": 0.01, "loss": 2.1618, "step": 5268 }, { "epoch": 0.5415596424535086, "grad_norm": 0.07041259855031967, "learning_rate": 0.01, "loss": 2.1661, "step": 5271 }, { "epoch": 0.5418678721874037, "grad_norm": 0.05304478853940964, "learning_rate": 0.01, "loss": 2.183, "step": 5274 }, { "epoch": 0.5421761019212987, "grad_norm": 0.12009457498788834, "learning_rate": 0.01, "loss": 2.1862, "step": 5277 }, { "epoch": 0.5424843316551937, "grad_norm": 0.11629784107208252, "learning_rate": 0.01, "loss": 2.1897, "step": 5280 }, { "epoch": 0.5427925613890887, "grad_norm": 0.07305426150560379, "learning_rate": 0.01, "loss": 2.1832, "step": 5283 }, { "epoch": 0.5431007911229837, "grad_norm": 0.0855623185634613, "learning_rate": 0.01, "loss": 2.1884, "step": 5286 }, { "epoch": 0.5434090208568787, "grad_norm": 0.04178578779101372, "learning_rate": 0.01, "loss": 2.1872, "step": 5289 }, { "epoch": 0.5437172505907737, "grad_norm": 0.05382310971617699, "learning_rate": 0.01, "loss": 2.1901, "step": 5292 }, { "epoch": 0.5440254803246687, "grad_norm": 0.10682760924100876, "learning_rate": 0.01, "loss": 2.1957, "step": 5295 }, { "epoch": 0.5443337100585637, "grad_norm": 0.15037471055984497, "learning_rate": 0.01, "loss": 2.2085, "step": 5298 }, { "epoch": 0.5446419397924587, "grad_norm": 0.08333491533994675, "learning_rate": 0.01, "loss": 2.1964, "step": 5301 }, { "epoch": 0.5449501695263537, "grad_norm": 0.08964785933494568, "learning_rate": 0.01, "loss": 2.1613, "step": 5304 }, { "epoch": 0.5452583992602487, "grad_norm": 0.06194687634706497, "learning_rate": 0.01, "loss": 2.1711, "step": 5307 }, { "epoch": 0.5455666289941437, "grad_norm": 0.047254305332899094, "learning_rate": 0.01, "loss": 2.1956, "step": 5310 }, { "epoch": 0.5458748587280386, "grad_norm": 0.052926719188690186, "learning_rate": 0.01, "loss": 2.1767, "step": 5313 }, { "epoch": 0.5461830884619336, "grad_norm": 0.08765383809804916, "learning_rate": 0.01, "loss": 2.1782, "step": 5316 }, { "epoch": 0.5464913181958286, "grad_norm": 0.0749160572886467, "learning_rate": 0.01, "loss": 2.1875, "step": 5319 }, { "epoch": 0.5467995479297236, "grad_norm": 0.09781020879745483, "learning_rate": 0.01, "loss": 2.1748, "step": 5322 }, { "epoch": 0.5471077776636186, "grad_norm": 0.04605260491371155, "learning_rate": 0.01, "loss": 2.145, "step": 5325 }, { "epoch": 0.5474160073975136, "grad_norm": 0.13507331907749176, "learning_rate": 0.01, "loss": 2.1769, "step": 5328 }, { "epoch": 0.5477242371314086, "grad_norm": 0.05028709024190903, "learning_rate": 0.01, "loss": 2.1925, "step": 5331 }, { "epoch": 0.5480324668653036, "grad_norm": 0.08754327893257141, "learning_rate": 0.01, "loss": 2.159, "step": 5334 }, { "epoch": 0.5483406965991986, "grad_norm": 0.10449190437793732, "learning_rate": 0.01, "loss": 2.1898, "step": 5337 }, { "epoch": 0.5486489263330936, "grad_norm": 0.10263057053089142, "learning_rate": 0.01, "loss": 2.1776, "step": 5340 }, { "epoch": 0.5489571560669886, "grad_norm": 0.0547097772359848, "learning_rate": 0.01, "loss": 2.1941, "step": 5343 }, { "epoch": 0.5492653858008836, "grad_norm": 0.06682941317558289, "learning_rate": 0.01, "loss": 2.1901, "step": 5346 }, { "epoch": 0.5495736155347786, "grad_norm": 0.06421027332544327, "learning_rate": 0.01, "loss": 2.1981, "step": 5349 }, { "epoch": 0.5498818452686736, "grad_norm": 0.041218411177396774, "learning_rate": 0.01, "loss": 2.1844, "step": 5352 }, { "epoch": 0.5501900750025686, "grad_norm": 0.042902372777462006, "learning_rate": 0.01, "loss": 2.1981, "step": 5355 }, { "epoch": 0.5504983047364636, "grad_norm": 0.05338321253657341, "learning_rate": 0.01, "loss": 2.168, "step": 5358 }, { "epoch": 0.5508065344703585, "grad_norm": 0.06692427396774292, "learning_rate": 0.01, "loss": 2.1891, "step": 5361 }, { "epoch": 0.5511147642042535, "grad_norm": 0.07927200943231583, "learning_rate": 0.01, "loss": 2.1853, "step": 5364 }, { "epoch": 0.5514229939381485, "grad_norm": 0.05655739828944206, "learning_rate": 0.01, "loss": 2.1838, "step": 5367 }, { "epoch": 0.5517312236720435, "grad_norm": 0.04488144442439079, "learning_rate": 0.01, "loss": 2.1754, "step": 5370 }, { "epoch": 0.5520394534059385, "grad_norm": 0.09253795444965363, "learning_rate": 0.01, "loss": 2.1742, "step": 5373 }, { "epoch": 0.5523476831398335, "grad_norm": 0.07396019250154495, "learning_rate": 0.01, "loss": 2.1582, "step": 5376 }, { "epoch": 0.5526559128737285, "grad_norm": 0.053663600236177444, "learning_rate": 0.01, "loss": 2.1508, "step": 5379 }, { "epoch": 0.5529641426076235, "grad_norm": 0.062076181173324585, "learning_rate": 0.01, "loss": 2.1772, "step": 5382 }, { "epoch": 0.5532723723415185, "grad_norm": 0.08481581509113312, "learning_rate": 0.01, "loss": 2.1836, "step": 5385 }, { "epoch": 0.5535806020754136, "grad_norm": 0.08981155604124069, "learning_rate": 0.01, "loss": 2.204, "step": 5388 }, { "epoch": 0.5538888318093086, "grad_norm": 0.10067261755466461, "learning_rate": 0.01, "loss": 2.1527, "step": 5391 }, { "epoch": 0.5541970615432036, "grad_norm": 0.06231047958135605, "learning_rate": 0.01, "loss": 2.194, "step": 5394 }, { "epoch": 0.5545052912770986, "grad_norm": 0.049111973494291306, "learning_rate": 0.01, "loss": 2.1889, "step": 5397 }, { "epoch": 0.5548135210109936, "grad_norm": 0.06446948647499084, "learning_rate": 0.01, "loss": 2.2103, "step": 5400 }, { "epoch": 0.5551217507448886, "grad_norm": 0.051946625113487244, "learning_rate": 0.01, "loss": 2.1977, "step": 5403 }, { "epoch": 0.5554299804787836, "grad_norm": 0.1369265466928482, "learning_rate": 0.01, "loss": 2.1771, "step": 5406 }, { "epoch": 0.5557382102126786, "grad_norm": 0.08489779382944107, "learning_rate": 0.01, "loss": 2.1782, "step": 5409 }, { "epoch": 0.5560464399465735, "grad_norm": 0.10673670470714569, "learning_rate": 0.01, "loss": 2.173, "step": 5412 }, { "epoch": 0.5563546696804685, "grad_norm": 0.055250637233257294, "learning_rate": 0.01, "loss": 2.1539, "step": 5415 }, { "epoch": 0.5566628994143635, "grad_norm": 0.05136672407388687, "learning_rate": 0.01, "loss": 2.2035, "step": 5418 }, { "epoch": 0.5569711291482585, "grad_norm": 0.040590591728687286, "learning_rate": 0.01, "loss": 2.1778, "step": 5421 }, { "epoch": 0.5572793588821535, "grad_norm": 0.048333633691072464, "learning_rate": 0.01, "loss": 2.191, "step": 5424 }, { "epoch": 0.5575875886160485, "grad_norm": 0.0582728311419487, "learning_rate": 0.01, "loss": 2.1734, "step": 5427 }, { "epoch": 0.5578958183499435, "grad_norm": 0.05272262915968895, "learning_rate": 0.01, "loss": 2.1714, "step": 5430 }, { "epoch": 0.5582040480838385, "grad_norm": 0.08472342789173126, "learning_rate": 0.01, "loss": 2.1624, "step": 5433 }, { "epoch": 0.5585122778177335, "grad_norm": 0.10869960486888885, "learning_rate": 0.01, "loss": 2.164, "step": 5436 }, { "epoch": 0.5588205075516285, "grad_norm": 0.0569114163517952, "learning_rate": 0.01, "loss": 2.1933, "step": 5439 }, { "epoch": 0.5591287372855235, "grad_norm": 0.14485467970371246, "learning_rate": 0.01, "loss": 2.1779, "step": 5442 }, { "epoch": 0.5594369670194185, "grad_norm": 0.08184878528118134, "learning_rate": 0.01, "loss": 2.1779, "step": 5445 }, { "epoch": 0.5597451967533135, "grad_norm": 0.06575775146484375, "learning_rate": 0.01, "loss": 2.136, "step": 5448 }, { "epoch": 0.5600534264872085, "grad_norm": 0.08628299832344055, "learning_rate": 0.01, "loss": 2.1696, "step": 5451 }, { "epoch": 0.5603616562211035, "grad_norm": 0.06078352406620979, "learning_rate": 0.01, "loss": 2.1865, "step": 5454 }, { "epoch": 0.5606698859549984, "grad_norm": 0.05207353085279465, "learning_rate": 0.01, "loss": 2.167, "step": 5457 }, { "epoch": 0.5609781156888934, "grad_norm": 0.059535857290029526, "learning_rate": 0.01, "loss": 2.1977, "step": 5460 }, { "epoch": 0.5612863454227884, "grad_norm": 0.05342729389667511, "learning_rate": 0.01, "loss": 2.1823, "step": 5463 }, { "epoch": 0.5615945751566834, "grad_norm": 0.04207632318139076, "learning_rate": 0.01, "loss": 2.1849, "step": 5466 }, { "epoch": 0.5619028048905784, "grad_norm": 0.1334255486726761, "learning_rate": 0.01, "loss": 2.1886, "step": 5469 }, { "epoch": 0.5622110346244734, "grad_norm": 0.06532323360443115, "learning_rate": 0.01, "loss": 2.1979, "step": 5472 }, { "epoch": 0.5625192643583684, "grad_norm": 0.0793483555316925, "learning_rate": 0.01, "loss": 2.188, "step": 5475 }, { "epoch": 0.5628274940922634, "grad_norm": 0.04637480154633522, "learning_rate": 0.01, "loss": 2.1562, "step": 5478 }, { "epoch": 0.5631357238261584, "grad_norm": 0.0482000894844532, "learning_rate": 0.01, "loss": 2.1587, "step": 5481 }, { "epoch": 0.5634439535600534, "grad_norm": 0.06253401190042496, "learning_rate": 0.01, "loss": 2.1978, "step": 5484 }, { "epoch": 0.5637521832939484, "grad_norm": 0.15622715651988983, "learning_rate": 0.01, "loss": 2.171, "step": 5487 }, { "epoch": 0.5640604130278434, "grad_norm": 0.10941077768802643, "learning_rate": 0.01, "loss": 2.1952, "step": 5490 }, { "epoch": 0.5643686427617384, "grad_norm": 0.08030713349580765, "learning_rate": 0.01, "loss": 2.1948, "step": 5493 }, { "epoch": 0.5646768724956334, "grad_norm": 0.13679014146327972, "learning_rate": 0.01, "loss": 2.1767, "step": 5496 }, { "epoch": 0.5649851022295284, "grad_norm": 0.04662426933646202, "learning_rate": 0.01, "loss": 2.1926, "step": 5499 }, { "epoch": 0.5652933319634234, "grad_norm": 0.05347858741879463, "learning_rate": 0.01, "loss": 2.1825, "step": 5502 }, { "epoch": 0.5656015616973185, "grad_norm": 0.06205238029360771, "learning_rate": 0.01, "loss": 2.1537, "step": 5505 }, { "epoch": 0.5659097914312134, "grad_norm": 0.05525955557823181, "learning_rate": 0.01, "loss": 2.1802, "step": 5508 }, { "epoch": 0.5662180211651084, "grad_norm": 0.055693045258522034, "learning_rate": 0.01, "loss": 2.1727, "step": 5511 }, { "epoch": 0.5665262508990034, "grad_norm": 0.051134396344423294, "learning_rate": 0.01, "loss": 2.1695, "step": 5514 }, { "epoch": 0.5668344806328984, "grad_norm": 0.05469521880149841, "learning_rate": 0.01, "loss": 2.1664, "step": 5517 }, { "epoch": 0.5671427103667934, "grad_norm": 0.039416272193193436, "learning_rate": 0.01, "loss": 2.1708, "step": 5520 }, { "epoch": 0.5674509401006884, "grad_norm": 0.10661659389734268, "learning_rate": 0.01, "loss": 2.1753, "step": 5523 }, { "epoch": 0.5677591698345834, "grad_norm": 0.07567829638719559, "learning_rate": 0.01, "loss": 2.1733, "step": 5526 }, { "epoch": 0.5680673995684784, "grad_norm": 0.06030309572815895, "learning_rate": 0.01, "loss": 2.1795, "step": 5529 }, { "epoch": 0.5683756293023734, "grad_norm": 0.07429811358451843, "learning_rate": 0.01, "loss": 2.1936, "step": 5532 }, { "epoch": 0.5686838590362684, "grad_norm": 0.08618849515914917, "learning_rate": 0.01, "loss": 2.2009, "step": 5535 }, { "epoch": 0.5689920887701634, "grad_norm": 0.04969833791255951, "learning_rate": 0.01, "loss": 2.1711, "step": 5538 }, { "epoch": 0.5693003185040584, "grad_norm": 0.11154712736606598, "learning_rate": 0.01, "loss": 2.1802, "step": 5541 }, { "epoch": 0.5696085482379534, "grad_norm": 0.07754155993461609, "learning_rate": 0.01, "loss": 2.164, "step": 5544 }, { "epoch": 0.5699167779718484, "grad_norm": 0.04600273445248604, "learning_rate": 0.01, "loss": 2.1918, "step": 5547 }, { "epoch": 0.5702250077057434, "grad_norm": 0.06788063049316406, "learning_rate": 0.01, "loss": 2.1477, "step": 5550 }, { "epoch": 0.5705332374396384, "grad_norm": 0.11349419504404068, "learning_rate": 0.01, "loss": 2.1603, "step": 5553 }, { "epoch": 0.5708414671735333, "grad_norm": 0.11178430914878845, "learning_rate": 0.01, "loss": 2.1439, "step": 5556 }, { "epoch": 0.5711496969074283, "grad_norm": 0.050257500261068344, "learning_rate": 0.01, "loss": 2.1851, "step": 5559 }, { "epoch": 0.5714579266413233, "grad_norm": 0.08327756822109222, "learning_rate": 0.01, "loss": 2.158, "step": 5562 }, { "epoch": 0.5717661563752183, "grad_norm": 0.06866388767957687, "learning_rate": 0.01, "loss": 2.1584, "step": 5565 }, { "epoch": 0.5720743861091133, "grad_norm": 0.1139674037694931, "learning_rate": 0.01, "loss": 2.1897, "step": 5568 }, { "epoch": 0.5723826158430083, "grad_norm": 0.07029612362384796, "learning_rate": 0.01, "loss": 2.1723, "step": 5571 }, { "epoch": 0.5726908455769033, "grad_norm": 0.10171212255954742, "learning_rate": 0.01, "loss": 2.1788, "step": 5574 }, { "epoch": 0.5729990753107983, "grad_norm": 0.11364202946424484, "learning_rate": 0.01, "loss": 2.1659, "step": 5577 }, { "epoch": 0.5733073050446933, "grad_norm": 0.08066857606172562, "learning_rate": 0.01, "loss": 2.1902, "step": 5580 }, { "epoch": 0.5736155347785883, "grad_norm": 0.09207342565059662, "learning_rate": 0.01, "loss": 2.1519, "step": 5583 }, { "epoch": 0.5739237645124833, "grad_norm": 0.06945987790822983, "learning_rate": 0.01, "loss": 2.1677, "step": 5586 }, { "epoch": 0.5742319942463783, "grad_norm": 0.05137445777654648, "learning_rate": 0.01, "loss": 2.1686, "step": 5589 }, { "epoch": 0.5745402239802733, "grad_norm": 0.10192268341779709, "learning_rate": 0.01, "loss": 2.1758, "step": 5592 }, { "epoch": 0.5748484537141683, "grad_norm": 0.056787896901369095, "learning_rate": 0.01, "loss": 2.1642, "step": 5595 }, { "epoch": 0.5751566834480633, "grad_norm": 0.07727455347776413, "learning_rate": 0.01, "loss": 2.1662, "step": 5598 }, { "epoch": 0.5754649131819582, "grad_norm": 0.1311456710100174, "learning_rate": 0.01, "loss": 2.1713, "step": 5601 }, { "epoch": 0.5757731429158532, "grad_norm": 0.1014258936047554, "learning_rate": 0.01, "loss": 2.1751, "step": 5604 }, { "epoch": 0.5760813726497482, "grad_norm": 0.06325560063123703, "learning_rate": 0.01, "loss": 2.1757, "step": 5607 }, { "epoch": 0.5763896023836432, "grad_norm": 0.07262448221445084, "learning_rate": 0.01, "loss": 2.1575, "step": 5610 }, { "epoch": 0.5766978321175382, "grad_norm": 0.07160039991140366, "learning_rate": 0.01, "loss": 2.1706, "step": 5613 }, { "epoch": 0.5770060618514332, "grad_norm": 0.050024017691612244, "learning_rate": 0.01, "loss": 2.1811, "step": 5616 }, { "epoch": 0.5773142915853282, "grad_norm": 0.09685138612985611, "learning_rate": 0.01, "loss": 2.1549, "step": 5619 }, { "epoch": 0.5776225213192233, "grad_norm": 0.058329988270998, "learning_rate": 0.01, "loss": 2.1813, "step": 5622 }, { "epoch": 0.5779307510531183, "grad_norm": 0.06637705117464066, "learning_rate": 0.01, "loss": 2.1717, "step": 5625 }, { "epoch": 0.5782389807870133, "grad_norm": 0.0906175896525383, "learning_rate": 0.01, "loss": 2.1677, "step": 5628 }, { "epoch": 0.5785472105209083, "grad_norm": 0.06751519441604614, "learning_rate": 0.01, "loss": 2.1584, "step": 5631 }, { "epoch": 0.5788554402548033, "grad_norm": 0.04437318444252014, "learning_rate": 0.01, "loss": 2.2013, "step": 5634 }, { "epoch": 0.5791636699886983, "grad_norm": 0.04365368187427521, "learning_rate": 0.01, "loss": 2.1746, "step": 5637 }, { "epoch": 0.5794718997225933, "grad_norm": 0.04844829812645912, "learning_rate": 0.01, "loss": 2.1818, "step": 5640 }, { "epoch": 0.5797801294564883, "grad_norm": 0.04154437035322189, "learning_rate": 0.01, "loss": 2.1536, "step": 5643 }, { "epoch": 0.5800883591903833, "grad_norm": 0.07691098004579544, "learning_rate": 0.01, "loss": 2.1883, "step": 5646 }, { "epoch": 0.5803965889242783, "grad_norm": 0.07065980136394501, "learning_rate": 0.01, "loss": 2.154, "step": 5649 }, { "epoch": 0.5807048186581732, "grad_norm": 0.1051129475235939, "learning_rate": 0.01, "loss": 2.1447, "step": 5652 }, { "epoch": 0.5810130483920682, "grad_norm": 0.10921964794397354, "learning_rate": 0.01, "loss": 2.1777, "step": 5655 }, { "epoch": 0.5813212781259632, "grad_norm": 0.1120898649096489, "learning_rate": 0.01, "loss": 2.1617, "step": 5658 }, { "epoch": 0.5816295078598582, "grad_norm": 0.09593590348958969, "learning_rate": 0.01, "loss": 2.1457, "step": 5661 }, { "epoch": 0.5819377375937532, "grad_norm": 0.054108936339616776, "learning_rate": 0.01, "loss": 2.1793, "step": 5664 }, { "epoch": 0.5822459673276482, "grad_norm": 0.07890141755342484, "learning_rate": 0.01, "loss": 2.1676, "step": 5667 }, { "epoch": 0.5825541970615432, "grad_norm": 0.07864063233137131, "learning_rate": 0.01, "loss": 2.1766, "step": 5670 }, { "epoch": 0.5828624267954382, "grad_norm": 0.08160068094730377, "learning_rate": 0.01, "loss": 2.166, "step": 5673 }, { "epoch": 0.5831706565293332, "grad_norm": 0.08126121759414673, "learning_rate": 0.01, "loss": 2.1691, "step": 5676 }, { "epoch": 0.5834788862632282, "grad_norm": 0.05922897160053253, "learning_rate": 0.01, "loss": 2.1854, "step": 5679 }, { "epoch": 0.5837871159971232, "grad_norm": 0.1024496778845787, "learning_rate": 0.01, "loss": 2.1818, "step": 5682 }, { "epoch": 0.5840953457310182, "grad_norm": 0.08880037069320679, "learning_rate": 0.01, "loss": 2.2054, "step": 5685 }, { "epoch": 0.5844035754649132, "grad_norm": 0.04404953494668007, "learning_rate": 0.01, "loss": 2.1524, "step": 5688 }, { "epoch": 0.5847118051988082, "grad_norm": 0.05817687511444092, "learning_rate": 0.01, "loss": 2.1813, "step": 5691 }, { "epoch": 0.5850200349327032, "grad_norm": 0.047581762075424194, "learning_rate": 0.01, "loss": 2.1545, "step": 5694 }, { "epoch": 0.5853282646665982, "grad_norm": 0.11034911125898361, "learning_rate": 0.01, "loss": 2.1803, "step": 5697 }, { "epoch": 0.5856364944004931, "grad_norm": 0.05118945613503456, "learning_rate": 0.01, "loss": 2.1314, "step": 5700 }, { "epoch": 0.5859447241343881, "grad_norm": 0.048316795378923416, "learning_rate": 0.01, "loss": 2.1711, "step": 5703 }, { "epoch": 0.5862529538682831, "grad_norm": 0.12578584253787994, "learning_rate": 0.01, "loss": 2.1636, "step": 5706 }, { "epoch": 0.5865611836021781, "grad_norm": 0.06594375520944595, "learning_rate": 0.01, "loss": 2.1977, "step": 5709 }, { "epoch": 0.5868694133360731, "grad_norm": 0.060622964054346085, "learning_rate": 0.01, "loss": 2.1408, "step": 5712 }, { "epoch": 0.5871776430699681, "grad_norm": 0.10055366903543472, "learning_rate": 0.01, "loss": 2.1999, "step": 5715 }, { "epoch": 0.5874858728038631, "grad_norm": 0.10235504060983658, "learning_rate": 0.01, "loss": 2.1337, "step": 5718 }, { "epoch": 0.5877941025377581, "grad_norm": 0.07707791030406952, "learning_rate": 0.01, "loss": 2.1387, "step": 5721 }, { "epoch": 0.5881023322716531, "grad_norm": 0.05508594587445259, "learning_rate": 0.01, "loss": 2.1494, "step": 5724 }, { "epoch": 0.5884105620055481, "grad_norm": 0.06580860912799835, "learning_rate": 0.01, "loss": 2.1598, "step": 5727 }, { "epoch": 0.5887187917394431, "grad_norm": 0.07102775573730469, "learning_rate": 0.01, "loss": 2.1618, "step": 5730 }, { "epoch": 0.5890270214733381, "grad_norm": 0.06750554591417313, "learning_rate": 0.01, "loss": 2.1782, "step": 5733 }, { "epoch": 0.5893352512072332, "grad_norm": 0.07100195437669754, "learning_rate": 0.01, "loss": 2.1456, "step": 5736 }, { "epoch": 0.5896434809411282, "grad_norm": 0.10585575550794601, "learning_rate": 0.01, "loss": 2.1751, "step": 5739 }, { "epoch": 0.5899517106750232, "grad_norm": 0.055082373321056366, "learning_rate": 0.01, "loss": 2.1808, "step": 5742 }, { "epoch": 0.5902599404089182, "grad_norm": 0.06285014003515244, "learning_rate": 0.01, "loss": 2.1588, "step": 5745 }, { "epoch": 0.5905681701428132, "grad_norm": 0.13328112661838531, "learning_rate": 0.01, "loss": 2.177, "step": 5748 }, { "epoch": 0.5908763998767081, "grad_norm": 0.08568006008863449, "learning_rate": 0.01, "loss": 2.1559, "step": 5751 }, { "epoch": 0.5911846296106031, "grad_norm": 0.07850711792707443, "learning_rate": 0.01, "loss": 2.2047, "step": 5754 }, { "epoch": 0.5914928593444981, "grad_norm": 0.07706760615110397, "learning_rate": 0.01, "loss": 2.1602, "step": 5757 }, { "epoch": 0.5918010890783931, "grad_norm": 0.07512292265892029, "learning_rate": 0.01, "loss": 2.1871, "step": 5760 }, { "epoch": 0.5921093188122881, "grad_norm": 0.059620197862386703, "learning_rate": 0.01, "loss": 2.1484, "step": 5763 }, { "epoch": 0.5924175485461831, "grad_norm": 0.04021789878606796, "learning_rate": 0.01, "loss": 2.1651, "step": 5766 }, { "epoch": 0.5927257782800781, "grad_norm": 0.050683967769145966, "learning_rate": 0.01, "loss": 2.1693, "step": 5769 }, { "epoch": 0.5930340080139731, "grad_norm": 0.07091210782527924, "learning_rate": 0.01, "loss": 2.1851, "step": 5772 }, { "epoch": 0.5933422377478681, "grad_norm": 0.09877889603376389, "learning_rate": 0.01, "loss": 2.1642, "step": 5775 }, { "epoch": 0.5936504674817631, "grad_norm": 0.08692251145839691, "learning_rate": 0.01, "loss": 2.1755, "step": 5778 }, { "epoch": 0.5939586972156581, "grad_norm": 0.06255677342414856, "learning_rate": 0.01, "loss": 2.1634, "step": 5781 }, { "epoch": 0.5942669269495531, "grad_norm": 0.05615478754043579, "learning_rate": 0.01, "loss": 2.1909, "step": 5784 }, { "epoch": 0.5945751566834481, "grad_norm": 0.04576956480741501, "learning_rate": 0.01, "loss": 2.1519, "step": 5787 }, { "epoch": 0.5948833864173431, "grad_norm": 0.044911667704582214, "learning_rate": 0.01, "loss": 2.1697, "step": 5790 }, { "epoch": 0.5951916161512381, "grad_norm": 0.07787128537893295, "learning_rate": 0.01, "loss": 2.1611, "step": 5793 }, { "epoch": 0.595499845885133, "grad_norm": 0.06199866533279419, "learning_rate": 0.01, "loss": 2.1576, "step": 5796 }, { "epoch": 0.595808075619028, "grad_norm": 0.07048948854207993, "learning_rate": 0.01, "loss": 2.1721, "step": 5799 }, { "epoch": 0.596116305352923, "grad_norm": 0.1173306256532669, "learning_rate": 0.01, "loss": 2.1573, "step": 5802 }, { "epoch": 0.596424535086818, "grad_norm": 0.06866045296192169, "learning_rate": 0.01, "loss": 2.1606, "step": 5805 }, { "epoch": 0.596732764820713, "grad_norm": 0.06821485608816147, "learning_rate": 0.01, "loss": 2.1842, "step": 5808 }, { "epoch": 0.597040994554608, "grad_norm": 0.09566816687583923, "learning_rate": 0.01, "loss": 2.1569, "step": 5811 }, { "epoch": 0.597349224288503, "grad_norm": 0.1130233108997345, "learning_rate": 0.01, "loss": 2.1649, "step": 5814 }, { "epoch": 0.597657454022398, "grad_norm": 0.07310149073600769, "learning_rate": 0.01, "loss": 2.1798, "step": 5817 }, { "epoch": 0.597965683756293, "grad_norm": 0.04523763060569763, "learning_rate": 0.01, "loss": 2.1515, "step": 5820 }, { "epoch": 0.598273913490188, "grad_norm": 0.05843660235404968, "learning_rate": 0.01, "loss": 2.1403, "step": 5823 }, { "epoch": 0.598582143224083, "grad_norm": 0.03981595113873482, "learning_rate": 0.01, "loss": 2.1598, "step": 5826 }, { "epoch": 0.598890372957978, "grad_norm": 0.057108644396066666, "learning_rate": 0.01, "loss": 2.1619, "step": 5829 }, { "epoch": 0.599198602691873, "grad_norm": 0.12298591434955597, "learning_rate": 0.01, "loss": 2.125, "step": 5832 }, { "epoch": 0.599506832425768, "grad_norm": 0.06120186299085617, "learning_rate": 0.01, "loss": 2.181, "step": 5835 }, { "epoch": 0.599815062159663, "grad_norm": 0.05780164897441864, "learning_rate": 0.01, "loss": 2.1555, "step": 5838 }, { "epoch": 0.600123291893558, "grad_norm": 0.0962534248828888, "learning_rate": 0.01, "loss": 2.1626, "step": 5841 }, { "epoch": 0.600431521627453, "grad_norm": 0.07417720556259155, "learning_rate": 0.01, "loss": 2.1996, "step": 5844 }, { "epoch": 0.6007397513613479, "grad_norm": 0.08221522718667984, "learning_rate": 0.01, "loss": 2.1562, "step": 5847 }, { "epoch": 0.6010479810952429, "grad_norm": 0.11511900275945663, "learning_rate": 0.01, "loss": 2.196, "step": 5850 }, { "epoch": 0.601356210829138, "grad_norm": 0.1331305354833603, "learning_rate": 0.01, "loss": 2.1649, "step": 5853 }, { "epoch": 0.601664440563033, "grad_norm": 0.07239941507577896, "learning_rate": 0.01, "loss": 2.1695, "step": 5856 }, { "epoch": 0.601972670296928, "grad_norm": 0.05865192040801048, "learning_rate": 0.01, "loss": 2.1808, "step": 5859 }, { "epoch": 0.602280900030823, "grad_norm": 0.047268107533454895, "learning_rate": 0.01, "loss": 2.176, "step": 5862 }, { "epoch": 0.602589129764718, "grad_norm": 0.046770863234996796, "learning_rate": 0.01, "loss": 2.1689, "step": 5865 }, { "epoch": 0.602897359498613, "grad_norm": 0.04817832633852959, "learning_rate": 0.01, "loss": 2.1566, "step": 5868 }, { "epoch": 0.603205589232508, "grad_norm": 0.05692889541387558, "learning_rate": 0.01, "loss": 2.1564, "step": 5871 }, { "epoch": 0.603513818966403, "grad_norm": 0.056694116443395615, "learning_rate": 0.01, "loss": 2.1591, "step": 5874 }, { "epoch": 0.603822048700298, "grad_norm": 0.08296339213848114, "learning_rate": 0.01, "loss": 2.1695, "step": 5877 }, { "epoch": 0.604130278434193, "grad_norm": 0.0934629738330841, "learning_rate": 0.01, "loss": 2.1472, "step": 5880 }, { "epoch": 0.604438508168088, "grad_norm": 0.10192359238862991, "learning_rate": 0.01, "loss": 2.1441, "step": 5883 }, { "epoch": 0.604746737901983, "grad_norm": 0.04818946123123169, "learning_rate": 0.01, "loss": 2.1747, "step": 5886 }, { "epoch": 0.605054967635878, "grad_norm": 0.10131523758172989, "learning_rate": 0.01, "loss": 2.1546, "step": 5889 }, { "epoch": 0.605363197369773, "grad_norm": 0.07115977257490158, "learning_rate": 0.01, "loss": 2.1597, "step": 5892 }, { "epoch": 0.605671427103668, "grad_norm": 0.03929082304239273, "learning_rate": 0.01, "loss": 2.171, "step": 5895 }, { "epoch": 0.6059796568375629, "grad_norm": 0.04109720513224602, "learning_rate": 0.01, "loss": 2.134, "step": 5898 }, { "epoch": 0.6062878865714579, "grad_norm": 0.05026080831885338, "learning_rate": 0.01, "loss": 2.1491, "step": 5901 }, { "epoch": 0.6065961163053529, "grad_norm": 0.08281126618385315, "learning_rate": 0.01, "loss": 2.1732, "step": 5904 }, { "epoch": 0.6069043460392479, "grad_norm": 0.04994012042880058, "learning_rate": 0.01, "loss": 2.1664, "step": 5907 }, { "epoch": 0.6072125757731429, "grad_norm": 0.06299131363630295, "learning_rate": 0.01, "loss": 2.1669, "step": 5910 }, { "epoch": 0.6075208055070379, "grad_norm": 0.059428080916404724, "learning_rate": 0.01, "loss": 2.1731, "step": 5913 }, { "epoch": 0.6078290352409329, "grad_norm": 0.07036252319812775, "learning_rate": 0.01, "loss": 2.1787, "step": 5916 }, { "epoch": 0.6081372649748279, "grad_norm": 0.04721888527274132, "learning_rate": 0.01, "loss": 2.1531, "step": 5919 }, { "epoch": 0.6084454947087229, "grad_norm": 0.06953759491443634, "learning_rate": 0.01, "loss": 2.1573, "step": 5922 }, { "epoch": 0.6087537244426179, "grad_norm": 0.11679168790578842, "learning_rate": 0.01, "loss": 2.155, "step": 5925 }, { "epoch": 0.6090619541765129, "grad_norm": 0.09196575731039047, "learning_rate": 0.01, "loss": 2.1574, "step": 5928 }, { "epoch": 0.6093701839104079, "grad_norm": 0.05219469591975212, "learning_rate": 0.01, "loss": 2.1605, "step": 5931 }, { "epoch": 0.6096784136443029, "grad_norm": 0.09352759271860123, "learning_rate": 0.01, "loss": 2.1456, "step": 5934 }, { "epoch": 0.6099866433781979, "grad_norm": 0.07393237948417664, "learning_rate": 0.01, "loss": 2.1611, "step": 5937 }, { "epoch": 0.6102948731120929, "grad_norm": 0.06727741658687592, "learning_rate": 0.01, "loss": 2.1599, "step": 5940 }, { "epoch": 0.6106031028459878, "grad_norm": 0.09024669975042343, "learning_rate": 0.01, "loss": 2.1621, "step": 5943 }, { "epoch": 0.6109113325798828, "grad_norm": 0.04514656960964203, "learning_rate": 0.01, "loss": 2.1809, "step": 5946 }, { "epoch": 0.6112195623137778, "grad_norm": 0.04011565446853638, "learning_rate": 0.01, "loss": 2.1715, "step": 5949 }, { "epoch": 0.6115277920476728, "grad_norm": 0.04640655592083931, "learning_rate": 0.01, "loss": 2.15, "step": 5952 }, { "epoch": 0.6118360217815678, "grad_norm": 0.0471080057322979, "learning_rate": 0.01, "loss": 2.1805, "step": 5955 }, { "epoch": 0.6121442515154628, "grad_norm": 0.17398513853549957, "learning_rate": 0.01, "loss": 2.1497, "step": 5958 }, { "epoch": 0.6124524812493578, "grad_norm": 0.06299551576375961, "learning_rate": 0.01, "loss": 2.1387, "step": 5961 }, { "epoch": 0.6127607109832528, "grad_norm": 0.07517322897911072, "learning_rate": 0.01, "loss": 2.1348, "step": 5964 }, { "epoch": 0.6130689407171478, "grad_norm": 0.050515878945589066, "learning_rate": 0.01, "loss": 2.1725, "step": 5967 }, { "epoch": 0.6133771704510429, "grad_norm": 0.04682675376534462, "learning_rate": 0.01, "loss": 2.1759, "step": 5970 }, { "epoch": 0.6136854001849379, "grad_norm": 0.05297816917300224, "learning_rate": 0.01, "loss": 2.1491, "step": 5973 }, { "epoch": 0.6139936299188329, "grad_norm": 0.07467235624790192, "learning_rate": 0.01, "loss": 2.1556, "step": 5976 }, { "epoch": 0.6143018596527279, "grad_norm": 0.06621374934911728, "learning_rate": 0.01, "loss": 2.1498, "step": 5979 }, { "epoch": 0.6146100893866229, "grad_norm": 0.0538405105471611, "learning_rate": 0.01, "loss": 2.1694, "step": 5982 }, { "epoch": 0.6149183191205179, "grad_norm": 0.09891212731599808, "learning_rate": 0.01, "loss": 2.1598, "step": 5985 }, { "epoch": 0.6152265488544129, "grad_norm": 0.042064208537340164, "learning_rate": 0.01, "loss": 2.1375, "step": 5988 }, { "epoch": 0.6155347785883079, "grad_norm": 0.06750064343214035, "learning_rate": 0.01, "loss": 2.1371, "step": 5991 }, { "epoch": 0.6158430083222028, "grad_norm": 0.0626809298992157, "learning_rate": 0.01, "loss": 2.1455, "step": 5994 }, { "epoch": 0.6161512380560978, "grad_norm": 0.04291335120797157, "learning_rate": 0.01, "loss": 2.1397, "step": 5997 }, { "epoch": 0.6164594677899928, "grad_norm": 0.05945251137018204, "learning_rate": 0.01, "loss": 2.1346, "step": 6000 }, { "epoch": 0.6167676975238878, "grad_norm": 0.15699933469295502, "learning_rate": 0.01, "loss": 2.1384, "step": 6003 }, { "epoch": 0.6170759272577828, "grad_norm": 0.06863987445831299, "learning_rate": 0.01, "loss": 2.1401, "step": 6006 }, { "epoch": 0.6173841569916778, "grad_norm": 0.04850529506802559, "learning_rate": 0.01, "loss": 2.1637, "step": 6009 }, { "epoch": 0.6176923867255728, "grad_norm": 0.05660491809248924, "learning_rate": 0.01, "loss": 2.1721, "step": 6012 }, { "epoch": 0.6180006164594678, "grad_norm": 0.050568364560604095, "learning_rate": 0.01, "loss": 2.1676, "step": 6015 }, { "epoch": 0.6183088461933628, "grad_norm": 0.060765717178583145, "learning_rate": 0.01, "loss": 2.127, "step": 6018 }, { "epoch": 0.6186170759272578, "grad_norm": 0.0731448233127594, "learning_rate": 0.01, "loss": 2.1531, "step": 6021 }, { "epoch": 0.6189253056611528, "grad_norm": 0.055431608110666275, "learning_rate": 0.01, "loss": 2.1662, "step": 6024 }, { "epoch": 0.6192335353950478, "grad_norm": 0.05376220867037773, "learning_rate": 0.01, "loss": 2.1465, "step": 6027 }, { "epoch": 0.6195417651289428, "grad_norm": 0.09729186445474625, "learning_rate": 0.01, "loss": 2.161, "step": 6030 }, { "epoch": 0.6198499948628378, "grad_norm": 0.08046093583106995, "learning_rate": 0.01, "loss": 2.1435, "step": 6033 }, { "epoch": 0.6201582245967328, "grad_norm": 0.09514495730400085, "learning_rate": 0.01, "loss": 2.1511, "step": 6036 }, { "epoch": 0.6204664543306277, "grad_norm": 0.056993287056684494, "learning_rate": 0.01, "loss": 2.1439, "step": 6039 }, { "epoch": 0.6207746840645227, "grad_norm": 0.06429582834243774, "learning_rate": 0.01, "loss": 2.1393, "step": 6042 }, { "epoch": 0.6210829137984177, "grad_norm": 0.1299380660057068, "learning_rate": 0.01, "loss": 2.1831, "step": 6045 }, { "epoch": 0.6213911435323127, "grad_norm": 0.13815906643867493, "learning_rate": 0.01, "loss": 2.1645, "step": 6048 }, { "epoch": 0.6216993732662077, "grad_norm": 0.056314874440431595, "learning_rate": 0.01, "loss": 2.1417, "step": 6051 }, { "epoch": 0.6220076030001027, "grad_norm": 0.06146218627691269, "learning_rate": 0.01, "loss": 2.1418, "step": 6054 }, { "epoch": 0.6223158327339977, "grad_norm": 0.062167149037122726, "learning_rate": 0.01, "loss": 2.1778, "step": 6057 }, { "epoch": 0.6226240624678927, "grad_norm": 0.059581879526376724, "learning_rate": 0.01, "loss": 2.1725, "step": 6060 }, { "epoch": 0.6229322922017877, "grad_norm": 0.044389910995960236, "learning_rate": 0.01, "loss": 2.1553, "step": 6063 }, { "epoch": 0.6232405219356827, "grad_norm": 0.036525238305330276, "learning_rate": 0.01, "loss": 2.1545, "step": 6066 }, { "epoch": 0.6235487516695777, "grad_norm": 0.0995573177933693, "learning_rate": 0.01, "loss": 2.1566, "step": 6069 }, { "epoch": 0.6238569814034727, "grad_norm": 0.10412520170211792, "learning_rate": 0.01, "loss": 2.1525, "step": 6072 }, { "epoch": 0.6241652111373677, "grad_norm": 0.10417335480451584, "learning_rate": 0.01, "loss": 2.1535, "step": 6075 }, { "epoch": 0.6244734408712627, "grad_norm": 0.09024351090192795, "learning_rate": 0.01, "loss": 2.1551, "step": 6078 }, { "epoch": 0.6247816706051577, "grad_norm": 0.04889573156833649, "learning_rate": 0.01, "loss": 2.1549, "step": 6081 }, { "epoch": 0.6250899003390528, "grad_norm": 0.05154373124241829, "learning_rate": 0.01, "loss": 2.1461, "step": 6084 }, { "epoch": 0.6253981300729478, "grad_norm": 0.04337237402796745, "learning_rate": 0.01, "loss": 2.1733, "step": 6087 }, { "epoch": 0.6257063598068427, "grad_norm": 0.06173473224043846, "learning_rate": 0.01, "loss": 2.1657, "step": 6090 }, { "epoch": 0.6260145895407377, "grad_norm": 0.06174352392554283, "learning_rate": 0.01, "loss": 2.1528, "step": 6093 }, { "epoch": 0.6263228192746327, "grad_norm": 0.07301110029220581, "learning_rate": 0.01, "loss": 2.1489, "step": 6096 }, { "epoch": 0.6266310490085277, "grad_norm": 0.04265190288424492, "learning_rate": 0.01, "loss": 2.1624, "step": 6099 }, { "epoch": 0.6269392787424227, "grad_norm": 0.056723251938819885, "learning_rate": 0.01, "loss": 2.1624, "step": 6102 }, { "epoch": 0.6272475084763177, "grad_norm": 0.06809309124946594, "learning_rate": 0.01, "loss": 2.1525, "step": 6105 }, { "epoch": 0.6275557382102127, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.1472, "step": 6108 }, { "epoch": 0.6278639679441077, "grad_norm": 0.05961904302239418, "learning_rate": 0.01, "loss": 2.1561, "step": 6111 }, { "epoch": 0.6281721976780027, "grad_norm": 0.04617665335536003, "learning_rate": 0.01, "loss": 2.1475, "step": 6114 }, { "epoch": 0.6284804274118977, "grad_norm": 0.040670618414878845, "learning_rate": 0.01, "loss": 2.153, "step": 6117 }, { "epoch": 0.6287886571457927, "grad_norm": 0.09909021109342575, "learning_rate": 0.01, "loss": 2.141, "step": 6120 }, { "epoch": 0.6290968868796877, "grad_norm": 0.04966261237859726, "learning_rate": 0.01, "loss": 2.1264, "step": 6123 }, { "epoch": 0.6294051166135827, "grad_norm": 0.0570046491920948, "learning_rate": 0.01, "loss": 2.1572, "step": 6126 }, { "epoch": 0.6297133463474777, "grad_norm": 0.10374405980110168, "learning_rate": 0.01, "loss": 2.149, "step": 6129 }, { "epoch": 0.6300215760813727, "grad_norm": 0.061325542628765106, "learning_rate": 0.01, "loss": 2.1521, "step": 6132 }, { "epoch": 0.6303298058152677, "grad_norm": 0.16151310503482819, "learning_rate": 0.01, "loss": 2.1825, "step": 6135 }, { "epoch": 0.6306380355491626, "grad_norm": 0.0921199768781662, "learning_rate": 0.01, "loss": 2.1773, "step": 6138 }, { "epoch": 0.6309462652830576, "grad_norm": 0.05603238567709923, "learning_rate": 0.01, "loss": 2.1452, "step": 6141 }, { "epoch": 0.6312544950169526, "grad_norm": 0.12173126637935638, "learning_rate": 0.01, "loss": 2.1713, "step": 6144 }, { "epoch": 0.6315627247508476, "grad_norm": 0.04609265923500061, "learning_rate": 0.01, "loss": 2.1518, "step": 6147 }, { "epoch": 0.6318709544847426, "grad_norm": 0.06445127725601196, "learning_rate": 0.01, "loss": 2.162, "step": 6150 }, { "epoch": 0.6321791842186376, "grad_norm": 0.05396106466650963, "learning_rate": 0.01, "loss": 2.1248, "step": 6153 }, { "epoch": 0.6324874139525326, "grad_norm": 0.06955734640359879, "learning_rate": 0.01, "loss": 2.1497, "step": 6156 }, { "epoch": 0.6327956436864276, "grad_norm": 0.04371445253491402, "learning_rate": 0.01, "loss": 2.1167, "step": 6159 }, { "epoch": 0.6331038734203226, "grad_norm": 0.07146921008825302, "learning_rate": 0.01, "loss": 2.1633, "step": 6162 }, { "epoch": 0.6334121031542176, "grad_norm": 0.08056561648845673, "learning_rate": 0.01, "loss": 2.1506, "step": 6165 }, { "epoch": 0.6337203328881126, "grad_norm": 0.08875605463981628, "learning_rate": 0.01, "loss": 2.1834, "step": 6168 }, { "epoch": 0.6340285626220076, "grad_norm": 0.05090434104204178, "learning_rate": 0.01, "loss": 2.1514, "step": 6171 }, { "epoch": 0.6343367923559026, "grad_norm": 0.11710961163043976, "learning_rate": 0.01, "loss": 2.1589, "step": 6174 }, { "epoch": 0.6346450220897976, "grad_norm": 0.04704523831605911, "learning_rate": 0.01, "loss": 2.1469, "step": 6177 }, { "epoch": 0.6349532518236926, "grad_norm": 0.045143596827983856, "learning_rate": 0.01, "loss": 2.1311, "step": 6180 }, { "epoch": 0.6352614815575875, "grad_norm": 0.04246919974684715, "learning_rate": 0.01, "loss": 2.1481, "step": 6183 }, { "epoch": 0.6355697112914825, "grad_norm": 0.04303867742419243, "learning_rate": 0.01, "loss": 2.1557, "step": 6186 }, { "epoch": 0.6358779410253775, "grad_norm": 0.17376503348350525, "learning_rate": 0.01, "loss": 2.1616, "step": 6189 }, { "epoch": 0.6361861707592725, "grad_norm": 0.11983154714107513, "learning_rate": 0.01, "loss": 2.1569, "step": 6192 }, { "epoch": 0.6364944004931675, "grad_norm": 0.0443497858941555, "learning_rate": 0.01, "loss": 2.1454, "step": 6195 }, { "epoch": 0.6368026302270625, "grad_norm": 0.04241250827908516, "learning_rate": 0.01, "loss": 2.1409, "step": 6198 }, { "epoch": 0.6371108599609576, "grad_norm": 0.07058902829885483, "learning_rate": 0.01, "loss": 2.1246, "step": 6201 }, { "epoch": 0.6374190896948526, "grad_norm": 0.060852985829114914, "learning_rate": 0.01, "loss": 2.1512, "step": 6204 }, { "epoch": 0.6377273194287476, "grad_norm": 0.058703117072582245, "learning_rate": 0.01, "loss": 2.1114, "step": 6207 }, { "epoch": 0.6380355491626426, "grad_norm": 0.08501632511615753, "learning_rate": 0.01, "loss": 2.1818, "step": 6210 }, { "epoch": 0.6383437788965376, "grad_norm": 0.07715412974357605, "learning_rate": 0.01, "loss": 2.1661, "step": 6213 }, { "epoch": 0.6386520086304326, "grad_norm": 0.06822165101766586, "learning_rate": 0.01, "loss": 2.1652, "step": 6216 }, { "epoch": 0.6389602383643276, "grad_norm": 0.048459213227033615, "learning_rate": 0.01, "loss": 2.1311, "step": 6219 }, { "epoch": 0.6392684680982226, "grad_norm": 0.08208850026130676, "learning_rate": 0.01, "loss": 2.1316, "step": 6222 }, { "epoch": 0.6395766978321176, "grad_norm": 0.06399821490049362, "learning_rate": 0.01, "loss": 2.1354, "step": 6225 }, { "epoch": 0.6398849275660126, "grad_norm": 0.12036826461553574, "learning_rate": 0.01, "loss": 2.1509, "step": 6228 }, { "epoch": 0.6401931572999076, "grad_norm": 0.08180755376815796, "learning_rate": 0.01, "loss": 2.1571, "step": 6231 }, { "epoch": 0.6405013870338025, "grad_norm": 0.053771521896123886, "learning_rate": 0.01, "loss": 2.1485, "step": 6234 }, { "epoch": 0.6408096167676975, "grad_norm": 0.042291607707738876, "learning_rate": 0.01, "loss": 2.1606, "step": 6237 }, { "epoch": 0.6411178465015925, "grad_norm": 0.044655315577983856, "learning_rate": 0.01, "loss": 2.1592, "step": 6240 }, { "epoch": 0.6414260762354875, "grad_norm": 0.07763859629631042, "learning_rate": 0.01, "loss": 2.1543, "step": 6243 }, { "epoch": 0.6417343059693825, "grad_norm": 0.055368274450302124, "learning_rate": 0.01, "loss": 2.1643, "step": 6246 }, { "epoch": 0.6420425357032775, "grad_norm": 0.047774944454431534, "learning_rate": 0.01, "loss": 2.1542, "step": 6249 }, { "epoch": 0.6423507654371725, "grad_norm": 0.06478223204612732, "learning_rate": 0.01, "loss": 2.1501, "step": 6252 }, { "epoch": 0.6426589951710675, "grad_norm": 0.03782160207629204, "learning_rate": 0.01, "loss": 2.1455, "step": 6255 }, { "epoch": 0.6429672249049625, "grad_norm": 0.11297930777072906, "learning_rate": 0.01, "loss": 2.1667, "step": 6258 }, { "epoch": 0.6432754546388575, "grad_norm": 0.09408997744321823, "learning_rate": 0.01, "loss": 2.146, "step": 6261 }, { "epoch": 0.6435836843727525, "grad_norm": 0.06677352637052536, "learning_rate": 0.01, "loss": 2.1723, "step": 6264 }, { "epoch": 0.6438919141066475, "grad_norm": 0.08687873184680939, "learning_rate": 0.01, "loss": 2.1517, "step": 6267 }, { "epoch": 0.6442001438405425, "grad_norm": 0.06850516051054001, "learning_rate": 0.01, "loss": 2.148, "step": 6270 }, { "epoch": 0.6445083735744375, "grad_norm": 0.07705084979534149, "learning_rate": 0.01, "loss": 2.1567, "step": 6273 }, { "epoch": 0.6448166033083325, "grad_norm": 0.1622423529624939, "learning_rate": 0.01, "loss": 2.1676, "step": 6276 }, { "epoch": 0.6451248330422275, "grad_norm": 0.11197759211063385, "learning_rate": 0.01, "loss": 2.1376, "step": 6279 }, { "epoch": 0.6454330627761224, "grad_norm": 0.06562814861536026, "learning_rate": 0.01, "loss": 2.1652, "step": 6282 }, { "epoch": 0.6457412925100174, "grad_norm": 0.0867902860045433, "learning_rate": 0.01, "loss": 2.1733, "step": 6285 }, { "epoch": 0.6460495222439124, "grad_norm": 0.08153738081455231, "learning_rate": 0.01, "loss": 2.1442, "step": 6288 }, { "epoch": 0.6463577519778074, "grad_norm": 0.09800709784030914, "learning_rate": 0.01, "loss": 2.1262, "step": 6291 }, { "epoch": 0.6466659817117024, "grad_norm": 0.07728230953216553, "learning_rate": 0.01, "loss": 2.139, "step": 6294 }, { "epoch": 0.6469742114455974, "grad_norm": 0.09658671170473099, "learning_rate": 0.01, "loss": 2.1421, "step": 6297 }, { "epoch": 0.6472824411794924, "grad_norm": 0.0448787659406662, "learning_rate": 0.01, "loss": 2.1415, "step": 6300 }, { "epoch": 0.6475906709133874, "grad_norm": 0.03848707675933838, "learning_rate": 0.01, "loss": 2.1209, "step": 6303 }, { "epoch": 0.6478989006472824, "grad_norm": 0.07465004920959473, "learning_rate": 0.01, "loss": 2.1395, "step": 6306 }, { "epoch": 0.6482071303811774, "grad_norm": 0.060424912720918655, "learning_rate": 0.01, "loss": 2.1806, "step": 6309 }, { "epoch": 0.6485153601150724, "grad_norm": 0.05204974114894867, "learning_rate": 0.01, "loss": 2.1287, "step": 6312 }, { "epoch": 0.6488235898489675, "grad_norm": 0.06045055389404297, "learning_rate": 0.01, "loss": 2.1727, "step": 6315 }, { "epoch": 0.6491318195828625, "grad_norm": 0.04978582262992859, "learning_rate": 0.01, "loss": 2.1264, "step": 6318 }, { "epoch": 0.6494400493167575, "grad_norm": 0.08131048828363419, "learning_rate": 0.01, "loss": 2.137, "step": 6321 }, { "epoch": 0.6497482790506525, "grad_norm": 0.09749994426965714, "learning_rate": 0.01, "loss": 2.1557, "step": 6324 }, { "epoch": 0.6500565087845475, "grad_norm": 0.06079535186290741, "learning_rate": 0.01, "loss": 2.1432, "step": 6327 }, { "epoch": 0.6503647385184425, "grad_norm": 0.08241060376167297, "learning_rate": 0.01, "loss": 2.1551, "step": 6330 }, { "epoch": 0.6506729682523374, "grad_norm": 0.12339378148317337, "learning_rate": 0.01, "loss": 2.1216, "step": 6333 }, { "epoch": 0.6509811979862324, "grad_norm": 0.0660511702299118, "learning_rate": 0.01, "loss": 2.1156, "step": 6336 }, { "epoch": 0.6512894277201274, "grad_norm": 0.06279938668012619, "learning_rate": 0.01, "loss": 2.1778, "step": 6339 }, { "epoch": 0.6515976574540224, "grad_norm": 0.068712018430233, "learning_rate": 0.01, "loss": 2.1348, "step": 6342 }, { "epoch": 0.6519058871879174, "grad_norm": 0.05808734893798828, "learning_rate": 0.01, "loss": 2.135, "step": 6345 }, { "epoch": 0.6522141169218124, "grad_norm": 0.044942643493413925, "learning_rate": 0.01, "loss": 2.1613, "step": 6348 }, { "epoch": 0.6525223466557074, "grad_norm": 0.11666214466094971, "learning_rate": 0.01, "loss": 2.1399, "step": 6351 }, { "epoch": 0.6528305763896024, "grad_norm": 0.06776747852563858, "learning_rate": 0.01, "loss": 2.1369, "step": 6354 }, { "epoch": 0.6531388061234974, "grad_norm": 0.10171874612569809, "learning_rate": 0.01, "loss": 2.1273, "step": 6357 }, { "epoch": 0.6534470358573924, "grad_norm": 0.04611232131719589, "learning_rate": 0.01, "loss": 2.1482, "step": 6360 }, { "epoch": 0.6537552655912874, "grad_norm": 0.042139992117881775, "learning_rate": 0.01, "loss": 2.1235, "step": 6363 }, { "epoch": 0.6540634953251824, "grad_norm": 0.057816632091999054, "learning_rate": 0.01, "loss": 2.1449, "step": 6366 }, { "epoch": 0.6543717250590774, "grad_norm": 0.11400949209928513, "learning_rate": 0.01, "loss": 2.1566, "step": 6369 }, { "epoch": 0.6546799547929724, "grad_norm": 0.07320736348628998, "learning_rate": 0.01, "loss": 2.1682, "step": 6372 }, { "epoch": 0.6549881845268674, "grad_norm": 0.07262291014194489, "learning_rate": 0.01, "loss": 2.1514, "step": 6375 }, { "epoch": 0.6552964142607623, "grad_norm": 0.05559679865837097, "learning_rate": 0.01, "loss": 2.1347, "step": 6378 }, { "epoch": 0.6556046439946573, "grad_norm": 0.049424149096012115, "learning_rate": 0.01, "loss": 2.1423, "step": 6381 }, { "epoch": 0.6559128737285523, "grad_norm": 0.05457301065325737, "learning_rate": 0.01, "loss": 2.1425, "step": 6384 }, { "epoch": 0.6562211034624473, "grad_norm": 0.058564141392707825, "learning_rate": 0.01, "loss": 2.1158, "step": 6387 }, { "epoch": 0.6565293331963423, "grad_norm": 0.10944786667823792, "learning_rate": 0.01, "loss": 2.146, "step": 6390 }, { "epoch": 0.6568375629302373, "grad_norm": 0.07760695368051529, "learning_rate": 0.01, "loss": 2.176, "step": 6393 }, { "epoch": 0.6571457926641323, "grad_norm": 0.07621042430400848, "learning_rate": 0.01, "loss": 2.1779, "step": 6396 }, { "epoch": 0.6574540223980273, "grad_norm": 0.09723789244890213, "learning_rate": 0.01, "loss": 2.1455, "step": 6399 }, { "epoch": 0.6577622521319223, "grad_norm": 0.05648832768201828, "learning_rate": 0.01, "loss": 2.154, "step": 6402 }, { "epoch": 0.6580704818658173, "grad_norm": 0.04370080679655075, "learning_rate": 0.01, "loss": 2.1374, "step": 6405 }, { "epoch": 0.6583787115997123, "grad_norm": 0.03729141131043434, "learning_rate": 0.01, "loss": 2.1275, "step": 6408 }, { "epoch": 0.6586869413336073, "grad_norm": 0.055584125220775604, "learning_rate": 0.01, "loss": 2.1442, "step": 6411 }, { "epoch": 0.6589951710675023, "grad_norm": 0.07981918007135391, "learning_rate": 0.01, "loss": 2.1618, "step": 6414 }, { "epoch": 0.6593034008013973, "grad_norm": 0.09241674095392227, "learning_rate": 0.01, "loss": 2.1519, "step": 6417 }, { "epoch": 0.6596116305352923, "grad_norm": 0.10454630106687546, "learning_rate": 0.01, "loss": 2.1309, "step": 6420 }, { "epoch": 0.6599198602691873, "grad_norm": 0.08674053847789764, "learning_rate": 0.01, "loss": 2.1617, "step": 6423 }, { "epoch": 0.6602280900030822, "grad_norm": 0.06003529578447342, "learning_rate": 0.01, "loss": 2.1475, "step": 6426 }, { "epoch": 0.6605363197369772, "grad_norm": 0.07370956987142563, "learning_rate": 0.01, "loss": 2.1466, "step": 6429 }, { "epoch": 0.6608445494708723, "grad_norm": 0.05090004578232765, "learning_rate": 0.01, "loss": 2.1506, "step": 6432 }, { "epoch": 0.6611527792047673, "grad_norm": 0.06062362715601921, "learning_rate": 0.01, "loss": 2.1601, "step": 6435 }, { "epoch": 0.6614610089386623, "grad_norm": 0.05484107881784439, "learning_rate": 0.01, "loss": 2.1452, "step": 6438 }, { "epoch": 0.6617692386725573, "grad_norm": 0.1367156058549881, "learning_rate": 0.01, "loss": 2.1586, "step": 6441 }, { "epoch": 0.6620774684064523, "grad_norm": 0.05140338093042374, "learning_rate": 0.01, "loss": 2.1463, "step": 6444 }, { "epoch": 0.6623856981403473, "grad_norm": 0.09168683737516403, "learning_rate": 0.01, "loss": 2.1467, "step": 6447 }, { "epoch": 0.6626939278742423, "grad_norm": 0.04098822921514511, "learning_rate": 0.01, "loss": 2.1648, "step": 6450 }, { "epoch": 0.6630021576081373, "grad_norm": 0.049763478338718414, "learning_rate": 0.01, "loss": 2.1289, "step": 6453 }, { "epoch": 0.6633103873420323, "grad_norm": 0.060069404542446136, "learning_rate": 0.01, "loss": 2.1467, "step": 6456 }, { "epoch": 0.6636186170759273, "grad_norm": 0.06611450761556625, "learning_rate": 0.01, "loss": 2.1599, "step": 6459 }, { "epoch": 0.6639268468098223, "grad_norm": 0.04955270141363144, "learning_rate": 0.01, "loss": 2.136, "step": 6462 }, { "epoch": 0.6642350765437173, "grad_norm": 0.04004522040486336, "learning_rate": 0.01, "loss": 2.1457, "step": 6465 }, { "epoch": 0.6645433062776123, "grad_norm": 0.06539756804704666, "learning_rate": 0.01, "loss": 2.1458, "step": 6468 }, { "epoch": 0.6648515360115073, "grad_norm": 0.10684728622436523, "learning_rate": 0.01, "loss": 2.1279, "step": 6471 }, { "epoch": 0.6651597657454023, "grad_norm": 0.09936464577913284, "learning_rate": 0.01, "loss": 2.1767, "step": 6474 }, { "epoch": 0.6654679954792972, "grad_norm": 0.04908827692270279, "learning_rate": 0.01, "loss": 2.1259, "step": 6477 }, { "epoch": 0.6657762252131922, "grad_norm": 0.048053622245788574, "learning_rate": 0.01, "loss": 2.1718, "step": 6480 }, { "epoch": 0.6660844549470872, "grad_norm": 0.05524458363652229, "learning_rate": 0.01, "loss": 2.1673, "step": 6483 }, { "epoch": 0.6663926846809822, "grad_norm": 0.05107030272483826, "learning_rate": 0.01, "loss": 2.13, "step": 6486 }, { "epoch": 0.6667009144148772, "grad_norm": 0.12472579628229141, "learning_rate": 0.01, "loss": 2.149, "step": 6489 }, { "epoch": 0.6670091441487722, "grad_norm": 0.05257454514503479, "learning_rate": 0.01, "loss": 2.1343, "step": 6492 }, { "epoch": 0.6673173738826672, "grad_norm": 0.05986837297677994, "learning_rate": 0.01, "loss": 2.1265, "step": 6495 }, { "epoch": 0.6676256036165622, "grad_norm": 0.08322940021753311, "learning_rate": 0.01, "loss": 2.1317, "step": 6498 }, { "epoch": 0.6679338333504572, "grad_norm": 0.0466473363339901, "learning_rate": 0.01, "loss": 2.1235, "step": 6501 }, { "epoch": 0.6682420630843522, "grad_norm": 0.05092160776257515, "learning_rate": 0.01, "loss": 2.1672, "step": 6504 }, { "epoch": 0.6685502928182472, "grad_norm": 0.08392294496297836, "learning_rate": 0.01, "loss": 2.1473, "step": 6507 }, { "epoch": 0.6688585225521422, "grad_norm": 0.042165517807006836, "learning_rate": 0.01, "loss": 2.1181, "step": 6510 }, { "epoch": 0.6691667522860372, "grad_norm": 0.06214481219649315, "learning_rate": 0.01, "loss": 2.138, "step": 6513 }, { "epoch": 0.6694749820199322, "grad_norm": 0.06087846681475639, "learning_rate": 0.01, "loss": 2.15, "step": 6516 }, { "epoch": 0.6697832117538272, "grad_norm": 0.047256652265787125, "learning_rate": 0.01, "loss": 2.1433, "step": 6519 }, { "epoch": 0.6700914414877222, "grad_norm": 0.10626421123743057, "learning_rate": 0.01, "loss": 2.156, "step": 6522 }, { "epoch": 0.6703996712216171, "grad_norm": 0.09426552802324295, "learning_rate": 0.01, "loss": 2.1472, "step": 6525 }, { "epoch": 0.6707079009555121, "grad_norm": 0.0632442831993103, "learning_rate": 0.01, "loss": 2.1536, "step": 6528 }, { "epoch": 0.6710161306894071, "grad_norm": 0.07149971276521683, "learning_rate": 0.01, "loss": 2.1694, "step": 6531 }, { "epoch": 0.6713243604233021, "grad_norm": 0.04060966521501541, "learning_rate": 0.01, "loss": 2.164, "step": 6534 }, { "epoch": 0.6716325901571971, "grad_norm": 0.20043891668319702, "learning_rate": 0.01, "loss": 2.125, "step": 6537 }, { "epoch": 0.6719408198910921, "grad_norm": 0.06755783408880234, "learning_rate": 0.01, "loss": 2.15, "step": 6540 }, { "epoch": 0.6722490496249871, "grad_norm": 0.0509268082678318, "learning_rate": 0.01, "loss": 2.1405, "step": 6543 }, { "epoch": 0.6725572793588821, "grad_norm": 0.04033916816115379, "learning_rate": 0.01, "loss": 2.136, "step": 6546 }, { "epoch": 0.6728655090927772, "grad_norm": 0.04707946255803108, "learning_rate": 0.01, "loss": 2.1514, "step": 6549 }, { "epoch": 0.6731737388266722, "grad_norm": 0.04360898956656456, "learning_rate": 0.01, "loss": 2.1518, "step": 6552 }, { "epoch": 0.6734819685605672, "grad_norm": 0.11959343403577805, "learning_rate": 0.01, "loss": 2.1377, "step": 6555 }, { "epoch": 0.6737901982944622, "grad_norm": 0.06620760262012482, "learning_rate": 0.01, "loss": 2.1419, "step": 6558 }, { "epoch": 0.6740984280283572, "grad_norm": 0.056747015565633774, "learning_rate": 0.01, "loss": 2.138, "step": 6561 }, { "epoch": 0.6744066577622522, "grad_norm": 0.05230560526251793, "learning_rate": 0.01, "loss": 2.1335, "step": 6564 }, { "epoch": 0.6747148874961472, "grad_norm": 0.0526299811899662, "learning_rate": 0.01, "loss": 2.131, "step": 6567 }, { "epoch": 0.6750231172300422, "grad_norm": 0.15683774650096893, "learning_rate": 0.01, "loss": 2.1167, "step": 6570 }, { "epoch": 0.6753313469639372, "grad_norm": 0.10133557766675949, "learning_rate": 0.01, "loss": 2.1219, "step": 6573 }, { "epoch": 0.6756395766978321, "grad_norm": 0.06826774775981903, "learning_rate": 0.01, "loss": 2.1416, "step": 6576 }, { "epoch": 0.6759478064317271, "grad_norm": 0.046236682683229446, "learning_rate": 0.01, "loss": 2.1704, "step": 6579 }, { "epoch": 0.6762560361656221, "grad_norm": 0.07654762268066406, "learning_rate": 0.01, "loss": 2.1411, "step": 6582 }, { "epoch": 0.6765642658995171, "grad_norm": 0.07760706543922424, "learning_rate": 0.01, "loss": 2.168, "step": 6585 }, { "epoch": 0.6768724956334121, "grad_norm": 0.04213540256023407, "learning_rate": 0.01, "loss": 2.1899, "step": 6588 }, { "epoch": 0.6771807253673071, "grad_norm": 0.0517420619726181, "learning_rate": 0.01, "loss": 2.1561, "step": 6591 }, { "epoch": 0.6774889551012021, "grad_norm": 0.04073292762041092, "learning_rate": 0.01, "loss": 2.1475, "step": 6594 }, { "epoch": 0.6777971848350971, "grad_norm": 0.11223835498094559, "learning_rate": 0.01, "loss": 2.1102, "step": 6597 }, { "epoch": 0.6781054145689921, "grad_norm": 0.08094224333763123, "learning_rate": 0.01, "loss": 2.1537, "step": 6600 }, { "epoch": 0.6784136443028871, "grad_norm": 0.036313675343990326, "learning_rate": 0.01, "loss": 2.1471, "step": 6603 }, { "epoch": 0.6787218740367821, "grad_norm": 0.09553749114274979, "learning_rate": 0.01, "loss": 2.1445, "step": 6606 }, { "epoch": 0.6790301037706771, "grad_norm": 0.07334265112876892, "learning_rate": 0.01, "loss": 2.1594, "step": 6609 }, { "epoch": 0.6793383335045721, "grad_norm": 0.12031051516532898, "learning_rate": 0.01, "loss": 2.1321, "step": 6612 }, { "epoch": 0.6796465632384671, "grad_norm": 0.08834968507289886, "learning_rate": 0.01, "loss": 2.1474, "step": 6615 }, { "epoch": 0.679954792972362, "grad_norm": 0.05016850307583809, "learning_rate": 0.01, "loss": 2.1582, "step": 6618 }, { "epoch": 0.680263022706257, "grad_norm": 0.039213377982378006, "learning_rate": 0.01, "loss": 2.1461, "step": 6621 }, { "epoch": 0.680571252440152, "grad_norm": 0.035611145198345184, "learning_rate": 0.01, "loss": 2.137, "step": 6624 }, { "epoch": 0.680879482174047, "grad_norm": 0.09345167875289917, "learning_rate": 0.01, "loss": 2.1357, "step": 6627 }, { "epoch": 0.681187711907942, "grad_norm": 0.04311450198292732, "learning_rate": 0.01, "loss": 2.1413, "step": 6630 }, { "epoch": 0.681495941641837, "grad_norm": 0.040315765887498856, "learning_rate": 0.01, "loss": 2.1091, "step": 6633 }, { "epoch": 0.681804171375732, "grad_norm": 0.11044291406869888, "learning_rate": 0.01, "loss": 2.1392, "step": 6636 }, { "epoch": 0.682112401109627, "grad_norm": 0.1288553774356842, "learning_rate": 0.01, "loss": 2.1129, "step": 6639 }, { "epoch": 0.682420630843522, "grad_norm": 0.0698169469833374, "learning_rate": 0.01, "loss": 2.137, "step": 6642 }, { "epoch": 0.682728860577417, "grad_norm": 0.037890784442424774, "learning_rate": 0.01, "loss": 2.1195, "step": 6645 }, { "epoch": 0.683037090311312, "grad_norm": 0.07425201684236526, "learning_rate": 0.01, "loss": 2.1194, "step": 6648 }, { "epoch": 0.683345320045207, "grad_norm": 0.058168716728687286, "learning_rate": 0.01, "loss": 2.1371, "step": 6651 }, { "epoch": 0.683653549779102, "grad_norm": 0.05515358969569206, "learning_rate": 0.01, "loss": 2.137, "step": 6654 }, { "epoch": 0.683961779512997, "grad_norm": 0.0501445047557354, "learning_rate": 0.01, "loss": 2.1539, "step": 6657 }, { "epoch": 0.684270009246892, "grad_norm": 0.06167145445942879, "learning_rate": 0.01, "loss": 2.1413, "step": 6660 }, { "epoch": 0.6845782389807871, "grad_norm": 0.0841723158955574, "learning_rate": 0.01, "loss": 2.1194, "step": 6663 }, { "epoch": 0.6848864687146821, "grad_norm": 0.06027607619762421, "learning_rate": 0.01, "loss": 2.158, "step": 6666 }, { "epoch": 0.685194698448577, "grad_norm": 0.1187741607427597, "learning_rate": 0.01, "loss": 2.1651, "step": 6669 }, { "epoch": 0.685502928182472, "grad_norm": 0.10789939761161804, "learning_rate": 0.01, "loss": 2.1465, "step": 6672 }, { "epoch": 0.685811157916367, "grad_norm": 0.06254967302083969, "learning_rate": 0.01, "loss": 2.1639, "step": 6675 }, { "epoch": 0.686119387650262, "grad_norm": 0.04242802783846855, "learning_rate": 0.01, "loss": 2.1563, "step": 6678 }, { "epoch": 0.686427617384157, "grad_norm": 0.03538980334997177, "learning_rate": 0.01, "loss": 2.1373, "step": 6681 }, { "epoch": 0.686735847118052, "grad_norm": 0.04609490931034088, "learning_rate": 0.01, "loss": 2.1345, "step": 6684 }, { "epoch": 0.687044076851947, "grad_norm": 0.1298975795507431, "learning_rate": 0.01, "loss": 2.1446, "step": 6687 }, { "epoch": 0.687352306585842, "grad_norm": 0.10049281269311905, "learning_rate": 0.01, "loss": 2.1432, "step": 6690 }, { "epoch": 0.687660536319737, "grad_norm": 0.05908266827464104, "learning_rate": 0.01, "loss": 2.1288, "step": 6693 }, { "epoch": 0.687968766053632, "grad_norm": 0.0546141043305397, "learning_rate": 0.01, "loss": 2.1086, "step": 6696 }, { "epoch": 0.688276995787527, "grad_norm": 0.04135862737894058, "learning_rate": 0.01, "loss": 2.1187, "step": 6699 }, { "epoch": 0.688585225521422, "grad_norm": 0.03824761137366295, "learning_rate": 0.01, "loss": 2.1162, "step": 6702 }, { "epoch": 0.688893455255317, "grad_norm": 0.041454900056123734, "learning_rate": 0.01, "loss": 2.1304, "step": 6705 }, { "epoch": 0.689201684989212, "grad_norm": 0.08948934823274612, "learning_rate": 0.01, "loss": 2.1538, "step": 6708 }, { "epoch": 0.689509914723107, "grad_norm": 0.07379783689975739, "learning_rate": 0.01, "loss": 2.145, "step": 6711 }, { "epoch": 0.689818144457002, "grad_norm": 0.0833912044763565, "learning_rate": 0.01, "loss": 2.1218, "step": 6714 }, { "epoch": 0.690126374190897, "grad_norm": 0.05899098515510559, "learning_rate": 0.01, "loss": 2.1516, "step": 6717 }, { "epoch": 0.690434603924792, "grad_norm": 0.06462058424949646, "learning_rate": 0.01, "loss": 2.1496, "step": 6720 }, { "epoch": 0.6907428336586869, "grad_norm": 0.04040443152189255, "learning_rate": 0.01, "loss": 2.1311, "step": 6723 }, { "epoch": 0.6910510633925819, "grad_norm": 0.05336814373731613, "learning_rate": 0.01, "loss": 2.1227, "step": 6726 }, { "epoch": 0.6913592931264769, "grad_norm": 0.05057406798005104, "learning_rate": 0.01, "loss": 2.1281, "step": 6729 }, { "epoch": 0.6916675228603719, "grad_norm": 0.08063513040542603, "learning_rate": 0.01, "loss": 2.1318, "step": 6732 }, { "epoch": 0.6919757525942669, "grad_norm": 0.08304840326309204, "learning_rate": 0.01, "loss": 2.1179, "step": 6735 }, { "epoch": 0.6922839823281619, "grad_norm": 0.04266434162855148, "learning_rate": 0.01, "loss": 2.1447, "step": 6738 }, { "epoch": 0.6925922120620569, "grad_norm": 0.07502007484436035, "learning_rate": 0.01, "loss": 2.1173, "step": 6741 }, { "epoch": 0.6929004417959519, "grad_norm": 0.10870220512151718, "learning_rate": 0.01, "loss": 2.1555, "step": 6744 }, { "epoch": 0.6932086715298469, "grad_norm": 0.15824924409389496, "learning_rate": 0.01, "loss": 2.1668, "step": 6747 }, { "epoch": 0.6935169012637419, "grad_norm": 0.06319935619831085, "learning_rate": 0.01, "loss": 2.1788, "step": 6750 }, { "epoch": 0.6938251309976369, "grad_norm": 0.06392507255077362, "learning_rate": 0.01, "loss": 2.1398, "step": 6753 }, { "epoch": 0.6941333607315319, "grad_norm": 0.044481996446847916, "learning_rate": 0.01, "loss": 2.147, "step": 6756 }, { "epoch": 0.6944415904654269, "grad_norm": 0.09093592315912247, "learning_rate": 0.01, "loss": 2.1399, "step": 6759 }, { "epoch": 0.6947498201993219, "grad_norm": 0.09249415248632431, "learning_rate": 0.01, "loss": 2.1274, "step": 6762 }, { "epoch": 0.6950580499332168, "grad_norm": 0.06134162098169327, "learning_rate": 0.01, "loss": 2.142, "step": 6765 }, { "epoch": 0.6953662796671118, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 2.1357, "step": 6768 }, { "epoch": 0.6956745094010068, "grad_norm": 0.04553356394171715, "learning_rate": 0.01, "loss": 2.132, "step": 6771 }, { "epoch": 0.6959827391349018, "grad_norm": 0.10365505516529083, "learning_rate": 0.01, "loss": 2.1568, "step": 6774 }, { "epoch": 0.6962909688687968, "grad_norm": 0.07474958896636963, "learning_rate": 0.01, "loss": 2.1209, "step": 6777 }, { "epoch": 0.6965991986026919, "grad_norm": 0.11140461266040802, "learning_rate": 0.01, "loss": 2.1585, "step": 6780 }, { "epoch": 0.6969074283365869, "grad_norm": 0.0529690645635128, "learning_rate": 0.01, "loss": 2.1228, "step": 6783 }, { "epoch": 0.6972156580704819, "grad_norm": 0.06484264135360718, "learning_rate": 0.01, "loss": 2.117, "step": 6786 }, { "epoch": 0.6975238878043769, "grad_norm": 0.0467400886118412, "learning_rate": 0.01, "loss": 2.1367, "step": 6789 }, { "epoch": 0.6978321175382719, "grad_norm": 0.09690822660923004, "learning_rate": 0.01, "loss": 2.1275, "step": 6792 }, { "epoch": 0.6981403472721669, "grad_norm": 0.053299982100725174, "learning_rate": 0.01, "loss": 2.1557, "step": 6795 }, { "epoch": 0.6984485770060619, "grad_norm": 0.08451724797487259, "learning_rate": 0.01, "loss": 2.1235, "step": 6798 }, { "epoch": 0.6987568067399569, "grad_norm": 0.11180119216442108, "learning_rate": 0.01, "loss": 2.1389, "step": 6801 }, { "epoch": 0.6990650364738519, "grad_norm": 0.04366112872958183, "learning_rate": 0.01, "loss": 2.1345, "step": 6804 }, { "epoch": 0.6993732662077469, "grad_norm": 0.057021014392375946, "learning_rate": 0.01, "loss": 2.145, "step": 6807 }, { "epoch": 0.6996814959416419, "grad_norm": 0.050035975873470306, "learning_rate": 0.01, "loss": 2.1245, "step": 6810 }, { "epoch": 0.6999897256755369, "grad_norm": 0.16434957087039948, "learning_rate": 0.01, "loss": 2.099, "step": 6813 }, { "epoch": 0.7002979554094318, "grad_norm": 0.0473979152739048, "learning_rate": 0.01, "loss": 2.124, "step": 6816 }, { "epoch": 0.7006061851433268, "grad_norm": 0.06207640469074249, "learning_rate": 0.01, "loss": 2.1528, "step": 6819 }, { "epoch": 0.7009144148772218, "grad_norm": 0.09829109162092209, "learning_rate": 0.01, "loss": 2.1359, "step": 6822 }, { "epoch": 0.7012226446111168, "grad_norm": 0.0563257597386837, "learning_rate": 0.01, "loss": 2.1639, "step": 6825 }, { "epoch": 0.7015308743450118, "grad_norm": 0.12371699512004852, "learning_rate": 0.01, "loss": 2.1479, "step": 6828 }, { "epoch": 0.7018391040789068, "grad_norm": 0.07342347502708435, "learning_rate": 0.01, "loss": 2.1786, "step": 6831 }, { "epoch": 0.7021473338128018, "grad_norm": 0.05420146882534027, "learning_rate": 0.01, "loss": 2.1261, "step": 6834 }, { "epoch": 0.7024555635466968, "grad_norm": 0.04500873014330864, "learning_rate": 0.01, "loss": 2.1356, "step": 6837 }, { "epoch": 0.7027637932805918, "grad_norm": 0.10648415237665176, "learning_rate": 0.01, "loss": 2.1205, "step": 6840 }, { "epoch": 0.7030720230144868, "grad_norm": 0.05089351162314415, "learning_rate": 0.01, "loss": 2.1403, "step": 6843 }, { "epoch": 0.7033802527483818, "grad_norm": 0.10011807084083557, "learning_rate": 0.01, "loss": 2.1508, "step": 6846 }, { "epoch": 0.7036884824822768, "grad_norm": 0.06787194311618805, "learning_rate": 0.01, "loss": 2.1391, "step": 6849 }, { "epoch": 0.7039967122161718, "grad_norm": 0.08248817175626755, "learning_rate": 0.01, "loss": 2.1782, "step": 6852 }, { "epoch": 0.7043049419500668, "grad_norm": 0.04949905723333359, "learning_rate": 0.01, "loss": 2.1401, "step": 6855 }, { "epoch": 0.7046131716839618, "grad_norm": 0.043910931795835495, "learning_rate": 0.01, "loss": 2.108, "step": 6858 }, { "epoch": 0.7049214014178568, "grad_norm": 0.05133078247308731, "learning_rate": 0.01, "loss": 2.1088, "step": 6861 }, { "epoch": 0.7052296311517517, "grad_norm": 0.11582443863153458, "learning_rate": 0.01, "loss": 2.1301, "step": 6864 }, { "epoch": 0.7055378608856467, "grad_norm": 0.04287354275584221, "learning_rate": 0.01, "loss": 2.124, "step": 6867 }, { "epoch": 0.7058460906195417, "grad_norm": 0.09393726289272308, "learning_rate": 0.01, "loss": 2.1326, "step": 6870 }, { "epoch": 0.7061543203534367, "grad_norm": 0.1286250203847885, "learning_rate": 0.01, "loss": 2.1292, "step": 6873 }, { "epoch": 0.7064625500873317, "grad_norm": 0.14816388487815857, "learning_rate": 0.01, "loss": 2.1439, "step": 6876 }, { "epoch": 0.7067707798212267, "grad_norm": 0.062444012612104416, "learning_rate": 0.01, "loss": 2.1421, "step": 6879 }, { "epoch": 0.7070790095551217, "grad_norm": 0.053750455379486084, "learning_rate": 0.01, "loss": 2.1185, "step": 6882 }, { "epoch": 0.7073872392890167, "grad_norm": 0.051356710493564606, "learning_rate": 0.01, "loss": 2.1298, "step": 6885 }, { "epoch": 0.7076954690229117, "grad_norm": 0.061504025012254715, "learning_rate": 0.01, "loss": 2.1132, "step": 6888 }, { "epoch": 0.7080036987568067, "grad_norm": 0.056496761739254, "learning_rate": 0.01, "loss": 2.1019, "step": 6891 }, { "epoch": 0.7083119284907017, "grad_norm": 0.048710647970438004, "learning_rate": 0.01, "loss": 2.126, "step": 6894 }, { "epoch": 0.7086201582245968, "grad_norm": 0.06260757148265839, "learning_rate": 0.01, "loss": 2.1534, "step": 6897 }, { "epoch": 0.7089283879584918, "grad_norm": 0.06622278690338135, "learning_rate": 0.01, "loss": 2.1247, "step": 6900 }, { "epoch": 0.7092366176923868, "grad_norm": 0.0810452550649643, "learning_rate": 0.01, "loss": 2.1336, "step": 6903 }, { "epoch": 0.7095448474262818, "grad_norm": 0.04692875221371651, "learning_rate": 0.01, "loss": 2.1096, "step": 6906 }, { "epoch": 0.7098530771601768, "grad_norm": 0.04757360368967056, "learning_rate": 0.01, "loss": 2.1181, "step": 6909 }, { "epoch": 0.7101613068940718, "grad_norm": 0.05597659945487976, "learning_rate": 0.01, "loss": 2.1425, "step": 6912 }, { "epoch": 0.7104695366279667, "grad_norm": 0.051605843007564545, "learning_rate": 0.01, "loss": 2.1118, "step": 6915 }, { "epoch": 0.7107777663618617, "grad_norm": 0.06179991737008095, "learning_rate": 0.01, "loss": 2.1362, "step": 6918 }, { "epoch": 0.7110859960957567, "grad_norm": 0.05455191805958748, "learning_rate": 0.01, "loss": 2.1279, "step": 6921 }, { "epoch": 0.7113942258296517, "grad_norm": 0.11560655385255814, "learning_rate": 0.01, "loss": 2.1316, "step": 6924 }, { "epoch": 0.7117024555635467, "grad_norm": 0.12203246355056763, "learning_rate": 0.01, "loss": 2.1173, "step": 6927 }, { "epoch": 0.7120106852974417, "grad_norm": 0.07024069130420685, "learning_rate": 0.01, "loss": 2.1395, "step": 6930 }, { "epoch": 0.7123189150313367, "grad_norm": 0.04773107171058655, "learning_rate": 0.01, "loss": 2.1455, "step": 6933 }, { "epoch": 0.7126271447652317, "grad_norm": 0.06106821820139885, "learning_rate": 0.01, "loss": 2.1352, "step": 6936 }, { "epoch": 0.7129353744991267, "grad_norm": 0.11438222974538803, "learning_rate": 0.01, "loss": 2.149, "step": 6939 }, { "epoch": 0.7132436042330217, "grad_norm": 0.07224932312965393, "learning_rate": 0.01, "loss": 2.1234, "step": 6942 }, { "epoch": 0.7135518339669167, "grad_norm": 0.06790932267904282, "learning_rate": 0.01, "loss": 2.1222, "step": 6945 }, { "epoch": 0.7138600637008117, "grad_norm": 0.12322958558797836, "learning_rate": 0.01, "loss": 2.106, "step": 6948 }, { "epoch": 0.7141682934347067, "grad_norm": 0.07186157256364822, "learning_rate": 0.01, "loss": 2.1365, "step": 6951 }, { "epoch": 0.7144765231686017, "grad_norm": 0.05366130173206329, "learning_rate": 0.01, "loss": 2.1264, "step": 6954 }, { "epoch": 0.7147847529024967, "grad_norm": 0.06682512164115906, "learning_rate": 0.01, "loss": 2.1163, "step": 6957 }, { "epoch": 0.7150929826363916, "grad_norm": 0.04629479721188545, "learning_rate": 0.01, "loss": 2.126, "step": 6960 }, { "epoch": 0.7154012123702866, "grad_norm": 0.053164754062891006, "learning_rate": 0.01, "loss": 2.1262, "step": 6963 }, { "epoch": 0.7157094421041816, "grad_norm": 0.08918699622154236, "learning_rate": 0.01, "loss": 2.157, "step": 6966 }, { "epoch": 0.7160176718380766, "grad_norm": 0.06226164847612381, "learning_rate": 0.01, "loss": 2.1391, "step": 6969 }, { "epoch": 0.7163259015719716, "grad_norm": 0.08120178431272507, "learning_rate": 0.01, "loss": 2.1118, "step": 6972 }, { "epoch": 0.7166341313058666, "grad_norm": 0.06390135735273361, "learning_rate": 0.01, "loss": 2.1302, "step": 6975 }, { "epoch": 0.7169423610397616, "grad_norm": 0.039068643003702164, "learning_rate": 0.01, "loss": 2.1304, "step": 6978 }, { "epoch": 0.7172505907736566, "grad_norm": 0.05006824806332588, "learning_rate": 0.01, "loss": 2.1352, "step": 6981 }, { "epoch": 0.7175588205075516, "grad_norm": 0.03946538642048836, "learning_rate": 0.01, "loss": 2.1513, "step": 6984 }, { "epoch": 0.7178670502414466, "grad_norm": 0.05072702839970589, "learning_rate": 0.01, "loss": 2.1298, "step": 6987 }, { "epoch": 0.7181752799753416, "grad_norm": 0.06457548588514328, "learning_rate": 0.01, "loss": 2.1276, "step": 6990 }, { "epoch": 0.7184835097092366, "grad_norm": 0.05759236589074135, "learning_rate": 0.01, "loss": 2.1198, "step": 6993 }, { "epoch": 0.7187917394431316, "grad_norm": 0.1151571124792099, "learning_rate": 0.01, "loss": 2.1217, "step": 6996 }, { "epoch": 0.7190999691770266, "grad_norm": 0.04867241531610489, "learning_rate": 0.01, "loss": 2.1343, "step": 6999 }, { "epoch": 0.7194081989109216, "grad_norm": 0.074817955493927, "learning_rate": 0.01, "loss": 2.1474, "step": 7002 }, { "epoch": 0.7197164286448166, "grad_norm": 0.04749060794711113, "learning_rate": 0.01, "loss": 2.1403, "step": 7005 }, { "epoch": 0.7200246583787115, "grad_norm": 0.04965493455529213, "learning_rate": 0.01, "loss": 2.142, "step": 7008 }, { "epoch": 0.7203328881126067, "grad_norm": 0.044914234429597855, "learning_rate": 0.01, "loss": 2.1397, "step": 7011 }, { "epoch": 0.7206411178465016, "grad_norm": 0.06727777421474457, "learning_rate": 0.01, "loss": 2.1443, "step": 7014 }, { "epoch": 0.7209493475803966, "grad_norm": 0.10670837014913559, "learning_rate": 0.01, "loss": 2.1316, "step": 7017 }, { "epoch": 0.7212575773142916, "grad_norm": 0.05047740787267685, "learning_rate": 0.01, "loss": 2.1268, "step": 7020 }, { "epoch": 0.7215658070481866, "grad_norm": 0.055116791278123856, "learning_rate": 0.01, "loss": 2.1194, "step": 7023 }, { "epoch": 0.7218740367820816, "grad_norm": 0.04873311519622803, "learning_rate": 0.01, "loss": 2.1122, "step": 7026 }, { "epoch": 0.7221822665159766, "grad_norm": 0.0893159881234169, "learning_rate": 0.01, "loss": 2.1413, "step": 7029 }, { "epoch": 0.7224904962498716, "grad_norm": 0.07278893142938614, "learning_rate": 0.01, "loss": 2.1394, "step": 7032 }, { "epoch": 0.7227987259837666, "grad_norm": 0.09431196749210358, "learning_rate": 0.01, "loss": 2.1489, "step": 7035 }, { "epoch": 0.7231069557176616, "grad_norm": 0.03588537499308586, "learning_rate": 0.01, "loss": 2.1585, "step": 7038 }, { "epoch": 0.7234151854515566, "grad_norm": 0.044003136456012726, "learning_rate": 0.01, "loss": 2.1442, "step": 7041 }, { "epoch": 0.7237234151854516, "grad_norm": 0.10805044323205948, "learning_rate": 0.01, "loss": 2.127, "step": 7044 }, { "epoch": 0.7240316449193466, "grad_norm": 0.06328746676445007, "learning_rate": 0.01, "loss": 2.1166, "step": 7047 }, { "epoch": 0.7243398746532416, "grad_norm": 0.08782347291707993, "learning_rate": 0.01, "loss": 2.1474, "step": 7050 }, { "epoch": 0.7246481043871366, "grad_norm": 0.06585227698087692, "learning_rate": 0.01, "loss": 2.1228, "step": 7053 }, { "epoch": 0.7249563341210316, "grad_norm": 0.06324558705091476, "learning_rate": 0.01, "loss": 2.1313, "step": 7056 }, { "epoch": 0.7252645638549265, "grad_norm": 0.057287219911813736, "learning_rate": 0.01, "loss": 2.1241, "step": 7059 }, { "epoch": 0.7255727935888215, "grad_norm": 0.07684747129678726, "learning_rate": 0.01, "loss": 2.1299, "step": 7062 }, { "epoch": 0.7258810233227165, "grad_norm": 0.10347555577754974, "learning_rate": 0.01, "loss": 2.12, "step": 7065 }, { "epoch": 0.7261892530566115, "grad_norm": 0.06019530072808266, "learning_rate": 0.01, "loss": 2.138, "step": 7068 }, { "epoch": 0.7264974827905065, "grad_norm": 0.04816723242402077, "learning_rate": 0.01, "loss": 2.1161, "step": 7071 }, { "epoch": 0.7268057125244015, "grad_norm": 0.05839864909648895, "learning_rate": 0.01, "loss": 2.136, "step": 7074 }, { "epoch": 0.7271139422582965, "grad_norm": 0.061795271933078766, "learning_rate": 0.01, "loss": 2.1315, "step": 7077 }, { "epoch": 0.7274221719921915, "grad_norm": 0.05736471712589264, "learning_rate": 0.01, "loss": 2.1403, "step": 7080 }, { "epoch": 0.7277304017260865, "grad_norm": 0.059238459914922714, "learning_rate": 0.01, "loss": 2.1101, "step": 7083 }, { "epoch": 0.7280386314599815, "grad_norm": 0.10844148695468903, "learning_rate": 0.01, "loss": 2.1454, "step": 7086 }, { "epoch": 0.7283468611938765, "grad_norm": 0.047568898648023605, "learning_rate": 0.01, "loss": 2.1183, "step": 7089 }, { "epoch": 0.7286550909277715, "grad_norm": 0.05178900063037872, "learning_rate": 0.01, "loss": 2.1346, "step": 7092 }, { "epoch": 0.7289633206616665, "grad_norm": 0.04113532230257988, "learning_rate": 0.01, "loss": 2.0915, "step": 7095 }, { "epoch": 0.7292715503955615, "grad_norm": 0.10488615930080414, "learning_rate": 0.01, "loss": 2.1239, "step": 7098 }, { "epoch": 0.7295797801294565, "grad_norm": 0.13013161718845367, "learning_rate": 0.01, "loss": 2.1251, "step": 7101 }, { "epoch": 0.7298880098633515, "grad_norm": 0.10956915467977524, "learning_rate": 0.01, "loss": 2.1113, "step": 7104 }, { "epoch": 0.7301962395972464, "grad_norm": 0.06996689736843109, "learning_rate": 0.01, "loss": 2.118, "step": 7107 }, { "epoch": 0.7305044693311414, "grad_norm": 0.07773365080356598, "learning_rate": 0.01, "loss": 2.1144, "step": 7110 }, { "epoch": 0.7308126990650364, "grad_norm": 0.06922838091850281, "learning_rate": 0.01, "loss": 2.1148, "step": 7113 }, { "epoch": 0.7311209287989314, "grad_norm": 0.08941454440355301, "learning_rate": 0.01, "loss": 2.1493, "step": 7116 }, { "epoch": 0.7314291585328264, "grad_norm": 0.04264171048998833, "learning_rate": 0.01, "loss": 2.136, "step": 7119 }, { "epoch": 0.7317373882667214, "grad_norm": 0.04473461955785751, "learning_rate": 0.01, "loss": 2.1294, "step": 7122 }, { "epoch": 0.7320456180006164, "grad_norm": 0.0396125465631485, "learning_rate": 0.01, "loss": 2.1439, "step": 7125 }, { "epoch": 0.7323538477345115, "grad_norm": 0.04613679647445679, "learning_rate": 0.01, "loss": 2.1503, "step": 7128 }, { "epoch": 0.7326620774684065, "grad_norm": 0.04897918924689293, "learning_rate": 0.01, "loss": 2.1214, "step": 7131 }, { "epoch": 0.7329703072023015, "grad_norm": 0.05057375133037567, "learning_rate": 0.01, "loss": 2.1112, "step": 7134 }, { "epoch": 0.7332785369361965, "grad_norm": 0.05711055174469948, "learning_rate": 0.01, "loss": 2.102, "step": 7137 }, { "epoch": 0.7335867666700915, "grad_norm": 0.08658434450626373, "learning_rate": 0.01, "loss": 2.1574, "step": 7140 }, { "epoch": 0.7338949964039865, "grad_norm": 0.07044188678264618, "learning_rate": 0.01, "loss": 2.1037, "step": 7143 }, { "epoch": 0.7342032261378815, "grad_norm": 0.03941315785050392, "learning_rate": 0.01, "loss": 2.1369, "step": 7146 }, { "epoch": 0.7345114558717765, "grad_norm": 0.04527783393859863, "learning_rate": 0.01, "loss": 2.1212, "step": 7149 }, { "epoch": 0.7348196856056715, "grad_norm": 0.07909847050905228, "learning_rate": 0.01, "loss": 2.1316, "step": 7152 }, { "epoch": 0.7351279153395665, "grad_norm": 0.12793006002902985, "learning_rate": 0.01, "loss": 2.1254, "step": 7155 }, { "epoch": 0.7354361450734614, "grad_norm": 0.0639350563287735, "learning_rate": 0.01, "loss": 2.1319, "step": 7158 }, { "epoch": 0.7357443748073564, "grad_norm": 0.0342305451631546, "learning_rate": 0.01, "loss": 2.1386, "step": 7161 }, { "epoch": 0.7360526045412514, "grad_norm": 0.049001939594745636, "learning_rate": 0.01, "loss": 2.1485, "step": 7164 }, { "epoch": 0.7363608342751464, "grad_norm": 0.047717638313770294, "learning_rate": 0.01, "loss": 2.1368, "step": 7167 }, { "epoch": 0.7366690640090414, "grad_norm": 0.04402822256088257, "learning_rate": 0.01, "loss": 2.1162, "step": 7170 }, { "epoch": 0.7369772937429364, "grad_norm": 0.06922505795955658, "learning_rate": 0.01, "loss": 2.1279, "step": 7173 }, { "epoch": 0.7372855234768314, "grad_norm": 0.06231709569692612, "learning_rate": 0.01, "loss": 2.0946, "step": 7176 }, { "epoch": 0.7375937532107264, "grad_norm": 0.11480400711297989, "learning_rate": 0.01, "loss": 2.146, "step": 7179 }, { "epoch": 0.7379019829446214, "grad_norm": 0.05144179239869118, "learning_rate": 0.01, "loss": 2.1128, "step": 7182 }, { "epoch": 0.7382102126785164, "grad_norm": 0.05130591616034508, "learning_rate": 0.01, "loss": 2.0964, "step": 7185 }, { "epoch": 0.7385184424124114, "grad_norm": 0.0549122579395771, "learning_rate": 0.01, "loss": 2.165, "step": 7188 }, { "epoch": 0.7388266721463064, "grad_norm": 0.1378844678401947, "learning_rate": 0.01, "loss": 2.1367, "step": 7191 }, { "epoch": 0.7391349018802014, "grad_norm": 0.06231486052274704, "learning_rate": 0.01, "loss": 2.1341, "step": 7194 }, { "epoch": 0.7394431316140964, "grad_norm": 0.10189559310674667, "learning_rate": 0.01, "loss": 2.1161, "step": 7197 }, { "epoch": 0.7397513613479914, "grad_norm": 0.053364284336566925, "learning_rate": 0.01, "loss": 2.1043, "step": 7200 }, { "epoch": 0.7400595910818863, "grad_norm": 0.046057943254709244, "learning_rate": 0.01, "loss": 2.1011, "step": 7203 }, { "epoch": 0.7403678208157813, "grad_norm": 0.04084615036845207, "learning_rate": 0.01, "loss": 2.1253, "step": 7206 }, { "epoch": 0.7406760505496763, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 2.1228, "step": 7209 }, { "epoch": 0.7409842802835713, "grad_norm": 0.06608622521162033, "learning_rate": 0.01, "loss": 2.1188, "step": 7212 }, { "epoch": 0.7412925100174663, "grad_norm": 0.125398188829422, "learning_rate": 0.01, "loss": 2.1057, "step": 7215 }, { "epoch": 0.7416007397513613, "grad_norm": 0.08068963885307312, "learning_rate": 0.01, "loss": 2.0947, "step": 7218 }, { "epoch": 0.7419089694852563, "grad_norm": 0.07993921637535095, "learning_rate": 0.01, "loss": 2.1214, "step": 7221 }, { "epoch": 0.7422171992191513, "grad_norm": 0.04969675466418266, "learning_rate": 0.01, "loss": 2.1099, "step": 7224 }, { "epoch": 0.7425254289530463, "grad_norm": 0.054677605628967285, "learning_rate": 0.01, "loss": 2.1229, "step": 7227 }, { "epoch": 0.7428336586869413, "grad_norm": 0.04562999680638313, "learning_rate": 0.01, "loss": 2.1409, "step": 7230 }, { "epoch": 0.7431418884208363, "grad_norm": 0.07618910074234009, "learning_rate": 0.01, "loss": 2.0924, "step": 7233 }, { "epoch": 0.7434501181547313, "grad_norm": 0.14368098974227905, "learning_rate": 0.01, "loss": 2.1348, "step": 7236 }, { "epoch": 0.7437583478886263, "grad_norm": 0.05517590045928955, "learning_rate": 0.01, "loss": 2.116, "step": 7239 }, { "epoch": 0.7440665776225214, "grad_norm": 0.17316390573978424, "learning_rate": 0.01, "loss": 2.1363, "step": 7242 }, { "epoch": 0.7443748073564164, "grad_norm": 0.15268415212631226, "learning_rate": 0.01, "loss": 2.1033, "step": 7245 }, { "epoch": 0.7446830370903114, "grad_norm": 0.06212317943572998, "learning_rate": 0.01, "loss": 2.0971, "step": 7248 }, { "epoch": 0.7449912668242064, "grad_norm": 0.04282272607088089, "learning_rate": 0.01, "loss": 2.1434, "step": 7251 }, { "epoch": 0.7452994965581013, "grad_norm": 0.04305952787399292, "learning_rate": 0.01, "loss": 2.1406, "step": 7254 }, { "epoch": 0.7456077262919963, "grad_norm": 0.048668697476387024, "learning_rate": 0.01, "loss": 2.1303, "step": 7257 }, { "epoch": 0.7459159560258913, "grad_norm": 0.05524542182683945, "learning_rate": 0.01, "loss": 2.1331, "step": 7260 }, { "epoch": 0.7462241857597863, "grad_norm": 0.0438026525080204, "learning_rate": 0.01, "loss": 2.109, "step": 7263 }, { "epoch": 0.7465324154936813, "grad_norm": 0.08154566586017609, "learning_rate": 0.01, "loss": 2.1053, "step": 7266 }, { "epoch": 0.7468406452275763, "grad_norm": 0.11754357814788818, "learning_rate": 0.01, "loss": 2.1298, "step": 7269 }, { "epoch": 0.7471488749614713, "grad_norm": 0.06593465805053711, "learning_rate": 0.01, "loss": 2.1323, "step": 7272 }, { "epoch": 0.7474571046953663, "grad_norm": 0.08065393567085266, "learning_rate": 0.01, "loss": 2.1297, "step": 7275 }, { "epoch": 0.7477653344292613, "grad_norm": 0.10624121129512787, "learning_rate": 0.01, "loss": 2.1175, "step": 7278 }, { "epoch": 0.7480735641631563, "grad_norm": 0.06357972323894501, "learning_rate": 0.01, "loss": 2.164, "step": 7281 }, { "epoch": 0.7483817938970513, "grad_norm": 0.03753754869103432, "learning_rate": 0.01, "loss": 2.1519, "step": 7284 }, { "epoch": 0.7486900236309463, "grad_norm": 0.04756931588053703, "learning_rate": 0.01, "loss": 2.0901, "step": 7287 }, { "epoch": 0.7489982533648413, "grad_norm": 0.0494108609855175, "learning_rate": 0.01, "loss": 2.1474, "step": 7290 }, { "epoch": 0.7493064830987363, "grad_norm": 0.063727006316185, "learning_rate": 0.01, "loss": 2.1425, "step": 7293 }, { "epoch": 0.7496147128326313, "grad_norm": 0.06327082961797714, "learning_rate": 0.01, "loss": 2.1346, "step": 7296 }, { "epoch": 0.7499229425665263, "grad_norm": 0.10383486747741699, "learning_rate": 0.01, "loss": 2.1245, "step": 7299 }, { "epoch": 0.7502311723004212, "grad_norm": 0.10473886877298355, "learning_rate": 0.01, "loss": 2.1302, "step": 7302 }, { "epoch": 0.7505394020343162, "grad_norm": 0.04905236139893532, "learning_rate": 0.01, "loss": 2.119, "step": 7305 }, { "epoch": 0.7508476317682112, "grad_norm": 0.04571664705872536, "learning_rate": 0.01, "loss": 2.1505, "step": 7308 }, { "epoch": 0.7511558615021062, "grad_norm": 0.06305412203073502, "learning_rate": 0.01, "loss": 2.1389, "step": 7311 }, { "epoch": 0.7514640912360012, "grad_norm": 0.05825283005833626, "learning_rate": 0.01, "loss": 2.1361, "step": 7314 }, { "epoch": 0.7517723209698962, "grad_norm": 0.059476301074028015, "learning_rate": 0.01, "loss": 2.1191, "step": 7317 }, { "epoch": 0.7520805507037912, "grad_norm": 0.042396873235702515, "learning_rate": 0.01, "loss": 2.1309, "step": 7320 }, { "epoch": 0.7523887804376862, "grad_norm": 0.04611228406429291, "learning_rate": 0.01, "loss": 2.1438, "step": 7323 }, { "epoch": 0.7526970101715812, "grad_norm": 0.09147686511278152, "learning_rate": 0.01, "loss": 2.1288, "step": 7326 }, { "epoch": 0.7530052399054762, "grad_norm": 0.08085332810878754, "learning_rate": 0.01, "loss": 2.1427, "step": 7329 }, { "epoch": 0.7533134696393712, "grad_norm": 0.03873496130108833, "learning_rate": 0.01, "loss": 2.1257, "step": 7332 }, { "epoch": 0.7536216993732662, "grad_norm": 0.05457824096083641, "learning_rate": 0.01, "loss": 2.1373, "step": 7335 }, { "epoch": 0.7539299291071612, "grad_norm": 0.049249522387981415, "learning_rate": 0.01, "loss": 2.1185, "step": 7338 }, { "epoch": 0.7542381588410562, "grad_norm": 0.07082841545343399, "learning_rate": 0.01, "loss": 2.1157, "step": 7341 }, { "epoch": 0.7545463885749512, "grad_norm": 0.046108178794384, "learning_rate": 0.01, "loss": 2.1238, "step": 7344 }, { "epoch": 0.7548546183088461, "grad_norm": 0.05572620406746864, "learning_rate": 0.01, "loss": 2.1445, "step": 7347 }, { "epoch": 0.7551628480427411, "grad_norm": 0.1091703474521637, "learning_rate": 0.01, "loss": 2.1281, "step": 7350 }, { "epoch": 0.7554710777766361, "grad_norm": 0.09372757375240326, "learning_rate": 0.01, "loss": 2.1231, "step": 7353 }, { "epoch": 0.7557793075105311, "grad_norm": 0.0482059009373188, "learning_rate": 0.01, "loss": 2.1003, "step": 7356 }, { "epoch": 0.7560875372444262, "grad_norm": 0.041941821575164795, "learning_rate": 0.01, "loss": 2.1382, "step": 7359 }, { "epoch": 0.7563957669783212, "grad_norm": 0.07122782617807388, "learning_rate": 0.01, "loss": 2.1419, "step": 7362 }, { "epoch": 0.7567039967122162, "grad_norm": 0.06854265183210373, "learning_rate": 0.01, "loss": 2.1328, "step": 7365 }, { "epoch": 0.7570122264461112, "grad_norm": 0.10073423385620117, "learning_rate": 0.01, "loss": 2.1322, "step": 7368 }, { "epoch": 0.7573204561800062, "grad_norm": 0.038869407027959824, "learning_rate": 0.01, "loss": 2.1273, "step": 7371 }, { "epoch": 0.7576286859139012, "grad_norm": 0.09483812749385834, "learning_rate": 0.01, "loss": 2.1465, "step": 7374 }, { "epoch": 0.7579369156477962, "grad_norm": 0.07226487994194031, "learning_rate": 0.01, "loss": 2.1386, "step": 7377 }, { "epoch": 0.7582451453816912, "grad_norm": 0.05041668191552162, "learning_rate": 0.01, "loss": 2.1249, "step": 7380 }, { "epoch": 0.7585533751155862, "grad_norm": 0.03839525580406189, "learning_rate": 0.01, "loss": 2.1125, "step": 7383 }, { "epoch": 0.7588616048494812, "grad_norm": 0.047746479511260986, "learning_rate": 0.01, "loss": 2.1027, "step": 7386 }, { "epoch": 0.7591698345833762, "grad_norm": 0.05524810031056404, "learning_rate": 0.01, "loss": 2.166, "step": 7389 }, { "epoch": 0.7594780643172712, "grad_norm": 0.050045181065797806, "learning_rate": 0.01, "loss": 2.1411, "step": 7392 }, { "epoch": 0.7597862940511662, "grad_norm": 0.09187906980514526, "learning_rate": 0.01, "loss": 2.13, "step": 7395 }, { "epoch": 0.7600945237850611, "grad_norm": 0.15085643529891968, "learning_rate": 0.01, "loss": 2.1198, "step": 7398 }, { "epoch": 0.7604027535189561, "grad_norm": 0.05295104160904884, "learning_rate": 0.01, "loss": 2.1067, "step": 7401 }, { "epoch": 0.7607109832528511, "grad_norm": 0.03696104511618614, "learning_rate": 0.01, "loss": 2.1159, "step": 7404 }, { "epoch": 0.7610192129867461, "grad_norm": 0.04209265485405922, "learning_rate": 0.01, "loss": 2.1243, "step": 7407 }, { "epoch": 0.7613274427206411, "grad_norm": 0.056943077594041824, "learning_rate": 0.01, "loss": 2.1038, "step": 7410 }, { "epoch": 0.7616356724545361, "grad_norm": 0.12749402225017548, "learning_rate": 0.01, "loss": 2.1087, "step": 7413 }, { "epoch": 0.7619439021884311, "grad_norm": 0.09119253605604172, "learning_rate": 0.01, "loss": 2.1252, "step": 7416 }, { "epoch": 0.7622521319223261, "grad_norm": 0.04251190647482872, "learning_rate": 0.01, "loss": 2.1384, "step": 7419 }, { "epoch": 0.7625603616562211, "grad_norm": 0.04010685533285141, "learning_rate": 0.01, "loss": 2.1449, "step": 7422 }, { "epoch": 0.7628685913901161, "grad_norm": 0.05524475499987602, "learning_rate": 0.01, "loss": 2.0841, "step": 7425 }, { "epoch": 0.7631768211240111, "grad_norm": 0.10250036418437958, "learning_rate": 0.01, "loss": 2.0827, "step": 7428 }, { "epoch": 0.7634850508579061, "grad_norm": 0.0748668685555458, "learning_rate": 0.01, "loss": 2.128, "step": 7431 }, { "epoch": 0.7637932805918011, "grad_norm": 0.08616036176681519, "learning_rate": 0.01, "loss": 2.1087, "step": 7434 }, { "epoch": 0.7641015103256961, "grad_norm": 0.09491308033466339, "learning_rate": 0.01, "loss": 2.1247, "step": 7437 }, { "epoch": 0.7644097400595911, "grad_norm": 0.08575759083032608, "learning_rate": 0.01, "loss": 2.1419, "step": 7440 }, { "epoch": 0.764717969793486, "grad_norm": 0.04314613714814186, "learning_rate": 0.01, "loss": 2.1462, "step": 7443 }, { "epoch": 0.765026199527381, "grad_norm": 0.035719119012355804, "learning_rate": 0.01, "loss": 2.1337, "step": 7446 }, { "epoch": 0.765334429261276, "grad_norm": 0.04597650095820427, "learning_rate": 0.01, "loss": 2.1258, "step": 7449 }, { "epoch": 0.765642658995171, "grad_norm": 0.10039210319519043, "learning_rate": 0.01, "loss": 2.1238, "step": 7452 }, { "epoch": 0.765950888729066, "grad_norm": 0.07157409191131592, "learning_rate": 0.01, "loss": 2.1349, "step": 7455 }, { "epoch": 0.766259118462961, "grad_norm": 0.09058292210102081, "learning_rate": 0.01, "loss": 2.1232, "step": 7458 }, { "epoch": 0.766567348196856, "grad_norm": 0.06009940057992935, "learning_rate": 0.01, "loss": 2.1386, "step": 7461 }, { "epoch": 0.766875577930751, "grad_norm": 0.1165439561009407, "learning_rate": 0.01, "loss": 2.1293, "step": 7464 }, { "epoch": 0.767183807664646, "grad_norm": 0.06138407811522484, "learning_rate": 0.01, "loss": 2.1066, "step": 7467 }, { "epoch": 0.767492037398541, "grad_norm": 0.06058945506811142, "learning_rate": 0.01, "loss": 2.1044, "step": 7470 }, { "epoch": 0.767800267132436, "grad_norm": 0.06741827726364136, "learning_rate": 0.01, "loss": 2.1191, "step": 7473 }, { "epoch": 0.7681084968663311, "grad_norm": 0.047926925122737885, "learning_rate": 0.01, "loss": 2.1333, "step": 7476 }, { "epoch": 0.7684167266002261, "grad_norm": 0.06450969725847244, "learning_rate": 0.01, "loss": 2.1061, "step": 7479 }, { "epoch": 0.7687249563341211, "grad_norm": 0.11133641749620438, "learning_rate": 0.01, "loss": 2.1129, "step": 7482 }, { "epoch": 0.7690331860680161, "grad_norm": 0.049795158207416534, "learning_rate": 0.01, "loss": 2.136, "step": 7485 }, { "epoch": 0.7693414158019111, "grad_norm": 0.06083859130740166, "learning_rate": 0.01, "loss": 2.1459, "step": 7488 }, { "epoch": 0.7696496455358061, "grad_norm": 0.04686833918094635, "learning_rate": 0.01, "loss": 2.1073, "step": 7491 }, { "epoch": 0.769957875269701, "grad_norm": 0.05475611612200737, "learning_rate": 0.01, "loss": 2.1167, "step": 7494 }, { "epoch": 0.770266105003596, "grad_norm": 0.04683786630630493, "learning_rate": 0.01, "loss": 2.1491, "step": 7497 }, { "epoch": 0.770574334737491, "grad_norm": 0.10841275751590729, "learning_rate": 0.01, "loss": 2.0967, "step": 7500 }, { "epoch": 0.770882564471386, "grad_norm": 0.09716581553220749, "learning_rate": 0.01, "loss": 2.1133, "step": 7503 }, { "epoch": 0.771190794205281, "grad_norm": 0.04913085699081421, "learning_rate": 0.01, "loss": 2.1401, "step": 7506 }, { "epoch": 0.771499023939176, "grad_norm": 0.04710682854056358, "learning_rate": 0.01, "loss": 2.109, "step": 7509 }, { "epoch": 0.771807253673071, "grad_norm": 0.054945673793554306, "learning_rate": 0.01, "loss": 2.1169, "step": 7512 }, { "epoch": 0.772115483406966, "grad_norm": 0.04265155643224716, "learning_rate": 0.01, "loss": 2.1156, "step": 7515 }, { "epoch": 0.772423713140861, "grad_norm": 0.03544042259454727, "learning_rate": 0.01, "loss": 2.1172, "step": 7518 }, { "epoch": 0.772731942874756, "grad_norm": 0.05048484355211258, "learning_rate": 0.01, "loss": 2.1015, "step": 7521 }, { "epoch": 0.773040172608651, "grad_norm": 0.14160272479057312, "learning_rate": 0.01, "loss": 2.1475, "step": 7524 }, { "epoch": 0.773348402342546, "grad_norm": 0.08693049848079681, "learning_rate": 0.01, "loss": 2.1266, "step": 7527 }, { "epoch": 0.773656632076441, "grad_norm": 0.06437800824642181, "learning_rate": 0.01, "loss": 2.1273, "step": 7530 }, { "epoch": 0.773964861810336, "grad_norm": 0.04450656846165657, "learning_rate": 0.01, "loss": 2.1192, "step": 7533 }, { "epoch": 0.774273091544231, "grad_norm": 0.05369933694601059, "learning_rate": 0.01, "loss": 2.1264, "step": 7536 }, { "epoch": 0.774581321278126, "grad_norm": 0.04080953076481819, "learning_rate": 0.01, "loss": 2.1319, "step": 7539 }, { "epoch": 0.774889551012021, "grad_norm": 0.03433745354413986, "learning_rate": 0.01, "loss": 2.1024, "step": 7542 }, { "epoch": 0.7751977807459159, "grad_norm": 0.1574896275997162, "learning_rate": 0.01, "loss": 2.1182, "step": 7545 }, { "epoch": 0.7755060104798109, "grad_norm": 0.1207810789346695, "learning_rate": 0.01, "loss": 2.1052, "step": 7548 }, { "epoch": 0.7758142402137059, "grad_norm": 0.07270894944667816, "learning_rate": 0.01, "loss": 2.1331, "step": 7551 }, { "epoch": 0.7761224699476009, "grad_norm": 0.07062831521034241, "learning_rate": 0.01, "loss": 2.099, "step": 7554 }, { "epoch": 0.7764306996814959, "grad_norm": 0.04142964631319046, "learning_rate": 0.01, "loss": 2.1192, "step": 7557 }, { "epoch": 0.7767389294153909, "grad_norm": 0.04645151272416115, "learning_rate": 0.01, "loss": 2.1117, "step": 7560 }, { "epoch": 0.7770471591492859, "grad_norm": 0.046251073479652405, "learning_rate": 0.01, "loss": 2.1399, "step": 7563 }, { "epoch": 0.7773553888831809, "grad_norm": 0.07185769826173782, "learning_rate": 0.01, "loss": 2.1261, "step": 7566 }, { "epoch": 0.7776636186170759, "grad_norm": 0.045216575264930725, "learning_rate": 0.01, "loss": 2.1302, "step": 7569 }, { "epoch": 0.7779718483509709, "grad_norm": 0.04923580586910248, "learning_rate": 0.01, "loss": 2.1482, "step": 7572 }, { "epoch": 0.7782800780848659, "grad_norm": 0.06434139609336853, "learning_rate": 0.01, "loss": 2.1325, "step": 7575 }, { "epoch": 0.7785883078187609, "grad_norm": 0.11186740547418594, "learning_rate": 0.01, "loss": 2.1168, "step": 7578 }, { "epoch": 0.7788965375526559, "grad_norm": 0.06694278120994568, "learning_rate": 0.01, "loss": 2.15, "step": 7581 }, { "epoch": 0.7792047672865509, "grad_norm": 0.05431769788265228, "learning_rate": 0.01, "loss": 2.1156, "step": 7584 }, { "epoch": 0.7795129970204459, "grad_norm": 0.05853963643312454, "learning_rate": 0.01, "loss": 2.1145, "step": 7587 }, { "epoch": 0.779821226754341, "grad_norm": 0.04059399664402008, "learning_rate": 0.01, "loss": 2.1373, "step": 7590 }, { "epoch": 0.780129456488236, "grad_norm": 0.06444236636161804, "learning_rate": 0.01, "loss": 2.1167, "step": 7593 }, { "epoch": 0.7804376862221309, "grad_norm": 0.09885245561599731, "learning_rate": 0.01, "loss": 2.1146, "step": 7596 }, { "epoch": 0.7807459159560259, "grad_norm": 0.08536794036626816, "learning_rate": 0.01, "loss": 2.1282, "step": 7599 }, { "epoch": 0.7810541456899209, "grad_norm": 0.04299011081457138, "learning_rate": 0.01, "loss": 2.103, "step": 7602 }, { "epoch": 0.7813623754238159, "grad_norm": 0.060757700353860855, "learning_rate": 0.01, "loss": 2.0923, "step": 7605 }, { "epoch": 0.7816706051577109, "grad_norm": 0.037401244044303894, "learning_rate": 0.01, "loss": 2.1343, "step": 7608 }, { "epoch": 0.7819788348916059, "grad_norm": 0.12264932692050934, "learning_rate": 0.01, "loss": 2.1193, "step": 7611 }, { "epoch": 0.7822870646255009, "grad_norm": 0.052691470831632614, "learning_rate": 0.01, "loss": 2.1097, "step": 7614 }, { "epoch": 0.7825952943593959, "grad_norm": 0.05509025603532791, "learning_rate": 0.01, "loss": 2.1208, "step": 7617 }, { "epoch": 0.7829035240932909, "grad_norm": 0.10352631658315659, "learning_rate": 0.01, "loss": 2.1277, "step": 7620 }, { "epoch": 0.7832117538271859, "grad_norm": 0.05865751951932907, "learning_rate": 0.01, "loss": 2.138, "step": 7623 }, { "epoch": 0.7835199835610809, "grad_norm": 0.09445837885141373, "learning_rate": 0.01, "loss": 2.1131, "step": 7626 }, { "epoch": 0.7838282132949759, "grad_norm": 0.11066542565822601, "learning_rate": 0.01, "loss": 2.1315, "step": 7629 }, { "epoch": 0.7841364430288709, "grad_norm": 0.05489170923829079, "learning_rate": 0.01, "loss": 2.1264, "step": 7632 }, { "epoch": 0.7844446727627659, "grad_norm": 0.06804061681032181, "learning_rate": 0.01, "loss": 2.1491, "step": 7635 }, { "epoch": 0.7847529024966609, "grad_norm": 0.07411237061023712, "learning_rate": 0.01, "loss": 2.126, "step": 7638 }, { "epoch": 0.7850611322305558, "grad_norm": 0.050356972962617874, "learning_rate": 0.01, "loss": 2.1237, "step": 7641 }, { "epoch": 0.7853693619644508, "grad_norm": 0.06125912442803383, "learning_rate": 0.01, "loss": 2.1328, "step": 7644 }, { "epoch": 0.7856775916983458, "grad_norm": 0.05983618274331093, "learning_rate": 0.01, "loss": 2.1152, "step": 7647 }, { "epoch": 0.7859858214322408, "grad_norm": 0.04065684601664543, "learning_rate": 0.01, "loss": 2.1213, "step": 7650 }, { "epoch": 0.7862940511661358, "grad_norm": 0.05535745993256569, "learning_rate": 0.01, "loss": 2.1106, "step": 7653 }, { "epoch": 0.7866022809000308, "grad_norm": 0.09727519005537033, "learning_rate": 0.01, "loss": 2.1202, "step": 7656 }, { "epoch": 0.7869105106339258, "grad_norm": 0.07764584571123123, "learning_rate": 0.01, "loss": 2.1181, "step": 7659 }, { "epoch": 0.7872187403678208, "grad_norm": 0.04933121055364609, "learning_rate": 0.01, "loss": 2.1217, "step": 7662 }, { "epoch": 0.7875269701017158, "grad_norm": 0.12199501693248749, "learning_rate": 0.01, "loss": 2.1412, "step": 7665 }, { "epoch": 0.7878351998356108, "grad_norm": 0.14431309700012207, "learning_rate": 0.01, "loss": 2.1249, "step": 7668 }, { "epoch": 0.7881434295695058, "grad_norm": 0.07583998888731003, "learning_rate": 0.01, "loss": 2.139, "step": 7671 }, { "epoch": 0.7884516593034008, "grad_norm": 0.10426465421915054, "learning_rate": 0.01, "loss": 2.1053, "step": 7674 }, { "epoch": 0.7887598890372958, "grad_norm": 0.06411170959472656, "learning_rate": 0.01, "loss": 2.1105, "step": 7677 }, { "epoch": 0.7890681187711908, "grad_norm": 0.07436025142669678, "learning_rate": 0.01, "loss": 2.1301, "step": 7680 }, { "epoch": 0.7893763485050858, "grad_norm": 0.10409426689147949, "learning_rate": 0.01, "loss": 2.1319, "step": 7683 }, { "epoch": 0.7896845782389807, "grad_norm": 0.05232664570212364, "learning_rate": 0.01, "loss": 2.1458, "step": 7686 }, { "epoch": 0.7899928079728757, "grad_norm": 0.06705309450626373, "learning_rate": 0.01, "loss": 2.1231, "step": 7689 }, { "epoch": 0.7903010377067707, "grad_norm": 0.04422546550631523, "learning_rate": 0.01, "loss": 2.0836, "step": 7692 }, { "epoch": 0.7906092674406657, "grad_norm": 0.04316714033484459, "learning_rate": 0.01, "loss": 2.1117, "step": 7695 }, { "epoch": 0.7909174971745607, "grad_norm": 0.058282140642404556, "learning_rate": 0.01, "loss": 2.0904, "step": 7698 }, { "epoch": 0.7912257269084557, "grad_norm": 0.07676571607589722, "learning_rate": 0.01, "loss": 2.1402, "step": 7701 }, { "epoch": 0.7915339566423507, "grad_norm": 0.07258665561676025, "learning_rate": 0.01, "loss": 2.1458, "step": 7704 }, { "epoch": 0.7918421863762458, "grad_norm": 0.04850257560610771, "learning_rate": 0.01, "loss": 2.0886, "step": 7707 }, { "epoch": 0.7921504161101408, "grad_norm": 0.05658482015132904, "learning_rate": 0.01, "loss": 2.1174, "step": 7710 }, { "epoch": 0.7924586458440358, "grad_norm": 0.06475166231393814, "learning_rate": 0.01, "loss": 2.0995, "step": 7713 }, { "epoch": 0.7927668755779308, "grad_norm": 0.10428962856531143, "learning_rate": 0.01, "loss": 2.109, "step": 7716 }, { "epoch": 0.7930751053118258, "grad_norm": 0.04227283224463463, "learning_rate": 0.01, "loss": 2.1124, "step": 7719 }, { "epoch": 0.7933833350457208, "grad_norm": 0.0594823881983757, "learning_rate": 0.01, "loss": 2.0944, "step": 7722 }, { "epoch": 0.7936915647796158, "grad_norm": 0.08695527911186218, "learning_rate": 0.01, "loss": 2.1077, "step": 7725 }, { "epoch": 0.7939997945135108, "grad_norm": 0.06003952398896217, "learning_rate": 0.01, "loss": 2.088, "step": 7728 }, { "epoch": 0.7943080242474058, "grad_norm": 0.058509476482868195, "learning_rate": 0.01, "loss": 2.1471, "step": 7731 }, { "epoch": 0.7946162539813008, "grad_norm": 0.048057131469249725, "learning_rate": 0.01, "loss": 2.1252, "step": 7734 }, { "epoch": 0.7949244837151958, "grad_norm": 0.11144626140594482, "learning_rate": 0.01, "loss": 2.1209, "step": 7737 }, { "epoch": 0.7952327134490907, "grad_norm": 0.041008081287145615, "learning_rate": 0.01, "loss": 2.1139, "step": 7740 }, { "epoch": 0.7955409431829857, "grad_norm": 0.04088988155126572, "learning_rate": 0.01, "loss": 2.0927, "step": 7743 }, { "epoch": 0.7958491729168807, "grad_norm": 0.1495555192232132, "learning_rate": 0.01, "loss": 2.0977, "step": 7746 }, { "epoch": 0.7961574026507757, "grad_norm": 0.042645204812288284, "learning_rate": 0.01, "loss": 2.1021, "step": 7749 }, { "epoch": 0.7964656323846707, "grad_norm": 0.04671596363186836, "learning_rate": 0.01, "loss": 2.1015, "step": 7752 }, { "epoch": 0.7967738621185657, "grad_norm": 0.07249152660369873, "learning_rate": 0.01, "loss": 2.1278, "step": 7755 }, { "epoch": 0.7970820918524607, "grad_norm": 0.05848756060004234, "learning_rate": 0.01, "loss": 2.1168, "step": 7758 }, { "epoch": 0.7973903215863557, "grad_norm": 0.05428781732916832, "learning_rate": 0.01, "loss": 2.1228, "step": 7761 }, { "epoch": 0.7976985513202507, "grad_norm": 0.04751111939549446, "learning_rate": 0.01, "loss": 2.1178, "step": 7764 }, { "epoch": 0.7980067810541457, "grad_norm": 0.08653240650892258, "learning_rate": 0.01, "loss": 2.1081, "step": 7767 }, { "epoch": 0.7983150107880407, "grad_norm": 0.04038892313838005, "learning_rate": 0.01, "loss": 2.1028, "step": 7770 }, { "epoch": 0.7986232405219357, "grad_norm": 0.05703849345445633, "learning_rate": 0.01, "loss": 2.1249, "step": 7773 }, { "epoch": 0.7989314702558307, "grad_norm": 0.06425055861473083, "learning_rate": 0.01, "loss": 2.1291, "step": 7776 }, { "epoch": 0.7992396999897257, "grad_norm": 0.05537475273013115, "learning_rate": 0.01, "loss": 2.1122, "step": 7779 }, { "epoch": 0.7995479297236207, "grad_norm": 0.05172963812947273, "learning_rate": 0.01, "loss": 2.1218, "step": 7782 }, { "epoch": 0.7998561594575156, "grad_norm": 0.05907023698091507, "learning_rate": 0.01, "loss": 2.1041, "step": 7785 }, { "epoch": 0.8001643891914106, "grad_norm": 0.10618621110916138, "learning_rate": 0.01, "loss": 2.1266, "step": 7788 }, { "epoch": 0.8004726189253056, "grad_norm": 0.06189849600195885, "learning_rate": 0.01, "loss": 2.1327, "step": 7791 }, { "epoch": 0.8007808486592006, "grad_norm": 0.10624901950359344, "learning_rate": 0.01, "loss": 2.0943, "step": 7794 }, { "epoch": 0.8010890783930956, "grad_norm": 0.04061825945973396, "learning_rate": 0.01, "loss": 2.0859, "step": 7797 }, { "epoch": 0.8013973081269906, "grad_norm": 0.04402461647987366, "learning_rate": 0.01, "loss": 2.1303, "step": 7800 }, { "epoch": 0.8017055378608856, "grad_norm": 0.05029004439711571, "learning_rate": 0.01, "loss": 2.1224, "step": 7803 }, { "epoch": 0.8020137675947806, "grad_norm": 0.055786702781915665, "learning_rate": 0.01, "loss": 2.1296, "step": 7806 }, { "epoch": 0.8023219973286756, "grad_norm": 0.11740477383136749, "learning_rate": 0.01, "loss": 2.1222, "step": 7809 }, { "epoch": 0.8026302270625706, "grad_norm": 0.10261218994855881, "learning_rate": 0.01, "loss": 2.1405, "step": 7812 }, { "epoch": 0.8029384567964656, "grad_norm": 0.05233708769083023, "learning_rate": 0.01, "loss": 2.1118, "step": 7815 }, { "epoch": 0.8032466865303606, "grad_norm": 0.04390858858823776, "learning_rate": 0.01, "loss": 2.1299, "step": 7818 }, { "epoch": 0.8035549162642556, "grad_norm": 0.05893026292324066, "learning_rate": 0.01, "loss": 2.1184, "step": 7821 }, { "epoch": 0.8038631459981507, "grad_norm": 0.06398338079452515, "learning_rate": 0.01, "loss": 2.1057, "step": 7824 }, { "epoch": 0.8041713757320457, "grad_norm": 0.07129772752523422, "learning_rate": 0.01, "loss": 2.1056, "step": 7827 }, { "epoch": 0.8044796054659407, "grad_norm": 0.07481534779071808, "learning_rate": 0.01, "loss": 2.1272, "step": 7830 }, { "epoch": 0.8047878351998357, "grad_norm": 0.049200594425201416, "learning_rate": 0.01, "loss": 2.0942, "step": 7833 }, { "epoch": 0.8050960649337306, "grad_norm": 0.05124384164810181, "learning_rate": 0.01, "loss": 2.0859, "step": 7836 }, { "epoch": 0.8054042946676256, "grad_norm": 0.07997792959213257, "learning_rate": 0.01, "loss": 2.1412, "step": 7839 }, { "epoch": 0.8057125244015206, "grad_norm": 0.12280064076185226, "learning_rate": 0.01, "loss": 2.0826, "step": 7842 }, { "epoch": 0.8060207541354156, "grad_norm": 0.05292202904820442, "learning_rate": 0.01, "loss": 2.0965, "step": 7845 }, { "epoch": 0.8063289838693106, "grad_norm": 0.04903187230229378, "learning_rate": 0.01, "loss": 2.0911, "step": 7848 }, { "epoch": 0.8066372136032056, "grad_norm": 0.06882268935441971, "learning_rate": 0.01, "loss": 2.108, "step": 7851 }, { "epoch": 0.8069454433371006, "grad_norm": 0.06937083601951599, "learning_rate": 0.01, "loss": 2.1234, "step": 7854 }, { "epoch": 0.8072536730709956, "grad_norm": 0.10075647383928299, "learning_rate": 0.01, "loss": 2.0983, "step": 7857 }, { "epoch": 0.8075619028048906, "grad_norm": 0.07185733318328857, "learning_rate": 0.01, "loss": 2.0998, "step": 7860 }, { "epoch": 0.8078701325387856, "grad_norm": 0.07266184687614441, "learning_rate": 0.01, "loss": 2.1056, "step": 7863 }, { "epoch": 0.8081783622726806, "grad_norm": 0.05049808695912361, "learning_rate": 0.01, "loss": 2.1126, "step": 7866 }, { "epoch": 0.8084865920065756, "grad_norm": 0.07260838896036148, "learning_rate": 0.01, "loss": 2.1311, "step": 7869 }, { "epoch": 0.8087948217404706, "grad_norm": 0.0659325122833252, "learning_rate": 0.01, "loss": 2.1317, "step": 7872 }, { "epoch": 0.8091030514743656, "grad_norm": 0.056960709393024445, "learning_rate": 0.01, "loss": 2.0988, "step": 7875 }, { "epoch": 0.8094112812082606, "grad_norm": 0.1266620010137558, "learning_rate": 0.01, "loss": 2.1274, "step": 7878 }, { "epoch": 0.8097195109421556, "grad_norm": 0.05951874330639839, "learning_rate": 0.01, "loss": 2.1342, "step": 7881 }, { "epoch": 0.8100277406760505, "grad_norm": 0.06081915274262428, "learning_rate": 0.01, "loss": 2.1036, "step": 7884 }, { "epoch": 0.8103359704099455, "grad_norm": 0.07136547565460205, "learning_rate": 0.01, "loss": 2.1067, "step": 7887 }, { "epoch": 0.8106442001438405, "grad_norm": 0.08835722506046295, "learning_rate": 0.01, "loss": 2.1123, "step": 7890 }, { "epoch": 0.8109524298777355, "grad_norm": 0.04469553008675575, "learning_rate": 0.01, "loss": 2.1117, "step": 7893 }, { "epoch": 0.8112606596116305, "grad_norm": 0.042171087116003036, "learning_rate": 0.01, "loss": 2.0875, "step": 7896 }, { "epoch": 0.8115688893455255, "grad_norm": 0.0847015529870987, "learning_rate": 0.01, "loss": 2.0998, "step": 7899 }, { "epoch": 0.8118771190794205, "grad_norm": 0.09157509356737137, "learning_rate": 0.01, "loss": 2.121, "step": 7902 }, { "epoch": 0.8121853488133155, "grad_norm": 0.06001126766204834, "learning_rate": 0.01, "loss": 2.1441, "step": 7905 }, { "epoch": 0.8124935785472105, "grad_norm": 0.03552449122071266, "learning_rate": 0.01, "loss": 2.1371, "step": 7908 }, { "epoch": 0.8128018082811055, "grad_norm": 0.034304428845644, "learning_rate": 0.01, "loss": 2.1066, "step": 7911 }, { "epoch": 0.8131100380150005, "grad_norm": 0.04897907376289368, "learning_rate": 0.01, "loss": 2.1054, "step": 7914 }, { "epoch": 0.8134182677488955, "grad_norm": 0.06674344837665558, "learning_rate": 0.01, "loss": 2.1156, "step": 7917 }, { "epoch": 0.8137264974827905, "grad_norm": 0.06437379866838455, "learning_rate": 0.01, "loss": 2.1189, "step": 7920 }, { "epoch": 0.8140347272166855, "grad_norm": 0.06402087956666946, "learning_rate": 0.01, "loss": 2.1111, "step": 7923 }, { "epoch": 0.8143429569505805, "grad_norm": 0.11063557863235474, "learning_rate": 0.01, "loss": 2.1131, "step": 7926 }, { "epoch": 0.8146511866844754, "grad_norm": 0.10625256597995758, "learning_rate": 0.01, "loss": 2.113, "step": 7929 }, { "epoch": 0.8149594164183704, "grad_norm": 0.0682268813252449, "learning_rate": 0.01, "loss": 2.0929, "step": 7932 }, { "epoch": 0.8152676461522654, "grad_norm": 0.08721883594989777, "learning_rate": 0.01, "loss": 2.0878, "step": 7935 }, { "epoch": 0.8155758758861605, "grad_norm": 0.07372716814279556, "learning_rate": 0.01, "loss": 2.1173, "step": 7938 }, { "epoch": 0.8158841056200555, "grad_norm": 0.049299102276563644, "learning_rate": 0.01, "loss": 2.1172, "step": 7941 }, { "epoch": 0.8161923353539505, "grad_norm": 0.06552339345216751, "learning_rate": 0.01, "loss": 2.1035, "step": 7944 }, { "epoch": 0.8165005650878455, "grad_norm": 0.08362871408462524, "learning_rate": 0.01, "loss": 2.0942, "step": 7947 }, { "epoch": 0.8168087948217405, "grad_norm": 0.07610680162906647, "learning_rate": 0.01, "loss": 2.1026, "step": 7950 }, { "epoch": 0.8171170245556355, "grad_norm": 0.058830149471759796, "learning_rate": 0.01, "loss": 2.121, "step": 7953 }, { "epoch": 0.8174252542895305, "grad_norm": 0.10281010717153549, "learning_rate": 0.01, "loss": 2.1084, "step": 7956 }, { "epoch": 0.8177334840234255, "grad_norm": 0.04509102553129196, "learning_rate": 0.01, "loss": 2.0917, "step": 7959 }, { "epoch": 0.8180417137573205, "grad_norm": 0.034059979021549225, "learning_rate": 0.01, "loss": 2.1286, "step": 7962 }, { "epoch": 0.8183499434912155, "grad_norm": 0.09370562434196472, "learning_rate": 0.01, "loss": 2.1298, "step": 7965 }, { "epoch": 0.8186581732251105, "grad_norm": 0.09386254847049713, "learning_rate": 0.01, "loss": 2.1018, "step": 7968 }, { "epoch": 0.8189664029590055, "grad_norm": 0.1801362931728363, "learning_rate": 0.01, "loss": 2.1125, "step": 7971 }, { "epoch": 0.8192746326929005, "grad_norm": 0.12590090930461884, "learning_rate": 0.01, "loss": 2.1145, "step": 7974 }, { "epoch": 0.8195828624267955, "grad_norm": 0.09913074970245361, "learning_rate": 0.01, "loss": 2.1049, "step": 7977 }, { "epoch": 0.8198910921606904, "grad_norm": 0.05249069631099701, "learning_rate": 0.01, "loss": 2.1348, "step": 7980 }, { "epoch": 0.8201993218945854, "grad_norm": 0.05334639549255371, "learning_rate": 0.01, "loss": 2.0952, "step": 7983 }, { "epoch": 0.8205075516284804, "grad_norm": 0.03963373601436615, "learning_rate": 0.01, "loss": 2.1133, "step": 7986 }, { "epoch": 0.8208157813623754, "grad_norm": 0.03334924206137657, "learning_rate": 0.01, "loss": 2.1156, "step": 7989 }, { "epoch": 0.8211240110962704, "grad_norm": 0.0628419816493988, "learning_rate": 0.01, "loss": 2.1298, "step": 7992 }, { "epoch": 0.8214322408301654, "grad_norm": 0.07143758237361908, "learning_rate": 0.01, "loss": 2.0891, "step": 7995 }, { "epoch": 0.8217404705640604, "grad_norm": 0.06662650406360626, "learning_rate": 0.01, "loss": 2.0976, "step": 7998 }, { "epoch": 0.8220487002979554, "grad_norm": 0.10575726628303528, "learning_rate": 0.01, "loss": 2.0946, "step": 8001 }, { "epoch": 0.8223569300318504, "grad_norm": 0.056455157697200775, "learning_rate": 0.01, "loss": 2.0924, "step": 8004 }, { "epoch": 0.8226651597657454, "grad_norm": 0.10326797515153885, "learning_rate": 0.01, "loss": 2.0823, "step": 8007 }, { "epoch": 0.8229733894996404, "grad_norm": 0.08464314043521881, "learning_rate": 0.01, "loss": 2.1274, "step": 8010 }, { "epoch": 0.8232816192335354, "grad_norm": 0.052144117653369904, "learning_rate": 0.01, "loss": 2.0952, "step": 8013 }, { "epoch": 0.8235898489674304, "grad_norm": 0.05464213341474533, "learning_rate": 0.01, "loss": 2.1117, "step": 8016 }, { "epoch": 0.8238980787013254, "grad_norm": 0.06700276583433151, "learning_rate": 0.01, "loss": 2.1289, "step": 8019 }, { "epoch": 0.8242063084352204, "grad_norm": 0.05322539806365967, "learning_rate": 0.01, "loss": 2.1346, "step": 8022 }, { "epoch": 0.8245145381691154, "grad_norm": 0.040953267365694046, "learning_rate": 0.01, "loss": 2.1035, "step": 8025 }, { "epoch": 0.8248227679030103, "grad_norm": 0.043644580990076065, "learning_rate": 0.01, "loss": 2.1238, "step": 8028 }, { "epoch": 0.8251309976369053, "grad_norm": 0.060951683670282364, "learning_rate": 0.01, "loss": 2.1169, "step": 8031 }, { "epoch": 0.8254392273708003, "grad_norm": 0.11269400268793106, "learning_rate": 0.01, "loss": 2.1184, "step": 8034 }, { "epoch": 0.8257474571046953, "grad_norm": 0.05554080754518509, "learning_rate": 0.01, "loss": 2.112, "step": 8037 }, { "epoch": 0.8260556868385903, "grad_norm": 0.08155755698680878, "learning_rate": 0.01, "loss": 2.096, "step": 8040 }, { "epoch": 0.8263639165724853, "grad_norm": 0.0661015510559082, "learning_rate": 0.01, "loss": 2.0943, "step": 8043 }, { "epoch": 0.8266721463063803, "grad_norm": 0.06186169385910034, "learning_rate": 0.01, "loss": 2.1225, "step": 8046 }, { "epoch": 0.8269803760402753, "grad_norm": 0.06658541411161423, "learning_rate": 0.01, "loss": 2.1189, "step": 8049 }, { "epoch": 0.8272886057741703, "grad_norm": 0.09120085090398788, "learning_rate": 0.01, "loss": 2.1181, "step": 8052 }, { "epoch": 0.8275968355080654, "grad_norm": 0.059662993997335434, "learning_rate": 0.01, "loss": 2.1257, "step": 8055 }, { "epoch": 0.8279050652419604, "grad_norm": 0.08305416256189346, "learning_rate": 0.01, "loss": 2.1108, "step": 8058 }, { "epoch": 0.8282132949758554, "grad_norm": 0.047110967338085175, "learning_rate": 0.01, "loss": 2.0786, "step": 8061 }, { "epoch": 0.8285215247097504, "grad_norm": 0.042120445519685745, "learning_rate": 0.01, "loss": 2.1081, "step": 8064 }, { "epoch": 0.8288297544436454, "grad_norm": 0.04596862941980362, "learning_rate": 0.01, "loss": 2.1025, "step": 8067 }, { "epoch": 0.8291379841775404, "grad_norm": 0.055216096341609955, "learning_rate": 0.01, "loss": 2.1416, "step": 8070 }, { "epoch": 0.8294462139114354, "grad_norm": 0.05959683656692505, "learning_rate": 0.01, "loss": 2.0832, "step": 8073 }, { "epoch": 0.8297544436453304, "grad_norm": 0.045481909066438675, "learning_rate": 0.01, "loss": 2.1391, "step": 8076 }, { "epoch": 0.8300626733792253, "grad_norm": 0.049282100051641464, "learning_rate": 0.01, "loss": 2.1227, "step": 8079 }, { "epoch": 0.8303709031131203, "grad_norm": 0.058084890246391296, "learning_rate": 0.01, "loss": 2.1211, "step": 8082 }, { "epoch": 0.8306791328470153, "grad_norm": 0.11113768070936203, "learning_rate": 0.01, "loss": 2.132, "step": 8085 }, { "epoch": 0.8309873625809103, "grad_norm": 0.07015852630138397, "learning_rate": 0.01, "loss": 2.0988, "step": 8088 }, { "epoch": 0.8312955923148053, "grad_norm": 0.09509722143411636, "learning_rate": 0.01, "loss": 2.1064, "step": 8091 }, { "epoch": 0.8316038220487003, "grad_norm": 0.03616593778133392, "learning_rate": 0.01, "loss": 2.1078, "step": 8094 }, { "epoch": 0.8319120517825953, "grad_norm": 0.0486396960914135, "learning_rate": 0.01, "loss": 2.1279, "step": 8097 }, { "epoch": 0.8322202815164903, "grad_norm": 0.050647489726543427, "learning_rate": 0.01, "loss": 2.0808, "step": 8100 }, { "epoch": 0.8325285112503853, "grad_norm": 0.08125802874565125, "learning_rate": 0.01, "loss": 2.1099, "step": 8103 }, { "epoch": 0.8328367409842803, "grad_norm": 0.08078313618898392, "learning_rate": 0.01, "loss": 2.1162, "step": 8106 }, { "epoch": 0.8331449707181753, "grad_norm": 0.06157573312520981, "learning_rate": 0.01, "loss": 2.1288, "step": 8109 }, { "epoch": 0.8334532004520703, "grad_norm": 0.057771824300289154, "learning_rate": 0.01, "loss": 2.1136, "step": 8112 }, { "epoch": 0.8337614301859653, "grad_norm": 0.06634260714054108, "learning_rate": 0.01, "loss": 2.1029, "step": 8115 }, { "epoch": 0.8340696599198603, "grad_norm": 0.12186034023761749, "learning_rate": 0.01, "loss": 2.1236, "step": 8118 }, { "epoch": 0.8343778896537553, "grad_norm": 0.03940106928348541, "learning_rate": 0.01, "loss": 2.1169, "step": 8121 }, { "epoch": 0.8346861193876502, "grad_norm": 0.06003478914499283, "learning_rate": 0.01, "loss": 2.1108, "step": 8124 }, { "epoch": 0.8349943491215452, "grad_norm": 0.04963524639606476, "learning_rate": 0.01, "loss": 2.0893, "step": 8127 }, { "epoch": 0.8353025788554402, "grad_norm": 0.04543556645512581, "learning_rate": 0.01, "loss": 2.1062, "step": 8130 }, { "epoch": 0.8356108085893352, "grad_norm": 0.05210501328110695, "learning_rate": 0.01, "loss": 2.1065, "step": 8133 }, { "epoch": 0.8359190383232302, "grad_norm": 0.10866094380617142, "learning_rate": 0.01, "loss": 2.1154, "step": 8136 }, { "epoch": 0.8362272680571252, "grad_norm": 0.07595928758382797, "learning_rate": 0.01, "loss": 2.1079, "step": 8139 }, { "epoch": 0.8365354977910202, "grad_norm": 0.04948664829134941, "learning_rate": 0.01, "loss": 2.0955, "step": 8142 }, { "epoch": 0.8368437275249152, "grad_norm": 0.11719872057437897, "learning_rate": 0.01, "loss": 2.1106, "step": 8145 }, { "epoch": 0.8371519572588102, "grad_norm": 0.04469067603349686, "learning_rate": 0.01, "loss": 2.1145, "step": 8148 }, { "epoch": 0.8374601869927052, "grad_norm": 0.038385387510061264, "learning_rate": 0.01, "loss": 2.0933, "step": 8151 }, { "epoch": 0.8377684167266002, "grad_norm": 0.04040665924549103, "learning_rate": 0.01, "loss": 2.1119, "step": 8154 }, { "epoch": 0.8380766464604952, "grad_norm": 0.042900413274765015, "learning_rate": 0.01, "loss": 2.1053, "step": 8157 }, { "epoch": 0.8383848761943902, "grad_norm": 0.06709393113851547, "learning_rate": 0.01, "loss": 2.0858, "step": 8160 }, { "epoch": 0.8386931059282852, "grad_norm": 0.08388926833868027, "learning_rate": 0.01, "loss": 2.1287, "step": 8163 }, { "epoch": 0.8390013356621802, "grad_norm": 0.0701015368103981, "learning_rate": 0.01, "loss": 2.0965, "step": 8166 }, { "epoch": 0.8393095653960753, "grad_norm": 0.0841115415096283, "learning_rate": 0.01, "loss": 2.1136, "step": 8169 }, { "epoch": 0.8396177951299703, "grad_norm": 0.08542285114526749, "learning_rate": 0.01, "loss": 2.1166, "step": 8172 }, { "epoch": 0.8399260248638653, "grad_norm": 0.046626705676317215, "learning_rate": 0.01, "loss": 2.1318, "step": 8175 }, { "epoch": 0.8402342545977602, "grad_norm": 0.08752947300672531, "learning_rate": 0.01, "loss": 2.0873, "step": 8178 }, { "epoch": 0.8405424843316552, "grad_norm": 0.04728331416845322, "learning_rate": 0.01, "loss": 2.0951, "step": 8181 }, { "epoch": 0.8408507140655502, "grad_norm": 0.04881293699145317, "learning_rate": 0.01, "loss": 2.1011, "step": 8184 }, { "epoch": 0.8411589437994452, "grad_norm": 0.049758728593587875, "learning_rate": 0.01, "loss": 2.1206, "step": 8187 }, { "epoch": 0.8414671735333402, "grad_norm": 0.037589117884635925, "learning_rate": 0.01, "loss": 2.085, "step": 8190 }, { "epoch": 0.8417754032672352, "grad_norm": 0.11785265803337097, "learning_rate": 0.01, "loss": 2.1195, "step": 8193 }, { "epoch": 0.8420836330011302, "grad_norm": 0.09815037995576859, "learning_rate": 0.01, "loss": 2.065, "step": 8196 }, { "epoch": 0.8423918627350252, "grad_norm": 0.07950727641582489, "learning_rate": 0.01, "loss": 2.1081, "step": 8199 }, { "epoch": 0.8427000924689202, "grad_norm": 0.04057254642248154, "learning_rate": 0.01, "loss": 2.0882, "step": 8202 }, { "epoch": 0.8430083222028152, "grad_norm": 0.07260222733020782, "learning_rate": 0.01, "loss": 2.1018, "step": 8205 }, { "epoch": 0.8433165519367102, "grad_norm": 0.057693734765052795, "learning_rate": 0.01, "loss": 2.112, "step": 8208 }, { "epoch": 0.8436247816706052, "grad_norm": 0.039680637419223785, "learning_rate": 0.01, "loss": 2.0974, "step": 8211 }, { "epoch": 0.8439330114045002, "grad_norm": 0.07584577798843384, "learning_rate": 0.01, "loss": 2.129, "step": 8214 }, { "epoch": 0.8442412411383952, "grad_norm": 0.044016819447278976, "learning_rate": 0.01, "loss": 2.1139, "step": 8217 }, { "epoch": 0.8445494708722902, "grad_norm": 0.04449582099914551, "learning_rate": 0.01, "loss": 2.1085, "step": 8220 }, { "epoch": 0.8448577006061851, "grad_norm": 0.044676005840301514, "learning_rate": 0.01, "loss": 2.1081, "step": 8223 }, { "epoch": 0.8451659303400801, "grad_norm": 0.04926025867462158, "learning_rate": 0.01, "loss": 2.1187, "step": 8226 }, { "epoch": 0.8454741600739751, "grad_norm": 0.10964366793632507, "learning_rate": 0.01, "loss": 2.0898, "step": 8229 }, { "epoch": 0.8457823898078701, "grad_norm": 0.09405852109193802, "learning_rate": 0.01, "loss": 2.1117, "step": 8232 }, { "epoch": 0.8460906195417651, "grad_norm": 0.09241268038749695, "learning_rate": 0.01, "loss": 2.1225, "step": 8235 }, { "epoch": 0.8463988492756601, "grad_norm": 0.05887102335691452, "learning_rate": 0.01, "loss": 2.0944, "step": 8238 }, { "epoch": 0.8467070790095551, "grad_norm": 0.13513131439685822, "learning_rate": 0.01, "loss": 2.1217, "step": 8241 }, { "epoch": 0.8470153087434501, "grad_norm": 0.06370443850755692, "learning_rate": 0.01, "loss": 2.121, "step": 8244 }, { "epoch": 0.8473235384773451, "grad_norm": 0.0426030196249485, "learning_rate": 0.01, "loss": 2.0937, "step": 8247 }, { "epoch": 0.8476317682112401, "grad_norm": 0.049412764608860016, "learning_rate": 0.01, "loss": 2.108, "step": 8250 }, { "epoch": 0.8479399979451351, "grad_norm": 0.061407607048749924, "learning_rate": 0.01, "loss": 2.1009, "step": 8253 }, { "epoch": 0.8482482276790301, "grad_norm": 0.12416908144950867, "learning_rate": 0.01, "loss": 2.0819, "step": 8256 }, { "epoch": 0.8485564574129251, "grad_norm": 0.05728744715452194, "learning_rate": 0.01, "loss": 2.1132, "step": 8259 }, { "epoch": 0.8488646871468201, "grad_norm": 0.06724981963634491, "learning_rate": 0.01, "loss": 2.0932, "step": 8262 }, { "epoch": 0.8491729168807151, "grad_norm": 0.055260930210351944, "learning_rate": 0.01, "loss": 2.0933, "step": 8265 }, { "epoch": 0.84948114661461, "grad_norm": 0.04230106249451637, "learning_rate": 0.01, "loss": 2.1135, "step": 8268 }, { "epoch": 0.849789376348505, "grad_norm": 0.04593104496598244, "learning_rate": 0.01, "loss": 2.1091, "step": 8271 }, { "epoch": 0.8500976060824, "grad_norm": 0.09625285863876343, "learning_rate": 0.01, "loss": 2.0946, "step": 8274 }, { "epoch": 0.850405835816295, "grad_norm": 0.04556501284241676, "learning_rate": 0.01, "loss": 2.0975, "step": 8277 }, { "epoch": 0.85071406555019, "grad_norm": 0.09413543343544006, "learning_rate": 0.01, "loss": 2.0863, "step": 8280 }, { "epoch": 0.851022295284085, "grad_norm": 0.08400101214647293, "learning_rate": 0.01, "loss": 2.1045, "step": 8283 }, { "epoch": 0.8513305250179801, "grad_norm": 0.06278138607740402, "learning_rate": 0.01, "loss": 2.116, "step": 8286 }, { "epoch": 0.8516387547518751, "grad_norm": 0.04442959651350975, "learning_rate": 0.01, "loss": 2.0796, "step": 8289 }, { "epoch": 0.8519469844857701, "grad_norm": 0.045807912945747375, "learning_rate": 0.01, "loss": 2.0823, "step": 8292 }, { "epoch": 0.8522552142196651, "grad_norm": 0.0426551029086113, "learning_rate": 0.01, "loss": 2.1109, "step": 8295 }, { "epoch": 0.8525634439535601, "grad_norm": 0.12200357019901276, "learning_rate": 0.01, "loss": 2.1146, "step": 8298 }, { "epoch": 0.8528716736874551, "grad_norm": 0.04152747616171837, "learning_rate": 0.01, "loss": 2.1204, "step": 8301 }, { "epoch": 0.8531799034213501, "grad_norm": 0.08464021235704422, "learning_rate": 0.01, "loss": 2.085, "step": 8304 }, { "epoch": 0.8534881331552451, "grad_norm": 0.050391390919685364, "learning_rate": 0.01, "loss": 2.0957, "step": 8307 }, { "epoch": 0.8537963628891401, "grad_norm": 0.08581732958555222, "learning_rate": 0.01, "loss": 2.1015, "step": 8310 }, { "epoch": 0.8541045926230351, "grad_norm": 0.10992308706045151, "learning_rate": 0.01, "loss": 2.0939, "step": 8313 }, { "epoch": 0.8544128223569301, "grad_norm": 0.053225912153720856, "learning_rate": 0.01, "loss": 2.1106, "step": 8316 }, { "epoch": 0.854721052090825, "grad_norm": 0.06759096682071686, "learning_rate": 0.01, "loss": 2.1035, "step": 8319 }, { "epoch": 0.85502928182472, "grad_norm": 0.058069922029972076, "learning_rate": 0.01, "loss": 2.0833, "step": 8322 }, { "epoch": 0.855337511558615, "grad_norm": 0.0657680481672287, "learning_rate": 0.01, "loss": 2.0918, "step": 8325 }, { "epoch": 0.85564574129251, "grad_norm": 0.1428556591272354, "learning_rate": 0.01, "loss": 2.0895, "step": 8328 }, { "epoch": 0.855953971026405, "grad_norm": 0.10311869531869888, "learning_rate": 0.01, "loss": 2.1041, "step": 8331 }, { "epoch": 0.8562622007603, "grad_norm": 0.12024179100990295, "learning_rate": 0.01, "loss": 2.1179, "step": 8334 }, { "epoch": 0.856570430494195, "grad_norm": 0.08294446766376495, "learning_rate": 0.01, "loss": 2.1042, "step": 8337 }, { "epoch": 0.85687866022809, "grad_norm": 0.05203935503959656, "learning_rate": 0.01, "loss": 2.1275, "step": 8340 }, { "epoch": 0.857186889961985, "grad_norm": 0.061564356088638306, "learning_rate": 0.01, "loss": 2.0951, "step": 8343 }, { "epoch": 0.85749511969588, "grad_norm": 0.043616339564323425, "learning_rate": 0.01, "loss": 2.1082, "step": 8346 }, { "epoch": 0.857803349429775, "grad_norm": 0.0885004997253418, "learning_rate": 0.01, "loss": 2.1071, "step": 8349 }, { "epoch": 0.85811157916367, "grad_norm": 0.06275481730699539, "learning_rate": 0.01, "loss": 2.137, "step": 8352 }, { "epoch": 0.858419808897565, "grad_norm": 0.054776523262262344, "learning_rate": 0.01, "loss": 2.1117, "step": 8355 }, { "epoch": 0.85872803863146, "grad_norm": 0.07782801240682602, "learning_rate": 0.01, "loss": 2.0822, "step": 8358 }, { "epoch": 0.859036268365355, "grad_norm": 0.12301263958215714, "learning_rate": 0.01, "loss": 2.1126, "step": 8361 }, { "epoch": 0.85934449809925, "grad_norm": 0.07181745767593384, "learning_rate": 0.01, "loss": 2.1359, "step": 8364 }, { "epoch": 0.859652727833145, "grad_norm": 0.07232604175806046, "learning_rate": 0.01, "loss": 2.0849, "step": 8367 }, { "epoch": 0.8599609575670399, "grad_norm": 0.06810937821865082, "learning_rate": 0.01, "loss": 2.1088, "step": 8370 }, { "epoch": 0.8602691873009349, "grad_norm": 0.048163384199142456, "learning_rate": 0.01, "loss": 2.1173, "step": 8373 }, { "epoch": 0.8605774170348299, "grad_norm": 0.05688156560063362, "learning_rate": 0.01, "loss": 2.114, "step": 8376 }, { "epoch": 0.8608856467687249, "grad_norm": 0.065540611743927, "learning_rate": 0.01, "loss": 2.0989, "step": 8379 }, { "epoch": 0.8611938765026199, "grad_norm": 0.09561596065759659, "learning_rate": 0.01, "loss": 2.0894, "step": 8382 }, { "epoch": 0.8615021062365149, "grad_norm": 0.06719313561916351, "learning_rate": 0.01, "loss": 2.0928, "step": 8385 }, { "epoch": 0.8618103359704099, "grad_norm": 0.05895761027932167, "learning_rate": 0.01, "loss": 2.1037, "step": 8388 }, { "epoch": 0.8621185657043049, "grad_norm": 0.09232669323682785, "learning_rate": 0.01, "loss": 2.1272, "step": 8391 }, { "epoch": 0.8624267954381999, "grad_norm": 0.06715840846300125, "learning_rate": 0.01, "loss": 2.072, "step": 8394 }, { "epoch": 0.8627350251720949, "grad_norm": 0.04794420674443245, "learning_rate": 0.01, "loss": 2.1087, "step": 8397 }, { "epoch": 0.8630432549059899, "grad_norm": 0.037383124232292175, "learning_rate": 0.01, "loss": 2.0761, "step": 8400 }, { "epoch": 0.863351484639885, "grad_norm": 0.05601905286312103, "learning_rate": 0.01, "loss": 2.0926, "step": 8403 }, { "epoch": 0.86365971437378, "grad_norm": 0.0839313194155693, "learning_rate": 0.01, "loss": 2.0887, "step": 8406 }, { "epoch": 0.863967944107675, "grad_norm": 0.07600929588079453, "learning_rate": 0.01, "loss": 2.1143, "step": 8409 }, { "epoch": 0.86427617384157, "grad_norm": 0.06851659715175629, "learning_rate": 0.01, "loss": 2.0921, "step": 8412 }, { "epoch": 0.864584403575465, "grad_norm": 0.05021858587861061, "learning_rate": 0.01, "loss": 2.0903, "step": 8415 }, { "epoch": 0.86489263330936, "grad_norm": 0.04881426692008972, "learning_rate": 0.01, "loss": 2.1047, "step": 8418 }, { "epoch": 0.8652008630432549, "grad_norm": 0.04262546822428703, "learning_rate": 0.01, "loss": 2.0852, "step": 8421 }, { "epoch": 0.8655090927771499, "grad_norm": 0.050467535853385925, "learning_rate": 0.01, "loss": 2.1045, "step": 8424 }, { "epoch": 0.8658173225110449, "grad_norm": 0.0725008100271225, "learning_rate": 0.01, "loss": 2.077, "step": 8427 }, { "epoch": 0.8661255522449399, "grad_norm": 0.07234456390142441, "learning_rate": 0.01, "loss": 2.13, "step": 8430 }, { "epoch": 0.8664337819788349, "grad_norm": 0.060751501470804214, "learning_rate": 0.01, "loss": 2.0948, "step": 8433 }, { "epoch": 0.8667420117127299, "grad_norm": 0.058911584317684174, "learning_rate": 0.01, "loss": 2.0908, "step": 8436 }, { "epoch": 0.8670502414466249, "grad_norm": 0.08380532264709473, "learning_rate": 0.01, "loss": 2.1438, "step": 8439 }, { "epoch": 0.8673584711805199, "grad_norm": 0.058240536600351334, "learning_rate": 0.01, "loss": 2.1384, "step": 8442 }, { "epoch": 0.8676667009144149, "grad_norm": 0.0422792062163353, "learning_rate": 0.01, "loss": 2.0926, "step": 8445 }, { "epoch": 0.8679749306483099, "grad_norm": 0.07096652686595917, "learning_rate": 0.01, "loss": 2.1195, "step": 8448 }, { "epoch": 0.8682831603822049, "grad_norm": 0.13370642066001892, "learning_rate": 0.01, "loss": 2.1367, "step": 8451 }, { "epoch": 0.8685913901160999, "grad_norm": 0.0597628615796566, "learning_rate": 0.01, "loss": 2.087, "step": 8454 }, { "epoch": 0.8688996198499949, "grad_norm": 0.039561979472637177, "learning_rate": 0.01, "loss": 2.112, "step": 8457 }, { "epoch": 0.8692078495838899, "grad_norm": 0.04080485925078392, "learning_rate": 0.01, "loss": 2.1024, "step": 8460 }, { "epoch": 0.8695160793177849, "grad_norm": 0.05293022468686104, "learning_rate": 0.01, "loss": 2.0731, "step": 8463 }, { "epoch": 0.8698243090516798, "grad_norm": 0.06960830092430115, "learning_rate": 0.01, "loss": 2.1255, "step": 8466 }, { "epoch": 0.8701325387855748, "grad_norm": 0.09768849611282349, "learning_rate": 0.01, "loss": 2.1217, "step": 8469 }, { "epoch": 0.8704407685194698, "grad_norm": 0.11970885097980499, "learning_rate": 0.01, "loss": 2.0932, "step": 8472 }, { "epoch": 0.8707489982533648, "grad_norm": 0.12014521658420563, "learning_rate": 0.01, "loss": 2.1009, "step": 8475 }, { "epoch": 0.8710572279872598, "grad_norm": 0.04288540408015251, "learning_rate": 0.01, "loss": 2.1111, "step": 8478 }, { "epoch": 0.8713654577211548, "grad_norm": 0.033004507422447205, "learning_rate": 0.01, "loss": 2.1029, "step": 8481 }, { "epoch": 0.8716736874550498, "grad_norm": 0.03685779869556427, "learning_rate": 0.01, "loss": 2.1077, "step": 8484 }, { "epoch": 0.8719819171889448, "grad_norm": 0.06450948119163513, "learning_rate": 0.01, "loss": 2.102, "step": 8487 }, { "epoch": 0.8722901469228398, "grad_norm": 0.04806706681847572, "learning_rate": 0.01, "loss": 2.1056, "step": 8490 }, { "epoch": 0.8725983766567348, "grad_norm": 0.05847964435815811, "learning_rate": 0.01, "loss": 2.095, "step": 8493 }, { "epoch": 0.8729066063906298, "grad_norm": 0.11569567024707794, "learning_rate": 0.01, "loss": 2.1058, "step": 8496 }, { "epoch": 0.8732148361245248, "grad_norm": 0.04440119490027428, "learning_rate": 0.01, "loss": 2.1127, "step": 8499 }, { "epoch": 0.8735230658584198, "grad_norm": 0.13856938481330872, "learning_rate": 0.01, "loss": 2.1072, "step": 8502 }, { "epoch": 0.8738312955923148, "grad_norm": 0.06448937207460403, "learning_rate": 0.01, "loss": 2.0813, "step": 8505 }, { "epoch": 0.8741395253262098, "grad_norm": 0.05872811749577522, "learning_rate": 0.01, "loss": 2.1227, "step": 8508 }, { "epoch": 0.8744477550601047, "grad_norm": 0.06387540698051453, "learning_rate": 0.01, "loss": 2.099, "step": 8511 }, { "epoch": 0.8747559847939997, "grad_norm": 0.044399481266736984, "learning_rate": 0.01, "loss": 2.0989, "step": 8514 }, { "epoch": 0.8750642145278948, "grad_norm": 0.118850938975811, "learning_rate": 0.01, "loss": 2.1261, "step": 8517 }, { "epoch": 0.8753724442617898, "grad_norm": 0.05479248985648155, "learning_rate": 0.01, "loss": 2.0701, "step": 8520 }, { "epoch": 0.8756806739956848, "grad_norm": 0.06442543119192123, "learning_rate": 0.01, "loss": 2.0844, "step": 8523 }, { "epoch": 0.8759889037295798, "grad_norm": 0.054294027388095856, "learning_rate": 0.01, "loss": 2.1051, "step": 8526 }, { "epoch": 0.8762971334634748, "grad_norm": 0.04776893928647041, "learning_rate": 0.01, "loss": 2.1056, "step": 8529 }, { "epoch": 0.8766053631973698, "grad_norm": 0.06740310043096542, "learning_rate": 0.01, "loss": 2.0956, "step": 8532 }, { "epoch": 0.8769135929312648, "grad_norm": 0.048034511506557465, "learning_rate": 0.01, "loss": 2.1223, "step": 8535 }, { "epoch": 0.8772218226651598, "grad_norm": 0.05819391459226608, "learning_rate": 0.01, "loss": 2.1133, "step": 8538 }, { "epoch": 0.8775300523990548, "grad_norm": 0.06093437224626541, "learning_rate": 0.01, "loss": 2.0889, "step": 8541 }, { "epoch": 0.8778382821329498, "grad_norm": 0.04628787562251091, "learning_rate": 0.01, "loss": 2.1202, "step": 8544 }, { "epoch": 0.8781465118668448, "grad_norm": 0.0903085321187973, "learning_rate": 0.01, "loss": 2.0495, "step": 8547 }, { "epoch": 0.8784547416007398, "grad_norm": 0.06924945116043091, "learning_rate": 0.01, "loss": 2.1004, "step": 8550 }, { "epoch": 0.8787629713346348, "grad_norm": 0.04104374721646309, "learning_rate": 0.01, "loss": 2.0954, "step": 8553 }, { "epoch": 0.8790712010685298, "grad_norm": 0.11671441793441772, "learning_rate": 0.01, "loss": 2.1027, "step": 8556 }, { "epoch": 0.8793794308024248, "grad_norm": 0.10247964411973953, "learning_rate": 0.01, "loss": 2.0861, "step": 8559 }, { "epoch": 0.8796876605363197, "grad_norm": 0.03979288041591644, "learning_rate": 0.01, "loss": 2.1307, "step": 8562 }, { "epoch": 0.8799958902702147, "grad_norm": 0.0406351312994957, "learning_rate": 0.01, "loss": 2.0868, "step": 8565 }, { "epoch": 0.8803041200041097, "grad_norm": 0.04127006232738495, "learning_rate": 0.01, "loss": 2.0899, "step": 8568 }, { "epoch": 0.8806123497380047, "grad_norm": 0.04559047520160675, "learning_rate": 0.01, "loss": 2.1071, "step": 8571 }, { "epoch": 0.8809205794718997, "grad_norm": 0.12507610023021698, "learning_rate": 0.01, "loss": 2.0944, "step": 8574 }, { "epoch": 0.8812288092057947, "grad_norm": 0.042683400213718414, "learning_rate": 0.01, "loss": 2.078, "step": 8577 }, { "epoch": 0.8815370389396897, "grad_norm": 0.04022818058729172, "learning_rate": 0.01, "loss": 2.0797, "step": 8580 }, { "epoch": 0.8818452686735847, "grad_norm": 0.0382862351834774, "learning_rate": 0.01, "loss": 2.0859, "step": 8583 }, { "epoch": 0.8821534984074797, "grad_norm": 0.05260771885514259, "learning_rate": 0.01, "loss": 2.0832, "step": 8586 }, { "epoch": 0.8824617281413747, "grad_norm": 0.05381648615002632, "learning_rate": 0.01, "loss": 2.1211, "step": 8589 }, { "epoch": 0.8827699578752697, "grad_norm": 0.055818814784288406, "learning_rate": 0.01, "loss": 2.1108, "step": 8592 }, { "epoch": 0.8830781876091647, "grad_norm": 0.16680215299129486, "learning_rate": 0.01, "loss": 2.0961, "step": 8595 }, { "epoch": 0.8833864173430597, "grad_norm": 0.10034742951393127, "learning_rate": 0.01, "loss": 2.1187, "step": 8598 }, { "epoch": 0.8836946470769547, "grad_norm": 0.0827341303229332, "learning_rate": 0.01, "loss": 2.1112, "step": 8601 }, { "epoch": 0.8840028768108497, "grad_norm": 0.07657956331968307, "learning_rate": 0.01, "loss": 2.0711, "step": 8604 }, { "epoch": 0.8843111065447447, "grad_norm": 0.036220960319042206, "learning_rate": 0.01, "loss": 2.1097, "step": 8607 }, { "epoch": 0.8846193362786396, "grad_norm": 0.04672658443450928, "learning_rate": 0.01, "loss": 2.1099, "step": 8610 }, { "epoch": 0.8849275660125346, "grad_norm": 0.04827800393104553, "learning_rate": 0.01, "loss": 2.1081, "step": 8613 }, { "epoch": 0.8852357957464296, "grad_norm": 0.04962724447250366, "learning_rate": 0.01, "loss": 2.0895, "step": 8616 }, { "epoch": 0.8855440254803246, "grad_norm": 0.03474809601902962, "learning_rate": 0.01, "loss": 2.0942, "step": 8619 }, { "epoch": 0.8858522552142196, "grad_norm": 0.07395246624946594, "learning_rate": 0.01, "loss": 2.1145, "step": 8622 }, { "epoch": 0.8861604849481146, "grad_norm": 0.09853484481573105, "learning_rate": 0.01, "loss": 2.0991, "step": 8625 }, { "epoch": 0.8864687146820096, "grad_norm": 0.11892013251781464, "learning_rate": 0.01, "loss": 2.0968, "step": 8628 }, { "epoch": 0.8867769444159046, "grad_norm": 0.12780621647834778, "learning_rate": 0.01, "loss": 2.1154, "step": 8631 }, { "epoch": 0.8870851741497997, "grad_norm": 0.04470033943653107, "learning_rate": 0.01, "loss": 2.1027, "step": 8634 }, { "epoch": 0.8873934038836947, "grad_norm": 0.054323747754096985, "learning_rate": 0.01, "loss": 2.0952, "step": 8637 }, { "epoch": 0.8877016336175897, "grad_norm": 0.08175788819789886, "learning_rate": 0.01, "loss": 2.0882, "step": 8640 }, { "epoch": 0.8880098633514847, "grad_norm": 0.07456079125404358, "learning_rate": 0.01, "loss": 2.141, "step": 8643 }, { "epoch": 0.8883180930853797, "grad_norm": 0.055910736322402954, "learning_rate": 0.01, "loss": 2.1102, "step": 8646 }, { "epoch": 0.8886263228192747, "grad_norm": 0.05231192335486412, "learning_rate": 0.01, "loss": 2.1026, "step": 8649 }, { "epoch": 0.8889345525531697, "grad_norm": 0.05306578800082207, "learning_rate": 0.01, "loss": 2.1051, "step": 8652 }, { "epoch": 0.8892427822870647, "grad_norm": 0.05569072067737579, "learning_rate": 0.01, "loss": 2.0835, "step": 8655 }, { "epoch": 0.8895510120209597, "grad_norm": 0.050971515476703644, "learning_rate": 0.01, "loss": 2.0718, "step": 8658 }, { "epoch": 0.8898592417548546, "grad_norm": 0.061436936259269714, "learning_rate": 0.01, "loss": 2.1167, "step": 8661 }, { "epoch": 0.8901674714887496, "grad_norm": 0.04307536780834198, "learning_rate": 0.01, "loss": 2.0972, "step": 8664 }, { "epoch": 0.8904757012226446, "grad_norm": 0.1459832638502121, "learning_rate": 0.01, "loss": 2.1306, "step": 8667 }, { "epoch": 0.8907839309565396, "grad_norm": 0.05527958646416664, "learning_rate": 0.01, "loss": 2.0974, "step": 8670 }, { "epoch": 0.8910921606904346, "grad_norm": 0.1319393813610077, "learning_rate": 0.01, "loss": 2.1259, "step": 8673 }, { "epoch": 0.8914003904243296, "grad_norm": 0.06124665215611458, "learning_rate": 0.01, "loss": 2.0997, "step": 8676 }, { "epoch": 0.8917086201582246, "grad_norm": 0.08667455613613129, "learning_rate": 0.01, "loss": 2.0941, "step": 8679 }, { "epoch": 0.8920168498921196, "grad_norm": 0.06631213426589966, "learning_rate": 0.01, "loss": 2.1196, "step": 8682 }, { "epoch": 0.8923250796260146, "grad_norm": 0.060188647359609604, "learning_rate": 0.01, "loss": 2.0971, "step": 8685 }, { "epoch": 0.8926333093599096, "grad_norm": 0.039312943816185, "learning_rate": 0.01, "loss": 2.1119, "step": 8688 }, { "epoch": 0.8929415390938046, "grad_norm": 0.03959662839770317, "learning_rate": 0.01, "loss": 2.0897, "step": 8691 }, { "epoch": 0.8932497688276996, "grad_norm": 0.09711046516895294, "learning_rate": 0.01, "loss": 2.1133, "step": 8694 }, { "epoch": 0.8935579985615946, "grad_norm": 0.07965920865535736, "learning_rate": 0.01, "loss": 2.0635, "step": 8697 }, { "epoch": 0.8938662282954896, "grad_norm": 0.08770687133073807, "learning_rate": 0.01, "loss": 2.0885, "step": 8700 }, { "epoch": 0.8941744580293846, "grad_norm": 0.04591045528650284, "learning_rate": 0.01, "loss": 2.0926, "step": 8703 }, { "epoch": 0.8944826877632795, "grad_norm": 0.09602218866348267, "learning_rate": 0.01, "loss": 2.0856, "step": 8706 }, { "epoch": 0.8947909174971745, "grad_norm": 0.09482742100954056, "learning_rate": 0.01, "loss": 2.0966, "step": 8709 }, { "epoch": 0.8950991472310695, "grad_norm": 0.03937089815735817, "learning_rate": 0.01, "loss": 2.1043, "step": 8712 }, { "epoch": 0.8954073769649645, "grad_norm": 0.056832704693078995, "learning_rate": 0.01, "loss": 2.1165, "step": 8715 }, { "epoch": 0.8957156066988595, "grad_norm": 0.06370353698730469, "learning_rate": 0.01, "loss": 2.1144, "step": 8718 }, { "epoch": 0.8960238364327545, "grad_norm": 0.06752549856901169, "learning_rate": 0.01, "loss": 2.1026, "step": 8721 }, { "epoch": 0.8963320661666495, "grad_norm": 0.13301892578601837, "learning_rate": 0.01, "loss": 2.11, "step": 8724 }, { "epoch": 0.8966402959005445, "grad_norm": 0.05210836976766586, "learning_rate": 0.01, "loss": 2.0925, "step": 8727 }, { "epoch": 0.8969485256344395, "grad_norm": 0.03570270165801048, "learning_rate": 0.01, "loss": 2.0809, "step": 8730 }, { "epoch": 0.8972567553683345, "grad_norm": 0.05898820236325264, "learning_rate": 0.01, "loss": 2.0786, "step": 8733 }, { "epoch": 0.8975649851022295, "grad_norm": 0.05087563395500183, "learning_rate": 0.01, "loss": 2.1071, "step": 8736 }, { "epoch": 0.8978732148361245, "grad_norm": 0.09473355114459991, "learning_rate": 0.01, "loss": 2.103, "step": 8739 }, { "epoch": 0.8981814445700195, "grad_norm": 0.09793075919151306, "learning_rate": 0.01, "loss": 2.0972, "step": 8742 }, { "epoch": 0.8984896743039145, "grad_norm": 0.05115204304456711, "learning_rate": 0.01, "loss": 2.0979, "step": 8745 }, { "epoch": 0.8987979040378095, "grad_norm": 0.057413987815380096, "learning_rate": 0.01, "loss": 2.1156, "step": 8748 }, { "epoch": 0.8991061337717046, "grad_norm": 0.04136224836111069, "learning_rate": 0.01, "loss": 2.1269, "step": 8751 }, { "epoch": 0.8994143635055996, "grad_norm": 0.06866753846406937, "learning_rate": 0.01, "loss": 2.1092, "step": 8754 }, { "epoch": 0.8997225932394946, "grad_norm": 0.0757627934217453, "learning_rate": 0.01, "loss": 2.0933, "step": 8757 }, { "epoch": 0.9000308229733895, "grad_norm": 0.08082983642816544, "learning_rate": 0.01, "loss": 2.1124, "step": 8760 }, { "epoch": 0.9003390527072845, "grad_norm": 0.046828944236040115, "learning_rate": 0.01, "loss": 2.0978, "step": 8763 }, { "epoch": 0.9006472824411795, "grad_norm": 0.11039458215236664, "learning_rate": 0.01, "loss": 2.0989, "step": 8766 }, { "epoch": 0.9009555121750745, "grad_norm": 0.048537638038396835, "learning_rate": 0.01, "loss": 2.0946, "step": 8769 }, { "epoch": 0.9012637419089695, "grad_norm": 0.06700310111045837, "learning_rate": 0.01, "loss": 2.1184, "step": 8772 }, { "epoch": 0.9015719716428645, "grad_norm": 0.044369909912347794, "learning_rate": 0.01, "loss": 2.1026, "step": 8775 }, { "epoch": 0.9018802013767595, "grad_norm": 0.041071876883506775, "learning_rate": 0.01, "loss": 2.0774, "step": 8778 }, { "epoch": 0.9021884311106545, "grad_norm": 0.04735315591096878, "learning_rate": 0.01, "loss": 2.0812, "step": 8781 }, { "epoch": 0.9024966608445495, "grad_norm": 0.11621284484863281, "learning_rate": 0.01, "loss": 2.0766, "step": 8784 }, { "epoch": 0.9028048905784445, "grad_norm": 0.11453153938055038, "learning_rate": 0.01, "loss": 2.0866, "step": 8787 }, { "epoch": 0.9031131203123395, "grad_norm": 0.057418763637542725, "learning_rate": 0.01, "loss": 2.081, "step": 8790 }, { "epoch": 0.9034213500462345, "grad_norm": 0.041579000651836395, "learning_rate": 0.01, "loss": 2.1154, "step": 8793 }, { "epoch": 0.9037295797801295, "grad_norm": 0.045673951506614685, "learning_rate": 0.01, "loss": 2.1241, "step": 8796 }, { "epoch": 0.9040378095140245, "grad_norm": 0.05963718518614769, "learning_rate": 0.01, "loss": 2.0955, "step": 8799 }, { "epoch": 0.9043460392479195, "grad_norm": 0.04776541888713837, "learning_rate": 0.01, "loss": 2.1138, "step": 8802 }, { "epoch": 0.9046542689818144, "grad_norm": 0.09103482216596603, "learning_rate": 0.01, "loss": 2.1192, "step": 8805 }, { "epoch": 0.9049624987157094, "grad_norm": 0.09218809008598328, "learning_rate": 0.01, "loss": 2.0985, "step": 8808 }, { "epoch": 0.9052707284496044, "grad_norm": 0.10253725945949554, "learning_rate": 0.01, "loss": 2.1189, "step": 8811 }, { "epoch": 0.9055789581834994, "grad_norm": 0.09638465940952301, "learning_rate": 0.01, "loss": 2.1008, "step": 8814 }, { "epoch": 0.9058871879173944, "grad_norm": 0.0947449579834938, "learning_rate": 0.01, "loss": 2.1222, "step": 8817 }, { "epoch": 0.9061954176512894, "grad_norm": 0.04588090255856514, "learning_rate": 0.01, "loss": 2.1198, "step": 8820 }, { "epoch": 0.9065036473851844, "grad_norm": 0.05041109770536423, "learning_rate": 0.01, "loss": 2.0843, "step": 8823 }, { "epoch": 0.9068118771190794, "grad_norm": 0.038898076862096786, "learning_rate": 0.01, "loss": 2.125, "step": 8826 }, { "epoch": 0.9071201068529744, "grad_norm": 0.03356321156024933, "learning_rate": 0.01, "loss": 2.0985, "step": 8829 }, { "epoch": 0.9074283365868694, "grad_norm": 0.04668448120355606, "learning_rate": 0.01, "loss": 2.1071, "step": 8832 }, { "epoch": 0.9077365663207644, "grad_norm": 0.051277391612529755, "learning_rate": 0.01, "loss": 2.0702, "step": 8835 }, { "epoch": 0.9080447960546594, "grad_norm": 0.049883171916007996, "learning_rate": 0.01, "loss": 2.1111, "step": 8838 }, { "epoch": 0.9083530257885544, "grad_norm": 0.04149313643574715, "learning_rate": 0.01, "loss": 2.0991, "step": 8841 }, { "epoch": 0.9086612555224494, "grad_norm": 0.09206261485815048, "learning_rate": 0.01, "loss": 2.0961, "step": 8844 }, { "epoch": 0.9089694852563444, "grad_norm": 0.1830751895904541, "learning_rate": 0.01, "loss": 2.1093, "step": 8847 }, { "epoch": 0.9092777149902393, "grad_norm": 0.0757865458726883, "learning_rate": 0.01, "loss": 2.115, "step": 8850 }, { "epoch": 0.9095859447241343, "grad_norm": 0.06030673533678055, "learning_rate": 0.01, "loss": 2.0874, "step": 8853 }, { "epoch": 0.9098941744580293, "grad_norm": 0.03440079465508461, "learning_rate": 0.01, "loss": 2.0997, "step": 8856 }, { "epoch": 0.9102024041919243, "grad_norm": 0.040004558861255646, "learning_rate": 0.01, "loss": 2.0767, "step": 8859 }, { "epoch": 0.9105106339258193, "grad_norm": 0.033261023461818695, "learning_rate": 0.01, "loss": 2.0834, "step": 8862 }, { "epoch": 0.9108188636597144, "grad_norm": 0.04814066365361214, "learning_rate": 0.01, "loss": 2.0868, "step": 8865 }, { "epoch": 0.9111270933936094, "grad_norm": 0.04939806088805199, "learning_rate": 0.01, "loss": 2.0944, "step": 8868 }, { "epoch": 0.9114353231275044, "grad_norm": 0.05242007225751877, "learning_rate": 0.01, "loss": 2.1035, "step": 8871 }, { "epoch": 0.9117435528613994, "grad_norm": 0.04576495289802551, "learning_rate": 0.01, "loss": 2.0881, "step": 8874 }, { "epoch": 0.9120517825952944, "grad_norm": 0.0369776152074337, "learning_rate": 0.01, "loss": 2.1017, "step": 8877 }, { "epoch": 0.9123600123291894, "grad_norm": 0.08296829462051392, "learning_rate": 0.01, "loss": 2.1199, "step": 8880 }, { "epoch": 0.9126682420630844, "grad_norm": 0.07186676561832428, "learning_rate": 0.01, "loss": 2.0906, "step": 8883 }, { "epoch": 0.9129764717969794, "grad_norm": 0.06849399209022522, "learning_rate": 0.01, "loss": 2.0944, "step": 8886 }, { "epoch": 0.9132847015308744, "grad_norm": 0.1285102367401123, "learning_rate": 0.01, "loss": 2.0959, "step": 8889 }, { "epoch": 0.9135929312647694, "grad_norm": 0.045700203627347946, "learning_rate": 0.01, "loss": 2.0924, "step": 8892 }, { "epoch": 0.9139011609986644, "grad_norm": 0.04561945050954819, "learning_rate": 0.01, "loss": 2.1126, "step": 8895 }, { "epoch": 0.9142093907325594, "grad_norm": 0.0417817123234272, "learning_rate": 0.01, "loss": 2.0692, "step": 8898 }, { "epoch": 0.9145176204664544, "grad_norm": 0.07923369109630585, "learning_rate": 0.01, "loss": 2.1059, "step": 8901 }, { "epoch": 0.9148258502003493, "grad_norm": 0.052836060523986816, "learning_rate": 0.01, "loss": 2.1089, "step": 8904 }, { "epoch": 0.9151340799342443, "grad_norm": 0.04591790586709976, "learning_rate": 0.01, "loss": 2.1007, "step": 8907 }, { "epoch": 0.9154423096681393, "grad_norm": 0.09871240705251694, "learning_rate": 0.01, "loss": 2.0718, "step": 8910 }, { "epoch": 0.9157505394020343, "grad_norm": 0.044554613530635834, "learning_rate": 0.01, "loss": 2.0956, "step": 8913 }, { "epoch": 0.9160587691359293, "grad_norm": 0.10009585320949554, "learning_rate": 0.01, "loss": 2.0838, "step": 8916 }, { "epoch": 0.9163669988698243, "grad_norm": 0.07252159714698792, "learning_rate": 0.01, "loss": 2.0973, "step": 8919 }, { "epoch": 0.9166752286037193, "grad_norm": 0.09162852168083191, "learning_rate": 0.01, "loss": 2.0961, "step": 8922 }, { "epoch": 0.9169834583376143, "grad_norm": 0.06149733439087868, "learning_rate": 0.01, "loss": 2.1377, "step": 8925 }, { "epoch": 0.9172916880715093, "grad_norm": 0.09315814077854156, "learning_rate": 0.01, "loss": 2.0901, "step": 8928 }, { "epoch": 0.9175999178054043, "grad_norm": 0.056877728551626205, "learning_rate": 0.01, "loss": 2.0934, "step": 8931 }, { "epoch": 0.9179081475392993, "grad_norm": 0.0976705476641655, "learning_rate": 0.01, "loss": 2.0791, "step": 8934 }, { "epoch": 0.9182163772731943, "grad_norm": 0.0493176206946373, "learning_rate": 0.01, "loss": 2.0937, "step": 8937 }, { "epoch": 0.9185246070070893, "grad_norm": 0.06268187612295151, "learning_rate": 0.01, "loss": 2.1053, "step": 8940 }, { "epoch": 0.9188328367409843, "grad_norm": 0.049251820892095566, "learning_rate": 0.01, "loss": 2.1104, "step": 8943 }, { "epoch": 0.9191410664748793, "grad_norm": 0.05342431366443634, "learning_rate": 0.01, "loss": 2.1005, "step": 8946 }, { "epoch": 0.9194492962087742, "grad_norm": 0.036090634763240814, "learning_rate": 0.01, "loss": 2.0815, "step": 8949 }, { "epoch": 0.9197575259426692, "grad_norm": 0.0320359505712986, "learning_rate": 0.01, "loss": 2.0704, "step": 8952 }, { "epoch": 0.9200657556765642, "grad_norm": 0.03514352813363075, "learning_rate": 0.01, "loss": 2.1046, "step": 8955 }, { "epoch": 0.9203739854104592, "grad_norm": 0.06132291629910469, "learning_rate": 0.01, "loss": 2.0887, "step": 8958 }, { "epoch": 0.9206822151443542, "grad_norm": 0.07312822341918945, "learning_rate": 0.01, "loss": 2.1079, "step": 8961 }, { "epoch": 0.9209904448782492, "grad_norm": 0.09670150279998779, "learning_rate": 0.01, "loss": 2.1195, "step": 8964 }, { "epoch": 0.9212986746121442, "grad_norm": 0.1106385663151741, "learning_rate": 0.01, "loss": 2.0809, "step": 8967 }, { "epoch": 0.9216069043460392, "grad_norm": 0.05964332073926926, "learning_rate": 0.01, "loss": 2.1108, "step": 8970 }, { "epoch": 0.9219151340799342, "grad_norm": 0.05584556236863136, "learning_rate": 0.01, "loss": 2.1274, "step": 8973 }, { "epoch": 0.9222233638138292, "grad_norm": 0.04485652595758438, "learning_rate": 0.01, "loss": 2.0627, "step": 8976 }, { "epoch": 0.9225315935477242, "grad_norm": 0.07286686450242996, "learning_rate": 0.01, "loss": 2.1087, "step": 8979 }, { "epoch": 0.9228398232816193, "grad_norm": 0.10815869271755219, "learning_rate": 0.01, "loss": 2.1057, "step": 8982 }, { "epoch": 0.9231480530155143, "grad_norm": 0.1037832722067833, "learning_rate": 0.01, "loss": 2.0836, "step": 8985 }, { "epoch": 0.9234562827494093, "grad_norm": 0.08297618478536606, "learning_rate": 0.01, "loss": 2.1181, "step": 8988 }, { "epoch": 0.9237645124833043, "grad_norm": 0.04203306511044502, "learning_rate": 0.01, "loss": 2.1112, "step": 8991 }, { "epoch": 0.9240727422171993, "grad_norm": 0.06641580909490585, "learning_rate": 0.01, "loss": 2.1004, "step": 8994 }, { "epoch": 0.9243809719510943, "grad_norm": 0.04921744763851166, "learning_rate": 0.01, "loss": 2.1116, "step": 8997 }, { "epoch": 0.9246892016849892, "grad_norm": 0.03472235053777695, "learning_rate": 0.01, "loss": 2.0777, "step": 9000 }, { "epoch": 0.9249974314188842, "grad_norm": 0.03650922700762749, "learning_rate": 0.01, "loss": 2.0802, "step": 9003 }, { "epoch": 0.9253056611527792, "grad_norm": 0.04657342657446861, "learning_rate": 0.01, "loss": 2.0773, "step": 9006 }, { "epoch": 0.9256138908866742, "grad_norm": 0.05943501368165016, "learning_rate": 0.01, "loss": 2.0753, "step": 9009 }, { "epoch": 0.9259221206205692, "grad_norm": 0.04763554409146309, "learning_rate": 0.01, "loss": 2.0959, "step": 9012 }, { "epoch": 0.9262303503544642, "grad_norm": 0.1267511248588562, "learning_rate": 0.01, "loss": 2.0971, "step": 9015 }, { "epoch": 0.9265385800883592, "grad_norm": 0.055529460310935974, "learning_rate": 0.01, "loss": 2.1327, "step": 9018 }, { "epoch": 0.9268468098222542, "grad_norm": 0.15508927404880524, "learning_rate": 0.01, "loss": 2.0947, "step": 9021 }, { "epoch": 0.9271550395561492, "grad_norm": 0.0593777671456337, "learning_rate": 0.01, "loss": 2.1171, "step": 9024 }, { "epoch": 0.9274632692900442, "grad_norm": 0.08907107263803482, "learning_rate": 0.01, "loss": 2.093, "step": 9027 }, { "epoch": 0.9277714990239392, "grad_norm": 0.07041808217763901, "learning_rate": 0.01, "loss": 2.0676, "step": 9030 }, { "epoch": 0.9280797287578342, "grad_norm": 0.03434208780527115, "learning_rate": 0.01, "loss": 2.0928, "step": 9033 }, { "epoch": 0.9283879584917292, "grad_norm": 0.07591548562049866, "learning_rate": 0.01, "loss": 2.0857, "step": 9036 }, { "epoch": 0.9286961882256242, "grad_norm": 0.08999443799257278, "learning_rate": 0.01, "loss": 2.0984, "step": 9039 }, { "epoch": 0.9290044179595192, "grad_norm": 0.11046464741230011, "learning_rate": 0.01, "loss": 2.1009, "step": 9042 }, { "epoch": 0.9293126476934142, "grad_norm": 0.08271370083093643, "learning_rate": 0.01, "loss": 2.1027, "step": 9045 }, { "epoch": 0.9296208774273091, "grad_norm": 0.046337299048900604, "learning_rate": 0.01, "loss": 2.0826, "step": 9048 }, { "epoch": 0.9299291071612041, "grad_norm": 0.037284769117832184, "learning_rate": 0.01, "loss": 2.1015, "step": 9051 }, { "epoch": 0.9302373368950991, "grad_norm": 0.04956496134400368, "learning_rate": 0.01, "loss": 2.1036, "step": 9054 }, { "epoch": 0.9305455666289941, "grad_norm": 0.12329571694135666, "learning_rate": 0.01, "loss": 2.0917, "step": 9057 }, { "epoch": 0.9308537963628891, "grad_norm": 0.06971380859613419, "learning_rate": 0.01, "loss": 2.114, "step": 9060 }, { "epoch": 0.9311620260967841, "grad_norm": 0.06084508076310158, "learning_rate": 0.01, "loss": 2.1122, "step": 9063 }, { "epoch": 0.9314702558306791, "grad_norm": 0.049602411687374115, "learning_rate": 0.01, "loss": 2.1268, "step": 9066 }, { "epoch": 0.9317784855645741, "grad_norm": 0.05200349539518356, "learning_rate": 0.01, "loss": 2.0979, "step": 9069 }, { "epoch": 0.9320867152984691, "grad_norm": 0.05793909728527069, "learning_rate": 0.01, "loss": 2.096, "step": 9072 }, { "epoch": 0.9323949450323641, "grad_norm": 0.10819883644580841, "learning_rate": 0.01, "loss": 2.1096, "step": 9075 }, { "epoch": 0.9327031747662591, "grad_norm": 0.07809442281723022, "learning_rate": 0.01, "loss": 2.0968, "step": 9078 }, { "epoch": 0.9330114045001541, "grad_norm": 0.09595733880996704, "learning_rate": 0.01, "loss": 2.0769, "step": 9081 }, { "epoch": 0.9333196342340491, "grad_norm": 0.11658616364002228, "learning_rate": 0.01, "loss": 2.0945, "step": 9084 }, { "epoch": 0.9336278639679441, "grad_norm": 0.07642678171396255, "learning_rate": 0.01, "loss": 2.0811, "step": 9087 }, { "epoch": 0.933936093701839, "grad_norm": 0.03174865245819092, "learning_rate": 0.01, "loss": 2.1017, "step": 9090 }, { "epoch": 0.934244323435734, "grad_norm": 0.05137626454234123, "learning_rate": 0.01, "loss": 2.0878, "step": 9093 }, { "epoch": 0.9345525531696292, "grad_norm": 0.05306951329112053, "learning_rate": 0.01, "loss": 2.1163, "step": 9096 }, { "epoch": 0.9348607829035241, "grad_norm": 0.0716642439365387, "learning_rate": 0.01, "loss": 2.0903, "step": 9099 }, { "epoch": 0.9351690126374191, "grad_norm": 0.10328514873981476, "learning_rate": 0.01, "loss": 2.0789, "step": 9102 }, { "epoch": 0.9354772423713141, "grad_norm": 0.04914560168981552, "learning_rate": 0.01, "loss": 2.0963, "step": 9105 }, { "epoch": 0.9357854721052091, "grad_norm": 0.04810576140880585, "learning_rate": 0.01, "loss": 2.1119, "step": 9108 }, { "epoch": 0.9360937018391041, "grad_norm": 0.05689787119626999, "learning_rate": 0.01, "loss": 2.0955, "step": 9111 }, { "epoch": 0.9364019315729991, "grad_norm": 0.06455382704734802, "learning_rate": 0.01, "loss": 2.0894, "step": 9114 }, { "epoch": 0.9367101613068941, "grad_norm": 0.044911760836839676, "learning_rate": 0.01, "loss": 2.0967, "step": 9117 }, { "epoch": 0.9370183910407891, "grad_norm": 0.06244887784123421, "learning_rate": 0.01, "loss": 2.0921, "step": 9120 }, { "epoch": 0.9373266207746841, "grad_norm": 0.052621614187955856, "learning_rate": 0.01, "loss": 2.1296, "step": 9123 }, { "epoch": 0.9376348505085791, "grad_norm": 0.05098232626914978, "learning_rate": 0.01, "loss": 2.0807, "step": 9126 }, { "epoch": 0.9379430802424741, "grad_norm": 0.058582011610269547, "learning_rate": 0.01, "loss": 2.0973, "step": 9129 }, { "epoch": 0.9382513099763691, "grad_norm": 0.10984500497579575, "learning_rate": 0.01, "loss": 2.0789, "step": 9132 }, { "epoch": 0.9385595397102641, "grad_norm": 0.045173123478889465, "learning_rate": 0.01, "loss": 2.0937, "step": 9135 }, { "epoch": 0.9388677694441591, "grad_norm": 0.06749478727579117, "learning_rate": 0.01, "loss": 2.1051, "step": 9138 }, { "epoch": 0.939175999178054, "grad_norm": 0.06236808001995087, "learning_rate": 0.01, "loss": 2.1099, "step": 9141 }, { "epoch": 0.939484228911949, "grad_norm": 0.06205837428569794, "learning_rate": 0.01, "loss": 2.0893, "step": 9144 }, { "epoch": 0.939792458645844, "grad_norm": 0.0742972195148468, "learning_rate": 0.01, "loss": 2.1034, "step": 9147 }, { "epoch": 0.940100688379739, "grad_norm": 0.06998419016599655, "learning_rate": 0.01, "loss": 2.0558, "step": 9150 }, { "epoch": 0.940408918113634, "grad_norm": 0.04214362055063248, "learning_rate": 0.01, "loss": 2.0968, "step": 9153 }, { "epoch": 0.940717147847529, "grad_norm": 0.055913276970386505, "learning_rate": 0.01, "loss": 2.0736, "step": 9156 }, { "epoch": 0.941025377581424, "grad_norm": 0.0941486805677414, "learning_rate": 0.01, "loss": 2.1038, "step": 9159 }, { "epoch": 0.941333607315319, "grad_norm": 0.05609782040119171, "learning_rate": 0.01, "loss": 2.096, "step": 9162 }, { "epoch": 0.941641837049214, "grad_norm": 0.05714662745594978, "learning_rate": 0.01, "loss": 2.0939, "step": 9165 }, { "epoch": 0.941950066783109, "grad_norm": 0.05364496633410454, "learning_rate": 0.01, "loss": 2.0838, "step": 9168 }, { "epoch": 0.942258296517004, "grad_norm": 0.050090350210666656, "learning_rate": 0.01, "loss": 2.087, "step": 9171 }, { "epoch": 0.942566526250899, "grad_norm": 0.07287559658288956, "learning_rate": 0.01, "loss": 2.098, "step": 9174 }, { "epoch": 0.942874755984794, "grad_norm": 0.04061901941895485, "learning_rate": 0.01, "loss": 2.0677, "step": 9177 }, { "epoch": 0.943182985718689, "grad_norm": 0.10750306397676468, "learning_rate": 0.01, "loss": 2.1105, "step": 9180 }, { "epoch": 0.943491215452584, "grad_norm": 0.10353365540504456, "learning_rate": 0.01, "loss": 2.0712, "step": 9183 }, { "epoch": 0.943799445186479, "grad_norm": 0.07502592355012894, "learning_rate": 0.01, "loss": 2.1115, "step": 9186 }, { "epoch": 0.944107674920374, "grad_norm": 0.046962104737758636, "learning_rate": 0.01, "loss": 2.0937, "step": 9189 }, { "epoch": 0.944415904654269, "grad_norm": 0.05084332078695297, "learning_rate": 0.01, "loss": 2.0943, "step": 9192 }, { "epoch": 0.9447241343881639, "grad_norm": 0.0458371527493, "learning_rate": 0.01, "loss": 2.0967, "step": 9195 }, { "epoch": 0.9450323641220589, "grad_norm": 0.040458545088768005, "learning_rate": 0.01, "loss": 2.0949, "step": 9198 }, { "epoch": 0.9453405938559539, "grad_norm": 0.046158358454704285, "learning_rate": 0.01, "loss": 2.0912, "step": 9201 }, { "epoch": 0.9456488235898489, "grad_norm": 0.10080043226480484, "learning_rate": 0.01, "loss": 2.1, "step": 9204 }, { "epoch": 0.9459570533237439, "grad_norm": 0.07679333537817001, "learning_rate": 0.01, "loss": 2.1013, "step": 9207 }, { "epoch": 0.9462652830576389, "grad_norm": 0.07189175486564636, "learning_rate": 0.01, "loss": 2.1008, "step": 9210 }, { "epoch": 0.946573512791534, "grad_norm": 0.07828579097986221, "learning_rate": 0.01, "loss": 2.1063, "step": 9213 }, { "epoch": 0.946881742525429, "grad_norm": 0.07649674266576767, "learning_rate": 0.01, "loss": 2.1146, "step": 9216 }, { "epoch": 0.947189972259324, "grad_norm": 0.06558651477098465, "learning_rate": 0.01, "loss": 2.0705, "step": 9219 }, { "epoch": 0.947498201993219, "grad_norm": 0.03276702016592026, "learning_rate": 0.01, "loss": 2.1065, "step": 9222 }, { "epoch": 0.947806431727114, "grad_norm": 0.03779645636677742, "learning_rate": 0.01, "loss": 2.0924, "step": 9225 }, { "epoch": 0.948114661461009, "grad_norm": 0.048466913402080536, "learning_rate": 0.01, "loss": 2.1037, "step": 9228 }, { "epoch": 0.948422891194904, "grad_norm": 0.04391203075647354, "learning_rate": 0.01, "loss": 2.0722, "step": 9231 }, { "epoch": 0.948731120928799, "grad_norm": 0.11353743076324463, "learning_rate": 0.01, "loss": 2.113, "step": 9234 }, { "epoch": 0.949039350662694, "grad_norm": 0.045930709689855576, "learning_rate": 0.01, "loss": 2.0699, "step": 9237 }, { "epoch": 0.949347580396589, "grad_norm": 0.06440164893865585, "learning_rate": 0.01, "loss": 2.0786, "step": 9240 }, { "epoch": 0.949655810130484, "grad_norm": 0.08666238933801651, "learning_rate": 0.01, "loss": 2.1049, "step": 9243 }, { "epoch": 0.9499640398643789, "grad_norm": 0.11012524366378784, "learning_rate": 0.01, "loss": 2.1018, "step": 9246 }, { "epoch": 0.9502722695982739, "grad_norm": 0.047307875007390976, "learning_rate": 0.01, "loss": 2.0943, "step": 9249 }, { "epoch": 0.9505804993321689, "grad_norm": 0.04565277695655823, "learning_rate": 0.01, "loss": 2.1174, "step": 9252 }, { "epoch": 0.9508887290660639, "grad_norm": 0.03389623388648033, "learning_rate": 0.01, "loss": 2.0896, "step": 9255 }, { "epoch": 0.9511969587999589, "grad_norm": 0.04582008346915245, "learning_rate": 0.01, "loss": 2.0888, "step": 9258 }, { "epoch": 0.9515051885338539, "grad_norm": 0.07722247391939163, "learning_rate": 0.01, "loss": 2.0843, "step": 9261 }, { "epoch": 0.9518134182677489, "grad_norm": 0.03505149856209755, "learning_rate": 0.01, "loss": 2.0903, "step": 9264 }, { "epoch": 0.9521216480016439, "grad_norm": 0.08010539412498474, "learning_rate": 0.01, "loss": 2.1249, "step": 9267 }, { "epoch": 0.9524298777355389, "grad_norm": 0.0723007321357727, "learning_rate": 0.01, "loss": 2.0951, "step": 9270 }, { "epoch": 0.9527381074694339, "grad_norm": 0.05629736930131912, "learning_rate": 0.01, "loss": 2.0948, "step": 9273 }, { "epoch": 0.9530463372033289, "grad_norm": 0.05514506623148918, "learning_rate": 0.01, "loss": 2.1214, "step": 9276 }, { "epoch": 0.9533545669372239, "grad_norm": 0.1107834130525589, "learning_rate": 0.01, "loss": 2.0876, "step": 9279 }, { "epoch": 0.9536627966711189, "grad_norm": 0.046309590339660645, "learning_rate": 0.01, "loss": 2.0669, "step": 9282 }, { "epoch": 0.9539710264050139, "grad_norm": 0.06956466287374496, "learning_rate": 0.01, "loss": 2.0903, "step": 9285 }, { "epoch": 0.9542792561389088, "grad_norm": 0.086011603474617, "learning_rate": 0.01, "loss": 2.0896, "step": 9288 }, { "epoch": 0.9545874858728038, "grad_norm": 0.04768074303865433, "learning_rate": 0.01, "loss": 2.0923, "step": 9291 }, { "epoch": 0.9548957156066988, "grad_norm": 0.0958017110824585, "learning_rate": 0.01, "loss": 2.1134, "step": 9294 }, { "epoch": 0.9552039453405938, "grad_norm": 0.06098558008670807, "learning_rate": 0.01, "loss": 2.0775, "step": 9297 }, { "epoch": 0.9555121750744888, "grad_norm": 0.05258086323738098, "learning_rate": 0.01, "loss": 2.0998, "step": 9300 }, { "epoch": 0.9558204048083838, "grad_norm": 0.06664231419563293, "learning_rate": 0.01, "loss": 2.1215, "step": 9303 }, { "epoch": 0.9561286345422788, "grad_norm": 0.05491488054394722, "learning_rate": 0.01, "loss": 2.0837, "step": 9306 }, { "epoch": 0.9564368642761738, "grad_norm": 0.0436725877225399, "learning_rate": 0.01, "loss": 2.1268, "step": 9309 }, { "epoch": 0.9567450940100688, "grad_norm": 0.08737560361623764, "learning_rate": 0.01, "loss": 2.0901, "step": 9312 }, { "epoch": 0.9570533237439638, "grad_norm": 0.08130110800266266, "learning_rate": 0.01, "loss": 2.0766, "step": 9315 }, { "epoch": 0.9573615534778588, "grad_norm": 0.07826768606901169, "learning_rate": 0.01, "loss": 2.0836, "step": 9318 }, { "epoch": 0.9576697832117538, "grad_norm": 0.09330857545137405, "learning_rate": 0.01, "loss": 2.0794, "step": 9321 }, { "epoch": 0.9579780129456488, "grad_norm": 0.03914652019739151, "learning_rate": 0.01, "loss": 2.102, "step": 9324 }, { "epoch": 0.9582862426795438, "grad_norm": 0.03853154182434082, "learning_rate": 0.01, "loss": 2.0915, "step": 9327 }, { "epoch": 0.9585944724134389, "grad_norm": 0.07349935919046402, "learning_rate": 0.01, "loss": 2.0856, "step": 9330 }, { "epoch": 0.9589027021473339, "grad_norm": 0.1473885178565979, "learning_rate": 0.01, "loss": 2.0904, "step": 9333 }, { "epoch": 0.9592109318812289, "grad_norm": 0.11091527342796326, "learning_rate": 0.01, "loss": 2.0934, "step": 9336 }, { "epoch": 0.9595191616151239, "grad_norm": 0.0400085523724556, "learning_rate": 0.01, "loss": 2.0924, "step": 9339 }, { "epoch": 0.9598273913490188, "grad_norm": 0.05025499314069748, "learning_rate": 0.01, "loss": 2.0845, "step": 9342 }, { "epoch": 0.9601356210829138, "grad_norm": 0.03745681792497635, "learning_rate": 0.01, "loss": 2.1006, "step": 9345 }, { "epoch": 0.9604438508168088, "grad_norm": 0.05147318169474602, "learning_rate": 0.01, "loss": 2.0912, "step": 9348 }, { "epoch": 0.9607520805507038, "grad_norm": 0.06338364630937576, "learning_rate": 0.01, "loss": 2.1169, "step": 9351 }, { "epoch": 0.9610603102845988, "grad_norm": 0.09458258748054504, "learning_rate": 0.01, "loss": 2.1005, "step": 9354 }, { "epoch": 0.9613685400184938, "grad_norm": 0.09883291274309158, "learning_rate": 0.01, "loss": 2.0934, "step": 9357 }, { "epoch": 0.9616767697523888, "grad_norm": 0.048908524215221405, "learning_rate": 0.01, "loss": 2.0863, "step": 9360 }, { "epoch": 0.9619849994862838, "grad_norm": 0.11762084811925888, "learning_rate": 0.01, "loss": 2.1182, "step": 9363 }, { "epoch": 0.9622932292201788, "grad_norm": 0.0835133045911789, "learning_rate": 0.01, "loss": 2.0728, "step": 9366 }, { "epoch": 0.9626014589540738, "grad_norm": 0.0580466203391552, "learning_rate": 0.01, "loss": 2.0756, "step": 9369 }, { "epoch": 0.9629096886879688, "grad_norm": 0.051043394953012466, "learning_rate": 0.01, "loss": 2.0936, "step": 9372 }, { "epoch": 0.9632179184218638, "grad_norm": 0.1081843450665474, "learning_rate": 0.01, "loss": 2.107, "step": 9375 }, { "epoch": 0.9635261481557588, "grad_norm": 0.04656577482819557, "learning_rate": 0.01, "loss": 2.1084, "step": 9378 }, { "epoch": 0.9638343778896538, "grad_norm": 0.03988798335194588, "learning_rate": 0.01, "loss": 2.1015, "step": 9381 }, { "epoch": 0.9641426076235488, "grad_norm": 0.07686126232147217, "learning_rate": 0.01, "loss": 2.1417, "step": 9384 }, { "epoch": 0.9644508373574437, "grad_norm": 0.057407401502132416, "learning_rate": 0.01, "loss": 2.1191, "step": 9387 }, { "epoch": 0.9647590670913387, "grad_norm": 0.0947386845946312, "learning_rate": 0.01, "loss": 2.0796, "step": 9390 }, { "epoch": 0.9650672968252337, "grad_norm": 0.05064699798822403, "learning_rate": 0.01, "loss": 2.1001, "step": 9393 }, { "epoch": 0.9653755265591287, "grad_norm": 0.04948986694216728, "learning_rate": 0.01, "loss": 2.0736, "step": 9396 }, { "epoch": 0.9656837562930237, "grad_norm": 0.10736438632011414, "learning_rate": 0.01, "loss": 2.0939, "step": 9399 }, { "epoch": 0.9659919860269187, "grad_norm": 0.039317477494478226, "learning_rate": 0.01, "loss": 2.1077, "step": 9402 }, { "epoch": 0.9663002157608137, "grad_norm": 0.06933067739009857, "learning_rate": 0.01, "loss": 2.1056, "step": 9405 }, { "epoch": 0.9666084454947087, "grad_norm": 0.03649623692035675, "learning_rate": 0.01, "loss": 2.0838, "step": 9408 }, { "epoch": 0.9669166752286037, "grad_norm": 0.09309684485197067, "learning_rate": 0.01, "loss": 2.0913, "step": 9411 }, { "epoch": 0.9672249049624987, "grad_norm": 0.11532922834157944, "learning_rate": 0.01, "loss": 2.1127, "step": 9414 }, { "epoch": 0.9675331346963937, "grad_norm": 0.053582970052957535, "learning_rate": 0.01, "loss": 2.0812, "step": 9417 }, { "epoch": 0.9678413644302887, "grad_norm": 0.07581201195716858, "learning_rate": 0.01, "loss": 2.1148, "step": 9420 }, { "epoch": 0.9681495941641837, "grad_norm": 0.051002178341150284, "learning_rate": 0.01, "loss": 2.0834, "step": 9423 }, { "epoch": 0.9684578238980787, "grad_norm": 0.06385383754968643, "learning_rate": 0.01, "loss": 2.0826, "step": 9426 }, { "epoch": 0.9687660536319737, "grad_norm": 0.10576994717121124, "learning_rate": 0.01, "loss": 2.0768, "step": 9429 }, { "epoch": 0.9690742833658686, "grad_norm": 0.054983410984277725, "learning_rate": 0.01, "loss": 2.0604, "step": 9432 }, { "epoch": 0.9693825130997636, "grad_norm": 0.09159716218709946, "learning_rate": 0.01, "loss": 2.0613, "step": 9435 }, { "epoch": 0.9696907428336586, "grad_norm": 0.07718406617641449, "learning_rate": 0.01, "loss": 2.1132, "step": 9438 }, { "epoch": 0.9699989725675536, "grad_norm": 0.0788009986281395, "learning_rate": 0.01, "loss": 2.0887, "step": 9441 }, { "epoch": 0.9703072023014487, "grad_norm": 0.040717653930187225, "learning_rate": 0.01, "loss": 2.084, "step": 9444 }, { "epoch": 0.9706154320353437, "grad_norm": 0.09677381813526154, "learning_rate": 0.01, "loss": 2.0903, "step": 9447 }, { "epoch": 0.9709236617692387, "grad_norm": 0.0706525593996048, "learning_rate": 0.01, "loss": 2.0647, "step": 9450 }, { "epoch": 0.9712318915031337, "grad_norm": 0.04624510183930397, "learning_rate": 0.01, "loss": 2.0818, "step": 9453 }, { "epoch": 0.9715401212370287, "grad_norm": 0.04585500434041023, "learning_rate": 0.01, "loss": 2.0927, "step": 9456 }, { "epoch": 0.9718483509709237, "grad_norm": 0.03468145430088043, "learning_rate": 0.01, "loss": 2.0759, "step": 9459 }, { "epoch": 0.9721565807048187, "grad_norm": 0.06956649571657181, "learning_rate": 0.01, "loss": 2.092, "step": 9462 }, { "epoch": 0.9724648104387137, "grad_norm": 0.04509080946445465, "learning_rate": 0.01, "loss": 2.1095, "step": 9465 }, { "epoch": 0.9727730401726087, "grad_norm": 0.09959586709737778, "learning_rate": 0.01, "loss": 2.0921, "step": 9468 }, { "epoch": 0.9730812699065037, "grad_norm": 0.08427727967500687, "learning_rate": 0.01, "loss": 2.1031, "step": 9471 }, { "epoch": 0.9733894996403987, "grad_norm": 0.14798741042613983, "learning_rate": 0.01, "loss": 2.091, "step": 9474 }, { "epoch": 0.9736977293742937, "grad_norm": 0.057735662907361984, "learning_rate": 0.01, "loss": 2.0701, "step": 9477 }, { "epoch": 0.9740059591081887, "grad_norm": 0.04484837129712105, "learning_rate": 0.01, "loss": 2.1015, "step": 9480 }, { "epoch": 0.9743141888420837, "grad_norm": 0.04166285693645477, "learning_rate": 0.01, "loss": 2.1021, "step": 9483 }, { "epoch": 0.9746224185759786, "grad_norm": 0.05640358105301857, "learning_rate": 0.01, "loss": 2.0925, "step": 9486 }, { "epoch": 0.9749306483098736, "grad_norm": 0.040314216166734695, "learning_rate": 0.01, "loss": 2.0797, "step": 9489 }, { "epoch": 0.9752388780437686, "grad_norm": 0.04522860422730446, "learning_rate": 0.01, "loss": 2.0935, "step": 9492 }, { "epoch": 0.9755471077776636, "grad_norm": 0.03492886200547218, "learning_rate": 0.01, "loss": 2.0968, "step": 9495 }, { "epoch": 0.9758553375115586, "grad_norm": 0.03252703323960304, "learning_rate": 0.01, "loss": 2.125, "step": 9498 }, { "epoch": 0.9761635672454536, "grad_norm": 0.04002056270837784, "learning_rate": 0.01, "loss": 2.0651, "step": 9501 }, { "epoch": 0.9764717969793486, "grad_norm": 0.07364718616008759, "learning_rate": 0.01, "loss": 2.0629, "step": 9504 }, { "epoch": 0.9767800267132436, "grad_norm": 0.05577448755502701, "learning_rate": 0.01, "loss": 2.0726, "step": 9507 }, { "epoch": 0.9770882564471386, "grad_norm": 0.13259132206439972, "learning_rate": 0.01, "loss": 2.1075, "step": 9510 }, { "epoch": 0.9773964861810336, "grad_norm": 0.06911557912826538, "learning_rate": 0.01, "loss": 2.0887, "step": 9513 }, { "epoch": 0.9777047159149286, "grad_norm": 0.10592345148324966, "learning_rate": 0.01, "loss": 2.0982, "step": 9516 }, { "epoch": 0.9780129456488236, "grad_norm": 0.05682144686579704, "learning_rate": 0.01, "loss": 2.0961, "step": 9519 }, { "epoch": 0.9783211753827186, "grad_norm": 0.07456633448600769, "learning_rate": 0.01, "loss": 2.0983, "step": 9522 }, { "epoch": 0.9786294051166136, "grad_norm": 0.062031425535678864, "learning_rate": 0.01, "loss": 2.1163, "step": 9525 }, { "epoch": 0.9789376348505086, "grad_norm": 0.0570233091711998, "learning_rate": 0.01, "loss": 2.1046, "step": 9528 }, { "epoch": 0.9792458645844035, "grad_norm": 0.04668619483709335, "learning_rate": 0.01, "loss": 2.081, "step": 9531 }, { "epoch": 0.9795540943182985, "grad_norm": 0.04718153178691864, "learning_rate": 0.01, "loss": 2.0678, "step": 9534 }, { "epoch": 0.9798623240521935, "grad_norm": 0.054066251963377, "learning_rate": 0.01, "loss": 2.0911, "step": 9537 }, { "epoch": 0.9801705537860885, "grad_norm": 0.1274210512638092, "learning_rate": 0.01, "loss": 2.097, "step": 9540 }, { "epoch": 0.9804787835199835, "grad_norm": 0.07543773949146271, "learning_rate": 0.01, "loss": 2.0824, "step": 9543 }, { "epoch": 0.9807870132538785, "grad_norm": 0.07845018804073334, "learning_rate": 0.01, "loss": 2.0749, "step": 9546 }, { "epoch": 0.9810952429877735, "grad_norm": 0.08444254100322723, "learning_rate": 0.01, "loss": 2.1019, "step": 9549 }, { "epoch": 0.9814034727216685, "grad_norm": 0.07719142735004425, "learning_rate": 0.01, "loss": 2.0811, "step": 9552 }, { "epoch": 0.9817117024555635, "grad_norm": 0.05624673515558243, "learning_rate": 0.01, "loss": 2.0752, "step": 9555 }, { "epoch": 0.9820199321894585, "grad_norm": 0.0419309176504612, "learning_rate": 0.01, "loss": 2.0812, "step": 9558 }, { "epoch": 0.9823281619233536, "grad_norm": 0.0343257375061512, "learning_rate": 0.01, "loss": 2.0694, "step": 9561 }, { "epoch": 0.9826363916572486, "grad_norm": 0.059452395886182785, "learning_rate": 0.01, "loss": 2.0521, "step": 9564 }, { "epoch": 0.9829446213911436, "grad_norm": 0.09073518216609955, "learning_rate": 0.01, "loss": 2.0636, "step": 9567 }, { "epoch": 0.9832528511250386, "grad_norm": 0.10660509765148163, "learning_rate": 0.01, "loss": 2.0796, "step": 9570 }, { "epoch": 0.9835610808589336, "grad_norm": 0.04380667209625244, "learning_rate": 0.01, "loss": 2.0992, "step": 9573 }, { "epoch": 0.9838693105928286, "grad_norm": 0.06383811682462692, "learning_rate": 0.01, "loss": 2.0722, "step": 9576 }, { "epoch": 0.9841775403267236, "grad_norm": 0.07926032692193985, "learning_rate": 0.01, "loss": 2.0571, "step": 9579 }, { "epoch": 0.9844857700606185, "grad_norm": 0.05310386046767235, "learning_rate": 0.01, "loss": 2.0739, "step": 9582 }, { "epoch": 0.9847939997945135, "grad_norm": 0.03591843321919441, "learning_rate": 0.01, "loss": 2.0757, "step": 9585 }, { "epoch": 0.9851022295284085, "grad_norm": 0.04773431271314621, "learning_rate": 0.01, "loss": 2.0525, "step": 9588 }, { "epoch": 0.9854104592623035, "grad_norm": 0.04679710045456886, "learning_rate": 0.01, "loss": 2.0771, "step": 9591 }, { "epoch": 0.9857186889961985, "grad_norm": 0.05671774223446846, "learning_rate": 0.01, "loss": 2.1106, "step": 9594 }, { "epoch": 0.9860269187300935, "grad_norm": 0.049488577991724014, "learning_rate": 0.01, "loss": 2.0695, "step": 9597 }, { "epoch": 0.9863351484639885, "grad_norm": 0.04207129031419754, "learning_rate": 0.01, "loss": 2.0903, "step": 9600 }, { "epoch": 0.9866433781978835, "grad_norm": 0.10019747167825699, "learning_rate": 0.01, "loss": 2.073, "step": 9603 }, { "epoch": 0.9869516079317785, "grad_norm": 0.051381729543209076, "learning_rate": 0.01, "loss": 2.0626, "step": 9606 }, { "epoch": 0.9872598376656735, "grad_norm": 0.13477744162082672, "learning_rate": 0.01, "loss": 2.1098, "step": 9609 }, { "epoch": 0.9875680673995685, "grad_norm": 0.09002148360013962, "learning_rate": 0.01, "loss": 2.0927, "step": 9612 }, { "epoch": 0.9878762971334635, "grad_norm": 0.05230112001299858, "learning_rate": 0.01, "loss": 2.0902, "step": 9615 }, { "epoch": 0.9881845268673585, "grad_norm": 0.0639885738492012, "learning_rate": 0.01, "loss": 2.1179, "step": 9618 }, { "epoch": 0.9884927566012535, "grad_norm": 0.0553070530295372, "learning_rate": 0.01, "loss": 2.0923, "step": 9621 }, { "epoch": 0.9888009863351485, "grad_norm": 0.04541468620300293, "learning_rate": 0.01, "loss": 2.0965, "step": 9624 }, { "epoch": 0.9891092160690435, "grad_norm": 0.08656930178403854, "learning_rate": 0.01, "loss": 2.1038, "step": 9627 }, { "epoch": 0.9894174458029384, "grad_norm": 0.04954921826720238, "learning_rate": 0.01, "loss": 2.0759, "step": 9630 }, { "epoch": 0.9897256755368334, "grad_norm": 0.07971720397472382, "learning_rate": 0.01, "loss": 2.0837, "step": 9633 }, { "epoch": 0.9900339052707284, "grad_norm": 0.12388944625854492, "learning_rate": 0.01, "loss": 2.1181, "step": 9636 }, { "epoch": 0.9903421350046234, "grad_norm": 0.040693242102861404, "learning_rate": 0.01, "loss": 2.0806, "step": 9639 }, { "epoch": 0.9906503647385184, "grad_norm": 0.032711997628211975, "learning_rate": 0.01, "loss": 2.0925, "step": 9642 }, { "epoch": 0.9909585944724134, "grad_norm": 0.04089382663369179, "learning_rate": 0.01, "loss": 2.0841, "step": 9645 }, { "epoch": 0.9912668242063084, "grad_norm": 0.05480481684207916, "learning_rate": 0.01, "loss": 2.0769, "step": 9648 }, { "epoch": 0.9915750539402034, "grad_norm": 0.04627472907304764, "learning_rate": 0.01, "loss": 2.094, "step": 9651 }, { "epoch": 0.9918832836740984, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 2.1181, "step": 9654 }, { "epoch": 0.9921915134079934, "grad_norm": 0.051012761890888214, "learning_rate": 0.01, "loss": 2.0985, "step": 9657 }, { "epoch": 0.9924997431418884, "grad_norm": 0.08666348457336426, "learning_rate": 0.01, "loss": 2.0875, "step": 9660 }, { "epoch": 0.9928079728757834, "grad_norm": 0.0972173810005188, "learning_rate": 0.01, "loss": 2.0995, "step": 9663 }, { "epoch": 0.9931162026096784, "grad_norm": 0.0765865370631218, "learning_rate": 0.01, "loss": 2.0729, "step": 9666 }, { "epoch": 0.9934244323435734, "grad_norm": 0.04532674700021744, "learning_rate": 0.01, "loss": 2.0656, "step": 9669 }, { "epoch": 0.9937326620774684, "grad_norm": 0.08642619848251343, "learning_rate": 0.01, "loss": 2.1036, "step": 9672 }, { "epoch": 0.9940408918113633, "grad_norm": 0.04758689925074577, "learning_rate": 0.01, "loss": 2.0683, "step": 9675 }, { "epoch": 0.9943491215452585, "grad_norm": 0.07701463252305984, "learning_rate": 0.01, "loss": 2.0898, "step": 9678 }, { "epoch": 0.9946573512791534, "grad_norm": 0.05999990925192833, "learning_rate": 0.01, "loss": 2.0694, "step": 9681 }, { "epoch": 0.9949655810130484, "grad_norm": 0.08793257176876068, "learning_rate": 0.01, "loss": 2.0689, "step": 9684 }, { "epoch": 0.9952738107469434, "grad_norm": 0.06139199063181877, "learning_rate": 0.01, "loss": 2.0801, "step": 9687 }, { "epoch": 0.9955820404808384, "grad_norm": 0.09202239662408829, "learning_rate": 0.01, "loss": 2.0837, "step": 9690 }, { "epoch": 0.9958902702147334, "grad_norm": 0.09284163266420364, "learning_rate": 0.01, "loss": 2.107, "step": 9693 }, { "epoch": 0.9961984999486284, "grad_norm": 0.08113729953765869, "learning_rate": 0.01, "loss": 2.076, "step": 9696 }, { "epoch": 0.9965067296825234, "grad_norm": 0.10663104802370071, "learning_rate": 0.01, "loss": 2.0973, "step": 9699 }, { "epoch": 0.9968149594164184, "grad_norm": 0.11791951954364777, "learning_rate": 0.01, "loss": 2.0885, "step": 9702 }, { "epoch": 0.9971231891503134, "grad_norm": 0.09039194136857986, "learning_rate": 0.01, "loss": 2.0957, "step": 9705 }, { "epoch": 0.9974314188842084, "grad_norm": 0.08142858743667603, "learning_rate": 0.01, "loss": 2.0721, "step": 9708 }, { "epoch": 0.9977396486181034, "grad_norm": 0.07347192615270615, "learning_rate": 0.01, "loss": 2.0985, "step": 9711 }, { "epoch": 0.9980478783519984, "grad_norm": 0.04449746012687683, "learning_rate": 0.01, "loss": 2.0728, "step": 9714 }, { "epoch": 0.9983561080858934, "grad_norm": 0.040178634226322174, "learning_rate": 0.01, "loss": 2.0773, "step": 9717 }, { "epoch": 0.9986643378197884, "grad_norm": 0.0577414333820343, "learning_rate": 0.01, "loss": 2.0854, "step": 9720 }, { "epoch": 0.9989725675536834, "grad_norm": 0.07444582879543304, "learning_rate": 0.01, "loss": 2.0834, "step": 9723 }, { "epoch": 0.9992807972875783, "grad_norm": 0.10387948155403137, "learning_rate": 0.01, "loss": 2.0698, "step": 9726 }, { "epoch": 0.9995890270214733, "grad_norm": 0.11066528409719467, "learning_rate": 0.01, "loss": 2.1035, "step": 9729 }, { "epoch": 0.9998972567553683, "grad_norm": 0.06454616039991379, "learning_rate": 0.01, "loss": 2.0692, "step": 9732 }, { "epoch": 0.9990763546798029, "grad_norm": 0.048325520008802414, "learning_rate": 0.01, "loss": 2.1225, "step": 9735 }, { "epoch": 0.999384236453202, "grad_norm": 0.03542228788137436, "learning_rate": 0.01, "loss": 2.1024, "step": 9738 }, { "epoch": 0.999692118226601, "grad_norm": 0.042020559310913086, "learning_rate": 0.01, "loss": 2.0968, "step": 9741 }, { "epoch": 1.0, "grad_norm": 0.04916913062334061, "learning_rate": 0.01, "loss": 2.1244, "step": 9744 }, { "epoch": 1.000307881773399, "grad_norm": 0.08905553072690964, "learning_rate": 0.01, "loss": 2.0867, "step": 9747 }, { "epoch": 1.000615763546798, "grad_norm": 0.07140953093767166, "learning_rate": 0.01, "loss": 2.0863, "step": 9750 }, { "epoch": 1.000923645320197, "grad_norm": 0.05284767597913742, "learning_rate": 0.01, "loss": 2.1131, "step": 9753 }, { "epoch": 1.001231527093596, "grad_norm": 0.1293289214372635, "learning_rate": 0.01, "loss": 2.1036, "step": 9756 }, { "epoch": 1.001539408866995, "grad_norm": 0.06052086502313614, "learning_rate": 0.01, "loss": 2.1189, "step": 9759 }, { "epoch": 1.0018472906403941, "grad_norm": 0.07361391931772232, "learning_rate": 0.01, "loss": 2.0962, "step": 9762 }, { "epoch": 1.0021551724137931, "grad_norm": 0.06513562798500061, "learning_rate": 0.01, "loss": 2.129, "step": 9765 }, { "epoch": 1.0024630541871922, "grad_norm": 0.036649156361818314, "learning_rate": 0.01, "loss": 2.0964, "step": 9768 }, { "epoch": 1.0027709359605912, "grad_norm": 0.05371764674782753, "learning_rate": 0.01, "loss": 2.0976, "step": 9771 }, { "epoch": 1.0030788177339902, "grad_norm": 0.06316730380058289, "learning_rate": 0.01, "loss": 2.097, "step": 9774 }, { "epoch": 1.0033866995073892, "grad_norm": 0.03097986802458763, "learning_rate": 0.01, "loss": 2.1128, "step": 9777 }, { "epoch": 1.0036945812807883, "grad_norm": 0.046021945774555206, "learning_rate": 0.01, "loss": 2.1296, "step": 9780 }, { "epoch": 1.0040024630541873, "grad_norm": 0.06580191850662231, "learning_rate": 0.01, "loss": 2.1106, "step": 9783 }, { "epoch": 1.0043103448275863, "grad_norm": 0.054073531180620193, "learning_rate": 0.01, "loss": 2.0986, "step": 9786 }, { "epoch": 1.0046182266009853, "grad_norm": 0.10088641196489334, "learning_rate": 0.01, "loss": 2.1301, "step": 9789 }, { "epoch": 1.0049261083743843, "grad_norm": 0.03944807127118111, "learning_rate": 0.01, "loss": 2.1337, "step": 9792 }, { "epoch": 1.0052339901477831, "grad_norm": 0.07183028757572174, "learning_rate": 0.01, "loss": 2.1272, "step": 9795 }, { "epoch": 1.0055418719211822, "grad_norm": 0.13821956515312195, "learning_rate": 0.01, "loss": 2.1016, "step": 9798 }, { "epoch": 1.0058497536945812, "grad_norm": 0.14031893014907837, "learning_rate": 0.01, "loss": 2.0924, "step": 9801 }, { "epoch": 1.0061576354679802, "grad_norm": 0.06494525820016861, "learning_rate": 0.01, "loss": 2.128, "step": 9804 }, { "epoch": 1.0064655172413792, "grad_norm": 0.05946667864918709, "learning_rate": 0.01, "loss": 2.1335, "step": 9807 }, { "epoch": 1.0067733990147782, "grad_norm": 0.05583272874355316, "learning_rate": 0.01, "loss": 2.1186, "step": 9810 }, { "epoch": 1.0070812807881773, "grad_norm": 0.06858284026384354, "learning_rate": 0.01, "loss": 2.1207, "step": 9813 }, { "epoch": 1.0073891625615763, "grad_norm": 0.05864641070365906, "learning_rate": 0.01, "loss": 2.0869, "step": 9816 }, { "epoch": 1.0076970443349753, "grad_norm": 0.043661102652549744, "learning_rate": 0.01, "loss": 2.1067, "step": 9819 }, { "epoch": 1.0080049261083743, "grad_norm": 0.07878375053405762, "learning_rate": 0.01, "loss": 2.1149, "step": 9822 }, { "epoch": 1.0083128078817734, "grad_norm": 0.04246210679411888, "learning_rate": 0.01, "loss": 2.1241, "step": 9825 }, { "epoch": 1.0086206896551724, "grad_norm": 0.06508597731590271, "learning_rate": 0.01, "loss": 2.1232, "step": 9828 }, { "epoch": 1.0089285714285714, "grad_norm": 0.07472758740186691, "learning_rate": 0.01, "loss": 2.0893, "step": 9831 }, { "epoch": 1.0092364532019704, "grad_norm": 0.13144147396087646, "learning_rate": 0.01, "loss": 2.1194, "step": 9834 }, { "epoch": 1.0095443349753694, "grad_norm": 0.08961367607116699, "learning_rate": 0.01, "loss": 2.1215, "step": 9837 }, { "epoch": 1.0098522167487685, "grad_norm": 0.053439076989889145, "learning_rate": 0.01, "loss": 2.1173, "step": 9840 }, { "epoch": 1.0101600985221675, "grad_norm": 0.03234443441033363, "learning_rate": 0.01, "loss": 2.104, "step": 9843 }, { "epoch": 1.0104679802955665, "grad_norm": 0.07516933977603912, "learning_rate": 0.01, "loss": 2.1186, "step": 9846 }, { "epoch": 1.0107758620689655, "grad_norm": 0.12221794575452805, "learning_rate": 0.01, "loss": 2.0934, "step": 9849 }, { "epoch": 1.0110837438423645, "grad_norm": 0.08198120445013046, "learning_rate": 0.01, "loss": 2.1495, "step": 9852 }, { "epoch": 1.0113916256157636, "grad_norm": 0.058380696922540665, "learning_rate": 0.01, "loss": 2.1234, "step": 9855 }, { "epoch": 1.0116995073891626, "grad_norm": 0.04831172525882721, "learning_rate": 0.01, "loss": 2.0977, "step": 9858 }, { "epoch": 1.0120073891625616, "grad_norm": 0.045920804142951965, "learning_rate": 0.01, "loss": 2.0842, "step": 9861 }, { "epoch": 1.0123152709359606, "grad_norm": 0.12969541549682617, "learning_rate": 0.01, "loss": 2.1005, "step": 9864 }, { "epoch": 1.0126231527093597, "grad_norm": 0.09659627079963684, "learning_rate": 0.01, "loss": 2.1126, "step": 9867 }, { "epoch": 1.0129310344827587, "grad_norm": 0.033160608261823654, "learning_rate": 0.01, "loss": 2.1244, "step": 9870 }, { "epoch": 1.0132389162561577, "grad_norm": 0.03523699939250946, "learning_rate": 0.01, "loss": 2.1009, "step": 9873 }, { "epoch": 1.0135467980295567, "grad_norm": 0.04670235142111778, "learning_rate": 0.01, "loss": 2.1107, "step": 9876 }, { "epoch": 1.0138546798029557, "grad_norm": 0.05278048664331436, "learning_rate": 0.01, "loss": 2.133, "step": 9879 }, { "epoch": 1.0141625615763548, "grad_norm": 0.1409105509519577, "learning_rate": 0.01, "loss": 2.105, "step": 9882 }, { "epoch": 1.0144704433497538, "grad_norm": 0.08208174258470535, "learning_rate": 0.01, "loss": 2.1202, "step": 9885 }, { "epoch": 1.0147783251231528, "grad_norm": 0.052980221807956696, "learning_rate": 0.01, "loss": 2.1108, "step": 9888 }, { "epoch": 1.0150862068965518, "grad_norm": 0.03402642160654068, "learning_rate": 0.01, "loss": 2.1058, "step": 9891 }, { "epoch": 1.0153940886699508, "grad_norm": 0.05165582895278931, "learning_rate": 0.01, "loss": 2.0962, "step": 9894 }, { "epoch": 1.0157019704433496, "grad_norm": 0.0488906130194664, "learning_rate": 0.01, "loss": 2.1157, "step": 9897 }, { "epoch": 1.0160098522167487, "grad_norm": 0.06578544527292252, "learning_rate": 0.01, "loss": 2.0783, "step": 9900 }, { "epoch": 1.0163177339901477, "grad_norm": 0.05930023267865181, "learning_rate": 0.01, "loss": 2.115, "step": 9903 }, { "epoch": 1.0166256157635467, "grad_norm": 0.07461842894554138, "learning_rate": 0.01, "loss": 2.0833, "step": 9906 }, { "epoch": 1.0169334975369457, "grad_norm": 0.04523751139640808, "learning_rate": 0.01, "loss": 2.1062, "step": 9909 }, { "epoch": 1.0172413793103448, "grad_norm": 0.05342249572277069, "learning_rate": 0.01, "loss": 2.1127, "step": 9912 }, { "epoch": 1.0175492610837438, "grad_norm": 0.040748368948698044, "learning_rate": 0.01, "loss": 2.0893, "step": 9915 }, { "epoch": 1.0178571428571428, "grad_norm": 0.03435824438929558, "learning_rate": 0.01, "loss": 2.1151, "step": 9918 }, { "epoch": 1.0181650246305418, "grad_norm": 0.04769265651702881, "learning_rate": 0.01, "loss": 2.0984, "step": 9921 }, { "epoch": 1.0184729064039408, "grad_norm": 0.07814217358827591, "learning_rate": 0.01, "loss": 2.1038, "step": 9924 }, { "epoch": 1.0187807881773399, "grad_norm": 0.12953363358974457, "learning_rate": 0.01, "loss": 2.1051, "step": 9927 }, { "epoch": 1.0190886699507389, "grad_norm": 0.11376773566007614, "learning_rate": 0.01, "loss": 2.0989, "step": 9930 }, { "epoch": 1.019396551724138, "grad_norm": 0.05323106423020363, "learning_rate": 0.01, "loss": 2.1135, "step": 9933 }, { "epoch": 1.019704433497537, "grad_norm": 0.07705114781856537, "learning_rate": 0.01, "loss": 2.1046, "step": 9936 }, { "epoch": 1.020012315270936, "grad_norm": 0.05934451147913933, "learning_rate": 0.01, "loss": 2.1207, "step": 9939 }, { "epoch": 1.020320197044335, "grad_norm": 0.10474961996078491, "learning_rate": 0.01, "loss": 2.1134, "step": 9942 }, { "epoch": 1.020628078817734, "grad_norm": 0.05283385515213013, "learning_rate": 0.01, "loss": 2.1085, "step": 9945 }, { "epoch": 1.020935960591133, "grad_norm": 0.043369196355342865, "learning_rate": 0.01, "loss": 2.1265, "step": 9948 }, { "epoch": 1.021243842364532, "grad_norm": 0.0366055853664875, "learning_rate": 0.01, "loss": 2.1214, "step": 9951 }, { "epoch": 1.021551724137931, "grad_norm": 0.06200672313570976, "learning_rate": 0.01, "loss": 2.0943, "step": 9954 }, { "epoch": 1.02185960591133, "grad_norm": 0.06652572005987167, "learning_rate": 0.01, "loss": 2.1139, "step": 9957 }, { "epoch": 1.022167487684729, "grad_norm": 0.04040740057826042, "learning_rate": 0.01, "loss": 2.0894, "step": 9960 }, { "epoch": 1.0224753694581281, "grad_norm": 0.049162358045578, "learning_rate": 0.01, "loss": 2.0955, "step": 9963 }, { "epoch": 1.0227832512315271, "grad_norm": 0.05465700104832649, "learning_rate": 0.01, "loss": 2.1109, "step": 9966 }, { "epoch": 1.0230911330049262, "grad_norm": 0.0575067512691021, "learning_rate": 0.01, "loss": 2.0956, "step": 9969 }, { "epoch": 1.0233990147783252, "grad_norm": 0.14622198045253754, "learning_rate": 0.01, "loss": 2.1031, "step": 9972 }, { "epoch": 1.0237068965517242, "grad_norm": 0.04765618219971657, "learning_rate": 0.01, "loss": 2.0834, "step": 9975 }, { "epoch": 1.0240147783251232, "grad_norm": 0.04039911553263664, "learning_rate": 0.01, "loss": 2.0933, "step": 9978 }, { "epoch": 1.0243226600985222, "grad_norm": 0.06009029969573021, "learning_rate": 0.01, "loss": 2.115, "step": 9981 }, { "epoch": 1.0246305418719213, "grad_norm": 0.06187298893928528, "learning_rate": 0.01, "loss": 2.079, "step": 9984 }, { "epoch": 1.0249384236453203, "grad_norm": 0.05368026718497276, "learning_rate": 0.01, "loss": 2.0875, "step": 9987 }, { "epoch": 1.0252463054187193, "grad_norm": 0.051921263337135315, "learning_rate": 0.01, "loss": 2.1243, "step": 9990 }, { "epoch": 1.0255541871921183, "grad_norm": 0.09820009768009186, "learning_rate": 0.01, "loss": 2.0983, "step": 9993 }, { "epoch": 1.0258620689655173, "grad_norm": 0.10601375997066498, "learning_rate": 0.01, "loss": 2.1288, "step": 9996 }, { "epoch": 1.0261699507389161, "grad_norm": 0.05488260090351105, "learning_rate": 0.01, "loss": 2.1033, "step": 9999 }, { "epoch": 1.0264778325123152, "grad_norm": 0.07482553273439407, "learning_rate": 0.01, "loss": 2.1181, "step": 10002 }, { "epoch": 1.0267857142857142, "grad_norm": 0.044733475893735886, "learning_rate": 0.01, "loss": 2.1237, "step": 10005 }, { "epoch": 1.0270935960591132, "grad_norm": 0.04775967076420784, "learning_rate": 0.01, "loss": 2.1288, "step": 10008 }, { "epoch": 1.0274014778325122, "grad_norm": 0.05972621962428093, "learning_rate": 0.01, "loss": 2.0878, "step": 10011 }, { "epoch": 1.0277093596059113, "grad_norm": 0.12219330668449402, "learning_rate": 0.01, "loss": 2.1034, "step": 10014 }, { "epoch": 1.0280172413793103, "grad_norm": 0.05171920731663704, "learning_rate": 0.01, "loss": 2.0925, "step": 10017 }, { "epoch": 1.0283251231527093, "grad_norm": 0.04166760668158531, "learning_rate": 0.01, "loss": 2.0928, "step": 10020 }, { "epoch": 1.0286330049261083, "grad_norm": 0.05231022089719772, "learning_rate": 0.01, "loss": 2.0945, "step": 10023 }, { "epoch": 1.0289408866995073, "grad_norm": 0.1091604232788086, "learning_rate": 0.01, "loss": 2.0878, "step": 10026 }, { "epoch": 1.0292487684729064, "grad_norm": 0.07104350626468658, "learning_rate": 0.01, "loss": 2.1125, "step": 10029 }, { "epoch": 1.0295566502463054, "grad_norm": 0.0466371588408947, "learning_rate": 0.01, "loss": 2.0973, "step": 10032 }, { "epoch": 1.0298645320197044, "grad_norm": 0.05548730120062828, "learning_rate": 0.01, "loss": 2.0846, "step": 10035 }, { "epoch": 1.0301724137931034, "grad_norm": 0.06483764201402664, "learning_rate": 0.01, "loss": 2.105, "step": 10038 }, { "epoch": 1.0304802955665024, "grad_norm": 0.05243910476565361, "learning_rate": 0.01, "loss": 2.1011, "step": 10041 }, { "epoch": 1.0307881773399015, "grad_norm": 0.09996815025806427, "learning_rate": 0.01, "loss": 2.1389, "step": 10044 }, { "epoch": 1.0310960591133005, "grad_norm": 0.04864559695124626, "learning_rate": 0.01, "loss": 2.1069, "step": 10047 }, { "epoch": 1.0314039408866995, "grad_norm": 0.14447607100009918, "learning_rate": 0.01, "loss": 2.1075, "step": 10050 }, { "epoch": 1.0317118226600985, "grad_norm": 0.050261352211236954, "learning_rate": 0.01, "loss": 2.1147, "step": 10053 }, { "epoch": 1.0320197044334976, "grad_norm": 0.07719244807958603, "learning_rate": 0.01, "loss": 2.1061, "step": 10056 }, { "epoch": 1.0323275862068966, "grad_norm": 0.10620381683111191, "learning_rate": 0.01, "loss": 2.1129, "step": 10059 }, { "epoch": 1.0326354679802956, "grad_norm": 0.05358508229255676, "learning_rate": 0.01, "loss": 2.1156, "step": 10062 }, { "epoch": 1.0329433497536946, "grad_norm": 0.04341145232319832, "learning_rate": 0.01, "loss": 2.1046, "step": 10065 }, { "epoch": 1.0332512315270936, "grad_norm": 0.04785105213522911, "learning_rate": 0.01, "loss": 2.0804, "step": 10068 }, { "epoch": 1.0335591133004927, "grad_norm": 0.04886849224567413, "learning_rate": 0.01, "loss": 2.0691, "step": 10071 }, { "epoch": 1.0338669950738917, "grad_norm": 0.03917735815048218, "learning_rate": 0.01, "loss": 2.0808, "step": 10074 }, { "epoch": 1.0341748768472907, "grad_norm": 0.10696244239807129, "learning_rate": 0.01, "loss": 2.085, "step": 10077 }, { "epoch": 1.0344827586206897, "grad_norm": 0.14525163173675537, "learning_rate": 0.01, "loss": 2.1246, "step": 10080 }, { "epoch": 1.0347906403940887, "grad_norm": 0.06464140862226486, "learning_rate": 0.01, "loss": 2.1088, "step": 10083 }, { "epoch": 1.0350985221674878, "grad_norm": 0.055628299713134766, "learning_rate": 0.01, "loss": 2.1013, "step": 10086 }, { "epoch": 1.0354064039408868, "grad_norm": 0.0457589291036129, "learning_rate": 0.01, "loss": 2.118, "step": 10089 }, { "epoch": 1.0357142857142858, "grad_norm": 0.07108809798955917, "learning_rate": 0.01, "loss": 2.0882, "step": 10092 }, { "epoch": 1.0360221674876848, "grad_norm": 0.07304032146930695, "learning_rate": 0.01, "loss": 2.1632, "step": 10095 }, { "epoch": 1.0363300492610836, "grad_norm": 0.04778844490647316, "learning_rate": 0.01, "loss": 2.1076, "step": 10098 }, { "epoch": 1.0366379310344827, "grad_norm": 0.0444946251809597, "learning_rate": 0.01, "loss": 2.1092, "step": 10101 }, { "epoch": 1.0369458128078817, "grad_norm": 0.03863450884819031, "learning_rate": 0.01, "loss": 2.0973, "step": 10104 }, { "epoch": 1.0372536945812807, "grad_norm": 0.11049003899097443, "learning_rate": 0.01, "loss": 2.1069, "step": 10107 }, { "epoch": 1.0375615763546797, "grad_norm": 0.055413637310266495, "learning_rate": 0.01, "loss": 2.0935, "step": 10110 }, { "epoch": 1.0378694581280787, "grad_norm": 0.1212301105260849, "learning_rate": 0.01, "loss": 2.1033, "step": 10113 }, { "epoch": 1.0381773399014778, "grad_norm": 0.06444283574819565, "learning_rate": 0.01, "loss": 2.0821, "step": 10116 }, { "epoch": 1.0384852216748768, "grad_norm": 0.048522353172302246, "learning_rate": 0.01, "loss": 2.1129, "step": 10119 }, { "epoch": 1.0387931034482758, "grad_norm": 0.03755674138665199, "learning_rate": 0.01, "loss": 2.0773, "step": 10122 }, { "epoch": 1.0391009852216748, "grad_norm": 0.03873259574174881, "learning_rate": 0.01, "loss": 2.0877, "step": 10125 }, { "epoch": 1.0394088669950738, "grad_norm": 0.062387898564338684, "learning_rate": 0.01, "loss": 2.1119, "step": 10128 }, { "epoch": 1.0397167487684729, "grad_norm": 0.037559203803539276, "learning_rate": 0.01, "loss": 2.1165, "step": 10131 }, { "epoch": 1.0400246305418719, "grad_norm": 0.0703917145729065, "learning_rate": 0.01, "loss": 2.0877, "step": 10134 }, { "epoch": 1.040332512315271, "grad_norm": 0.05063795670866966, "learning_rate": 0.01, "loss": 2.1282, "step": 10137 }, { "epoch": 1.04064039408867, "grad_norm": 0.08476493507623672, "learning_rate": 0.01, "loss": 2.1217, "step": 10140 }, { "epoch": 1.040948275862069, "grad_norm": 0.09482383728027344, "learning_rate": 0.01, "loss": 2.1002, "step": 10143 }, { "epoch": 1.041256157635468, "grad_norm": 0.1094396710395813, "learning_rate": 0.01, "loss": 2.1138, "step": 10146 }, { "epoch": 1.041564039408867, "grad_norm": 0.17252720892429352, "learning_rate": 0.01, "loss": 2.1079, "step": 10149 }, { "epoch": 1.041871921182266, "grad_norm": 0.11076754331588745, "learning_rate": 0.01, "loss": 2.1198, "step": 10152 }, { "epoch": 1.042179802955665, "grad_norm": 0.06879215687513351, "learning_rate": 0.01, "loss": 2.0878, "step": 10155 }, { "epoch": 1.042487684729064, "grad_norm": 0.07402212172746658, "learning_rate": 0.01, "loss": 2.0869, "step": 10158 }, { "epoch": 1.042795566502463, "grad_norm": 0.04562051594257355, "learning_rate": 0.01, "loss": 2.1139, "step": 10161 }, { "epoch": 1.043103448275862, "grad_norm": 0.04578396677970886, "learning_rate": 0.01, "loss": 2.0974, "step": 10164 }, { "epoch": 1.0434113300492611, "grad_norm": 0.051678020507097244, "learning_rate": 0.01, "loss": 2.0995, "step": 10167 }, { "epoch": 1.0437192118226601, "grad_norm": 0.03445015102624893, "learning_rate": 0.01, "loss": 2.106, "step": 10170 }, { "epoch": 1.0440270935960592, "grad_norm": 0.03868851810693741, "learning_rate": 0.01, "loss": 2.0732, "step": 10173 }, { "epoch": 1.0443349753694582, "grad_norm": 0.058904558420181274, "learning_rate": 0.01, "loss": 2.085, "step": 10176 }, { "epoch": 1.0446428571428572, "grad_norm": 0.10729484260082245, "learning_rate": 0.01, "loss": 2.0909, "step": 10179 }, { "epoch": 1.0449507389162562, "grad_norm": 0.10037554055452347, "learning_rate": 0.01, "loss": 2.0945, "step": 10182 }, { "epoch": 1.0452586206896552, "grad_norm": 0.07336730509996414, "learning_rate": 0.01, "loss": 2.0885, "step": 10185 }, { "epoch": 1.0455665024630543, "grad_norm": 0.11717227101325989, "learning_rate": 0.01, "loss": 2.1019, "step": 10188 }, { "epoch": 1.0458743842364533, "grad_norm": 0.06263696402311325, "learning_rate": 0.01, "loss": 2.1113, "step": 10191 }, { "epoch": 1.0461822660098523, "grad_norm": 0.07939436286687851, "learning_rate": 0.01, "loss": 2.0803, "step": 10194 }, { "epoch": 1.0464901477832513, "grad_norm": 0.05761004984378815, "learning_rate": 0.01, "loss": 2.1155, "step": 10197 }, { "epoch": 1.0467980295566504, "grad_norm": 0.04293765127658844, "learning_rate": 0.01, "loss": 2.0944, "step": 10200 }, { "epoch": 1.0471059113300492, "grad_norm": 0.04638001322746277, "learning_rate": 0.01, "loss": 2.113, "step": 10203 }, { "epoch": 1.0474137931034482, "grad_norm": 0.047882046550512314, "learning_rate": 0.01, "loss": 2.0733, "step": 10206 }, { "epoch": 1.0477216748768472, "grad_norm": 0.07461071759462357, "learning_rate": 0.01, "loss": 2.107, "step": 10209 }, { "epoch": 1.0480295566502462, "grad_norm": 0.10987289249897003, "learning_rate": 0.01, "loss": 2.105, "step": 10212 }, { "epoch": 1.0483374384236452, "grad_norm": 0.04183235019445419, "learning_rate": 0.01, "loss": 2.0953, "step": 10215 }, { "epoch": 1.0486453201970443, "grad_norm": 0.049700990319252014, "learning_rate": 0.01, "loss": 2.1067, "step": 10218 }, { "epoch": 1.0489532019704433, "grad_norm": 0.08448828011751175, "learning_rate": 0.01, "loss": 2.1113, "step": 10221 }, { "epoch": 1.0492610837438423, "grad_norm": 0.05486508831381798, "learning_rate": 0.01, "loss": 2.1156, "step": 10224 }, { "epoch": 1.0495689655172413, "grad_norm": 0.057925377041101456, "learning_rate": 0.01, "loss": 2.127, "step": 10227 }, { "epoch": 1.0498768472906403, "grad_norm": 0.05322302505373955, "learning_rate": 0.01, "loss": 2.0861, "step": 10230 }, { "epoch": 1.0501847290640394, "grad_norm": 0.046823181211948395, "learning_rate": 0.01, "loss": 2.089, "step": 10233 }, { "epoch": 1.0504926108374384, "grad_norm": 0.05037027224898338, "learning_rate": 0.01, "loss": 2.0841, "step": 10236 }, { "epoch": 1.0508004926108374, "grad_norm": 0.05172303318977356, "learning_rate": 0.01, "loss": 2.105, "step": 10239 }, { "epoch": 1.0511083743842364, "grad_norm": 0.07993052154779434, "learning_rate": 0.01, "loss": 2.1097, "step": 10242 }, { "epoch": 1.0514162561576355, "grad_norm": 0.039322953671216965, "learning_rate": 0.01, "loss": 2.0951, "step": 10245 }, { "epoch": 1.0517241379310345, "grad_norm": 0.05829343572258949, "learning_rate": 0.01, "loss": 2.1257, "step": 10248 }, { "epoch": 1.0520320197044335, "grad_norm": 0.12303601950407028, "learning_rate": 0.01, "loss": 2.1143, "step": 10251 }, { "epoch": 1.0523399014778325, "grad_norm": 0.07176418602466583, "learning_rate": 0.01, "loss": 2.1297, "step": 10254 }, { "epoch": 1.0526477832512315, "grad_norm": 0.05229344964027405, "learning_rate": 0.01, "loss": 2.0934, "step": 10257 }, { "epoch": 1.0529556650246306, "grad_norm": 0.041665658354759216, "learning_rate": 0.01, "loss": 2.116, "step": 10260 }, { "epoch": 1.0532635467980296, "grad_norm": 0.04542261362075806, "learning_rate": 0.01, "loss": 2.1277, "step": 10263 }, { "epoch": 1.0535714285714286, "grad_norm": 0.0501495897769928, "learning_rate": 0.01, "loss": 2.0911, "step": 10266 }, { "epoch": 1.0538793103448276, "grad_norm": 0.06474924832582474, "learning_rate": 0.01, "loss": 2.1254, "step": 10269 }, { "epoch": 1.0541871921182266, "grad_norm": 0.0736108273267746, "learning_rate": 0.01, "loss": 2.0685, "step": 10272 }, { "epoch": 1.0544950738916257, "grad_norm": 0.07487022131681442, "learning_rate": 0.01, "loss": 2.113, "step": 10275 }, { "epoch": 1.0548029556650247, "grad_norm": 0.04876410961151123, "learning_rate": 0.01, "loss": 2.1051, "step": 10278 }, { "epoch": 1.0551108374384237, "grad_norm": 0.056595779955387115, "learning_rate": 0.01, "loss": 2.0864, "step": 10281 }, { "epoch": 1.0554187192118227, "grad_norm": 0.06958241015672684, "learning_rate": 0.01, "loss": 2.1, "step": 10284 }, { "epoch": 1.0557266009852218, "grad_norm": 0.08811846375465393, "learning_rate": 0.01, "loss": 2.1021, "step": 10287 }, { "epoch": 1.0560344827586208, "grad_norm": 0.061557747423648834, "learning_rate": 0.01, "loss": 2.1063, "step": 10290 }, { "epoch": 1.0563423645320198, "grad_norm": 0.07043389976024628, "learning_rate": 0.01, "loss": 2.106, "step": 10293 }, { "epoch": 1.0566502463054188, "grad_norm": 0.0916379988193512, "learning_rate": 0.01, "loss": 2.0851, "step": 10296 }, { "epoch": 1.0569581280788178, "grad_norm": 0.050577979534864426, "learning_rate": 0.01, "loss": 2.0966, "step": 10299 }, { "epoch": 1.0572660098522166, "grad_norm": 0.06576110422611237, "learning_rate": 0.01, "loss": 2.1038, "step": 10302 }, { "epoch": 1.0575738916256157, "grad_norm": 0.09315023571252823, "learning_rate": 0.01, "loss": 2.1341, "step": 10305 }, { "epoch": 1.0578817733990147, "grad_norm": 0.0649820864200592, "learning_rate": 0.01, "loss": 2.1064, "step": 10308 }, { "epoch": 1.0581896551724137, "grad_norm": 0.07930494844913483, "learning_rate": 0.01, "loss": 2.107, "step": 10311 }, { "epoch": 1.0584975369458127, "grad_norm": 0.09142257273197174, "learning_rate": 0.01, "loss": 2.1162, "step": 10314 }, { "epoch": 1.0588054187192117, "grad_norm": 0.05011974647641182, "learning_rate": 0.01, "loss": 2.0686, "step": 10317 }, { "epoch": 1.0591133004926108, "grad_norm": 0.1002635508775711, "learning_rate": 0.01, "loss": 2.138, "step": 10320 }, { "epoch": 1.0594211822660098, "grad_norm": 0.07570278644561768, "learning_rate": 0.01, "loss": 2.0693, "step": 10323 }, { "epoch": 1.0597290640394088, "grad_norm": 0.05086719989776611, "learning_rate": 0.01, "loss": 2.0991, "step": 10326 }, { "epoch": 1.0600369458128078, "grad_norm": 0.03596855327486992, "learning_rate": 0.01, "loss": 2.1038, "step": 10329 }, { "epoch": 1.0603448275862069, "grad_norm": 0.05059434473514557, "learning_rate": 0.01, "loss": 2.1, "step": 10332 }, { "epoch": 1.0606527093596059, "grad_norm": 0.058818116784095764, "learning_rate": 0.01, "loss": 2.0855, "step": 10335 }, { "epoch": 1.060960591133005, "grad_norm": 0.14139403402805328, "learning_rate": 0.01, "loss": 2.0755, "step": 10338 }, { "epoch": 1.061268472906404, "grad_norm": 0.12123113870620728, "learning_rate": 0.01, "loss": 2.0896, "step": 10341 }, { "epoch": 1.061576354679803, "grad_norm": 0.04767270013689995, "learning_rate": 0.01, "loss": 2.11, "step": 10344 }, { "epoch": 1.061884236453202, "grad_norm": 0.03506815433502197, "learning_rate": 0.01, "loss": 2.0953, "step": 10347 }, { "epoch": 1.062192118226601, "grad_norm": 0.08807789534330368, "learning_rate": 0.01, "loss": 2.0903, "step": 10350 }, { "epoch": 1.0625, "grad_norm": 0.1130862608551979, "learning_rate": 0.01, "loss": 2.0888, "step": 10353 }, { "epoch": 1.062807881773399, "grad_norm": 0.05720696598291397, "learning_rate": 0.01, "loss": 2.0904, "step": 10356 }, { "epoch": 1.063115763546798, "grad_norm": 0.057933416217565536, "learning_rate": 0.01, "loss": 2.1138, "step": 10359 }, { "epoch": 1.063423645320197, "grad_norm": 0.056713253259658813, "learning_rate": 0.01, "loss": 2.0965, "step": 10362 }, { "epoch": 1.063731527093596, "grad_norm": 0.05062280595302582, "learning_rate": 0.01, "loss": 2.1058, "step": 10365 }, { "epoch": 1.064039408866995, "grad_norm": 0.03439073637127876, "learning_rate": 0.01, "loss": 2.0945, "step": 10368 }, { "epoch": 1.0643472906403941, "grad_norm": 0.10244173556566238, "learning_rate": 0.01, "loss": 2.0916, "step": 10371 }, { "epoch": 1.0646551724137931, "grad_norm": 0.04706069454550743, "learning_rate": 0.01, "loss": 2.103, "step": 10374 }, { "epoch": 1.0649630541871922, "grad_norm": 0.11580058932304382, "learning_rate": 0.01, "loss": 2.0995, "step": 10377 }, { "epoch": 1.0652709359605912, "grad_norm": 0.044736508280038834, "learning_rate": 0.01, "loss": 2.0906, "step": 10380 }, { "epoch": 1.0655788177339902, "grad_norm": 0.08990567922592163, "learning_rate": 0.01, "loss": 2.1197, "step": 10383 }, { "epoch": 1.0658866995073892, "grad_norm": 0.06923419237136841, "learning_rate": 0.01, "loss": 2.0997, "step": 10386 }, { "epoch": 1.0661945812807883, "grad_norm": 0.059495240449905396, "learning_rate": 0.01, "loss": 2.1106, "step": 10389 }, { "epoch": 1.0665024630541873, "grad_norm": 0.07906550914049149, "learning_rate": 0.01, "loss": 2.1196, "step": 10392 }, { "epoch": 1.0668103448275863, "grad_norm": 0.08792297542095184, "learning_rate": 0.01, "loss": 2.0985, "step": 10395 }, { "epoch": 1.0671182266009853, "grad_norm": 0.06077072396874428, "learning_rate": 0.01, "loss": 2.088, "step": 10398 }, { "epoch": 1.0674261083743843, "grad_norm": 0.03865751996636391, "learning_rate": 0.01, "loss": 2.0894, "step": 10401 }, { "epoch": 1.0677339901477834, "grad_norm": 0.03158612549304962, "learning_rate": 0.01, "loss": 2.0861, "step": 10404 }, { "epoch": 1.0680418719211822, "grad_norm": 0.03455328568816185, "learning_rate": 0.01, "loss": 2.0819, "step": 10407 }, { "epoch": 1.0683497536945812, "grad_norm": 0.062100328505039215, "learning_rate": 0.01, "loss": 2.0967, "step": 10410 }, { "epoch": 1.0686576354679802, "grad_norm": 0.10934283584356308, "learning_rate": 0.01, "loss": 2.1135, "step": 10413 }, { "epoch": 1.0689655172413792, "grad_norm": 0.07184179127216339, "learning_rate": 0.01, "loss": 2.0949, "step": 10416 }, { "epoch": 1.0692733990147782, "grad_norm": 0.06610151380300522, "learning_rate": 0.01, "loss": 2.1001, "step": 10419 }, { "epoch": 1.0695812807881773, "grad_norm": 0.06064629554748535, "learning_rate": 0.01, "loss": 2.0835, "step": 10422 }, { "epoch": 1.0698891625615763, "grad_norm": 0.0531432181596756, "learning_rate": 0.01, "loss": 2.1105, "step": 10425 }, { "epoch": 1.0701970443349753, "grad_norm": 0.056448470801115036, "learning_rate": 0.01, "loss": 2.0724, "step": 10428 }, { "epoch": 1.0705049261083743, "grad_norm": 0.03736816346645355, "learning_rate": 0.01, "loss": 2.1243, "step": 10431 }, { "epoch": 1.0708128078817734, "grad_norm": 0.12693117558956146, "learning_rate": 0.01, "loss": 2.1156, "step": 10434 }, { "epoch": 1.0711206896551724, "grad_norm": 0.0428193174302578, "learning_rate": 0.01, "loss": 2.1025, "step": 10437 }, { "epoch": 1.0714285714285714, "grad_norm": 0.0464596189558506, "learning_rate": 0.01, "loss": 2.1067, "step": 10440 }, { "epoch": 1.0717364532019704, "grad_norm": 0.07535267621278763, "learning_rate": 0.01, "loss": 2.0785, "step": 10443 }, { "epoch": 1.0720443349753694, "grad_norm": 0.0537327378988266, "learning_rate": 0.01, "loss": 2.0775, "step": 10446 }, { "epoch": 1.0723522167487685, "grad_norm": 0.03783145919442177, "learning_rate": 0.01, "loss": 2.0921, "step": 10449 }, { "epoch": 1.0726600985221675, "grad_norm": 0.052689142525196075, "learning_rate": 0.01, "loss": 2.116, "step": 10452 }, { "epoch": 1.0729679802955665, "grad_norm": 0.1437288373708725, "learning_rate": 0.01, "loss": 2.071, "step": 10455 }, { "epoch": 1.0732758620689655, "grad_norm": 0.07633062452077866, "learning_rate": 0.01, "loss": 2.088, "step": 10458 }, { "epoch": 1.0735837438423645, "grad_norm": 0.061189718544483185, "learning_rate": 0.01, "loss": 2.0796, "step": 10461 }, { "epoch": 1.0738916256157636, "grad_norm": 0.06256800144910812, "learning_rate": 0.01, "loss": 2.1056, "step": 10464 }, { "epoch": 1.0741995073891626, "grad_norm": 0.0745188519358635, "learning_rate": 0.01, "loss": 2.0782, "step": 10467 }, { "epoch": 1.0745073891625616, "grad_norm": 0.0663486197590828, "learning_rate": 0.01, "loss": 2.0704, "step": 10470 }, { "epoch": 1.0748152709359606, "grad_norm": 0.05472427234053612, "learning_rate": 0.01, "loss": 2.082, "step": 10473 }, { "epoch": 1.0751231527093597, "grad_norm": 0.10171230137348175, "learning_rate": 0.01, "loss": 2.1135, "step": 10476 }, { "epoch": 1.0754310344827587, "grad_norm": 0.05689026787877083, "learning_rate": 0.01, "loss": 2.0748, "step": 10479 }, { "epoch": 1.0757389162561577, "grad_norm": 0.0593440905213356, "learning_rate": 0.01, "loss": 2.0922, "step": 10482 }, { "epoch": 1.0760467980295567, "grad_norm": 0.07408995181322098, "learning_rate": 0.01, "loss": 2.0781, "step": 10485 }, { "epoch": 1.0763546798029557, "grad_norm": 0.05688070133328438, "learning_rate": 0.01, "loss": 2.1085, "step": 10488 }, { "epoch": 1.0766625615763548, "grad_norm": 0.05378828942775726, "learning_rate": 0.01, "loss": 2.1084, "step": 10491 }, { "epoch": 1.0769704433497538, "grad_norm": 0.057735592126846313, "learning_rate": 0.01, "loss": 2.1023, "step": 10494 }, { "epoch": 1.0772783251231528, "grad_norm": 0.0586666576564312, "learning_rate": 0.01, "loss": 2.1003, "step": 10497 }, { "epoch": 1.0775862068965518, "grad_norm": 0.12087473273277283, "learning_rate": 0.01, "loss": 2.0974, "step": 10500 }, { "epoch": 1.0778940886699506, "grad_norm": 0.07307861000299454, "learning_rate": 0.01, "loss": 2.0913, "step": 10503 }, { "epoch": 1.0782019704433496, "grad_norm": 0.06621012091636658, "learning_rate": 0.01, "loss": 2.1173, "step": 10506 }, { "epoch": 1.0785098522167487, "grad_norm": 0.0647876039147377, "learning_rate": 0.01, "loss": 2.1006, "step": 10509 }, { "epoch": 1.0788177339901477, "grad_norm": 0.06163914501667023, "learning_rate": 0.01, "loss": 2.0892, "step": 10512 }, { "epoch": 1.0791256157635467, "grad_norm": 0.04312353581190109, "learning_rate": 0.01, "loss": 2.0901, "step": 10515 }, { "epoch": 1.0794334975369457, "grad_norm": 0.0760812908411026, "learning_rate": 0.01, "loss": 2.0995, "step": 10518 }, { "epoch": 1.0797413793103448, "grad_norm": 0.0802140161395073, "learning_rate": 0.01, "loss": 2.0905, "step": 10521 }, { "epoch": 1.0800492610837438, "grad_norm": 0.09008529782295227, "learning_rate": 0.01, "loss": 2.08, "step": 10524 }, { "epoch": 1.0803571428571428, "grad_norm": 0.07469696551561356, "learning_rate": 0.01, "loss": 2.0725, "step": 10527 }, { "epoch": 1.0806650246305418, "grad_norm": 0.08821582794189453, "learning_rate": 0.01, "loss": 2.1086, "step": 10530 }, { "epoch": 1.0809729064039408, "grad_norm": 0.04690997302532196, "learning_rate": 0.01, "loss": 2.1095, "step": 10533 }, { "epoch": 1.0812807881773399, "grad_norm": 0.04316158965229988, "learning_rate": 0.01, "loss": 2.0818, "step": 10536 }, { "epoch": 1.0815886699507389, "grad_norm": 0.06996279209852219, "learning_rate": 0.01, "loss": 2.0993, "step": 10539 }, { "epoch": 1.081896551724138, "grad_norm": 0.10073279589414597, "learning_rate": 0.01, "loss": 2.112, "step": 10542 }, { "epoch": 1.082204433497537, "grad_norm": 0.0448322668671608, "learning_rate": 0.01, "loss": 2.0834, "step": 10545 }, { "epoch": 1.082512315270936, "grad_norm": 0.11411638557910919, "learning_rate": 0.01, "loss": 2.1082, "step": 10548 }, { "epoch": 1.082820197044335, "grad_norm": 0.10779088735580444, "learning_rate": 0.01, "loss": 2.0702, "step": 10551 }, { "epoch": 1.083128078817734, "grad_norm": 0.041448626667261124, "learning_rate": 0.01, "loss": 2.1041, "step": 10554 }, { "epoch": 1.083435960591133, "grad_norm": 0.07522560656070709, "learning_rate": 0.01, "loss": 2.0794, "step": 10557 }, { "epoch": 1.083743842364532, "grad_norm": 0.048221901059150696, "learning_rate": 0.01, "loss": 2.0936, "step": 10560 }, { "epoch": 1.084051724137931, "grad_norm": 0.05512038618326187, "learning_rate": 0.01, "loss": 2.0898, "step": 10563 }, { "epoch": 1.08435960591133, "grad_norm": 0.07599300891160965, "learning_rate": 0.01, "loss": 2.1246, "step": 10566 }, { "epoch": 1.084667487684729, "grad_norm": 0.06631644070148468, "learning_rate": 0.01, "loss": 2.0861, "step": 10569 }, { "epoch": 1.0849753694581281, "grad_norm": 0.04972488060593605, "learning_rate": 0.01, "loss": 2.11, "step": 10572 }, { "epoch": 1.0852832512315271, "grad_norm": 0.08250217139720917, "learning_rate": 0.01, "loss": 2.1142, "step": 10575 }, { "epoch": 1.0855911330049262, "grad_norm": 0.09104974567890167, "learning_rate": 0.01, "loss": 2.0822, "step": 10578 }, { "epoch": 1.0858990147783252, "grad_norm": 0.057310063391923904, "learning_rate": 0.01, "loss": 2.0819, "step": 10581 }, { "epoch": 1.0862068965517242, "grad_norm": 0.08102291077375412, "learning_rate": 0.01, "loss": 2.0931, "step": 10584 }, { "epoch": 1.0865147783251232, "grad_norm": 0.045641325414180756, "learning_rate": 0.01, "loss": 2.1096, "step": 10587 }, { "epoch": 1.0868226600985222, "grad_norm": 0.05350523442029953, "learning_rate": 0.01, "loss": 2.1151, "step": 10590 }, { "epoch": 1.0871305418719213, "grad_norm": 0.045734379440546036, "learning_rate": 0.01, "loss": 2.1043, "step": 10593 }, { "epoch": 1.0874384236453203, "grad_norm": 0.044645924121141434, "learning_rate": 0.01, "loss": 2.0882, "step": 10596 }, { "epoch": 1.0877463054187193, "grad_norm": 0.046704743057489395, "learning_rate": 0.01, "loss": 2.0823, "step": 10599 }, { "epoch": 1.0880541871921183, "grad_norm": 0.09600807726383209, "learning_rate": 0.01, "loss": 2.09, "step": 10602 }, { "epoch": 1.0883620689655173, "grad_norm": 0.062323443591594696, "learning_rate": 0.01, "loss": 2.091, "step": 10605 }, { "epoch": 1.0886699507389164, "grad_norm": 0.08459887653589249, "learning_rate": 0.01, "loss": 2.094, "step": 10608 }, { "epoch": 1.0889778325123152, "grad_norm": 0.0621943362057209, "learning_rate": 0.01, "loss": 2.0735, "step": 10611 }, { "epoch": 1.0892857142857142, "grad_norm": 0.10963741689920425, "learning_rate": 0.01, "loss": 2.0769, "step": 10614 }, { "epoch": 1.0895935960591132, "grad_norm": 0.07325689494609833, "learning_rate": 0.01, "loss": 2.0905, "step": 10617 }, { "epoch": 1.0899014778325122, "grad_norm": 0.08307964354753494, "learning_rate": 0.01, "loss": 2.0977, "step": 10620 }, { "epoch": 1.0902093596059113, "grad_norm": 0.18072094023227692, "learning_rate": 0.01, "loss": 2.1096, "step": 10623 }, { "epoch": 1.0905172413793103, "grad_norm": 0.10427471250295639, "learning_rate": 0.01, "loss": 2.0761, "step": 10626 }, { "epoch": 1.0908251231527093, "grad_norm": 0.0732191875576973, "learning_rate": 0.01, "loss": 2.1031, "step": 10629 }, { "epoch": 1.0911330049261083, "grad_norm": 0.03703717514872551, "learning_rate": 0.01, "loss": 2.0734, "step": 10632 }, { "epoch": 1.0914408866995073, "grad_norm": 0.04907006770372391, "learning_rate": 0.01, "loss": 2.096, "step": 10635 }, { "epoch": 1.0917487684729064, "grad_norm": 0.04126304015517235, "learning_rate": 0.01, "loss": 2.0824, "step": 10638 }, { "epoch": 1.0920566502463054, "grad_norm": 0.04017401486635208, "learning_rate": 0.01, "loss": 2.0694, "step": 10641 }, { "epoch": 1.0923645320197044, "grad_norm": 0.036132264882326126, "learning_rate": 0.01, "loss": 2.0792, "step": 10644 }, { "epoch": 1.0926724137931034, "grad_norm": 0.06275150179862976, "learning_rate": 0.01, "loss": 2.1172, "step": 10647 }, { "epoch": 1.0929802955665024, "grad_norm": 0.08319203555583954, "learning_rate": 0.01, "loss": 2.0868, "step": 10650 }, { "epoch": 1.0932881773399015, "grad_norm": 0.08663000166416168, "learning_rate": 0.01, "loss": 2.0834, "step": 10653 }, { "epoch": 1.0935960591133005, "grad_norm": 0.10765951871871948, "learning_rate": 0.01, "loss": 2.0891, "step": 10656 }, { "epoch": 1.0939039408866995, "grad_norm": 0.035412587225437164, "learning_rate": 0.01, "loss": 2.0912, "step": 10659 }, { "epoch": 1.0942118226600985, "grad_norm": 0.051735054701566696, "learning_rate": 0.01, "loss": 2.0986, "step": 10662 }, { "epoch": 1.0945197044334976, "grad_norm": 0.04320614039897919, "learning_rate": 0.01, "loss": 2.0912, "step": 10665 }, { "epoch": 1.0948275862068966, "grad_norm": 0.03285462409257889, "learning_rate": 0.01, "loss": 2.0957, "step": 10668 }, { "epoch": 1.0951354679802956, "grad_norm": 0.05172726511955261, "learning_rate": 0.01, "loss": 2.0706, "step": 10671 }, { "epoch": 1.0954433497536946, "grad_norm": 0.04941645637154579, "learning_rate": 0.01, "loss": 2.1018, "step": 10674 }, { "epoch": 1.0957512315270936, "grad_norm": 0.04746576398611069, "learning_rate": 0.01, "loss": 2.1002, "step": 10677 }, { "epoch": 1.0960591133004927, "grad_norm": 0.10900839418172836, "learning_rate": 0.01, "loss": 2.1188, "step": 10680 }, { "epoch": 1.0963669950738917, "grad_norm": 0.06924229860305786, "learning_rate": 0.01, "loss": 2.097, "step": 10683 }, { "epoch": 1.0966748768472907, "grad_norm": 0.11047599464654922, "learning_rate": 0.01, "loss": 2.0607, "step": 10686 }, { "epoch": 1.0969827586206897, "grad_norm": 0.10662158578634262, "learning_rate": 0.01, "loss": 2.078, "step": 10689 }, { "epoch": 1.0972906403940887, "grad_norm": 0.07408568263053894, "learning_rate": 0.01, "loss": 2.0918, "step": 10692 }, { "epoch": 1.0975985221674878, "grad_norm": 0.0471009686589241, "learning_rate": 0.01, "loss": 2.1248, "step": 10695 }, { "epoch": 1.0979064039408868, "grad_norm": 0.049591194838285446, "learning_rate": 0.01, "loss": 2.082, "step": 10698 }, { "epoch": 1.0982142857142858, "grad_norm": 0.0919683426618576, "learning_rate": 0.01, "loss": 2.1229, "step": 10701 }, { "epoch": 1.0985221674876848, "grad_norm": 0.05292963236570358, "learning_rate": 0.01, "loss": 2.1097, "step": 10704 }, { "epoch": 1.0988300492610836, "grad_norm": 0.053880974650382996, "learning_rate": 0.01, "loss": 2.0787, "step": 10707 }, { "epoch": 1.0991379310344827, "grad_norm": 0.05608196556568146, "learning_rate": 0.01, "loss": 2.0735, "step": 10710 }, { "epoch": 1.0994458128078817, "grad_norm": 0.06456641852855682, "learning_rate": 0.01, "loss": 2.1148, "step": 10713 }, { "epoch": 1.0997536945812807, "grad_norm": 0.08165917545557022, "learning_rate": 0.01, "loss": 2.1199, "step": 10716 }, { "epoch": 1.1000615763546797, "grad_norm": 0.0773044228553772, "learning_rate": 0.01, "loss": 2.0972, "step": 10719 }, { "epoch": 1.1003694581280787, "grad_norm": 0.07669848203659058, "learning_rate": 0.01, "loss": 2.101, "step": 10722 }, { "epoch": 1.1006773399014778, "grad_norm": 0.0773942843079567, "learning_rate": 0.01, "loss": 2.0573, "step": 10725 }, { "epoch": 1.1009852216748768, "grad_norm": 0.06698640435934067, "learning_rate": 0.01, "loss": 2.1189, "step": 10728 }, { "epoch": 1.1012931034482758, "grad_norm": 0.098200224339962, "learning_rate": 0.01, "loss": 2.0739, "step": 10731 }, { "epoch": 1.1016009852216748, "grad_norm": 0.06676481664180756, "learning_rate": 0.01, "loss": 2.097, "step": 10734 }, { "epoch": 1.1019088669950738, "grad_norm": 0.03925321251153946, "learning_rate": 0.01, "loss": 2.0904, "step": 10737 }, { "epoch": 1.1022167487684729, "grad_norm": 0.08387935161590576, "learning_rate": 0.01, "loss": 2.1069, "step": 10740 }, { "epoch": 1.1025246305418719, "grad_norm": 0.06382130831480026, "learning_rate": 0.01, "loss": 2.091, "step": 10743 }, { "epoch": 1.102832512315271, "grad_norm": 0.04457903653383255, "learning_rate": 0.01, "loss": 2.074, "step": 10746 }, { "epoch": 1.10314039408867, "grad_norm": 0.057858239859342575, "learning_rate": 0.01, "loss": 2.1021, "step": 10749 }, { "epoch": 1.103448275862069, "grad_norm": 0.055992983281612396, "learning_rate": 0.01, "loss": 2.0894, "step": 10752 }, { "epoch": 1.103756157635468, "grad_norm": 0.10200835764408112, "learning_rate": 0.01, "loss": 2.0948, "step": 10755 }, { "epoch": 1.104064039408867, "grad_norm": 0.11163626611232758, "learning_rate": 0.01, "loss": 2.0963, "step": 10758 }, { "epoch": 1.104371921182266, "grad_norm": 0.11462046951055527, "learning_rate": 0.01, "loss": 2.0808, "step": 10761 }, { "epoch": 1.104679802955665, "grad_norm": 0.08823121339082718, "learning_rate": 0.01, "loss": 2.1136, "step": 10764 }, { "epoch": 1.104987684729064, "grad_norm": 0.08843538910150528, "learning_rate": 0.01, "loss": 2.0767, "step": 10767 }, { "epoch": 1.105295566502463, "grad_norm": 0.05961614102125168, "learning_rate": 0.01, "loss": 2.1067, "step": 10770 }, { "epoch": 1.105603448275862, "grad_norm": 0.08095360547304153, "learning_rate": 0.01, "loss": 2.066, "step": 10773 }, { "epoch": 1.1059113300492611, "grad_norm": 0.08094312995672226, "learning_rate": 0.01, "loss": 2.0849, "step": 10776 }, { "epoch": 1.1062192118226601, "grad_norm": 0.05718453973531723, "learning_rate": 0.01, "loss": 2.1097, "step": 10779 }, { "epoch": 1.1065270935960592, "grad_norm": 0.0537499338388443, "learning_rate": 0.01, "loss": 2.082, "step": 10782 }, { "epoch": 1.1068349753694582, "grad_norm": 0.06437748670578003, "learning_rate": 0.01, "loss": 2.0982, "step": 10785 }, { "epoch": 1.1071428571428572, "grad_norm": 0.03420199081301689, "learning_rate": 0.01, "loss": 2.0919, "step": 10788 }, { "epoch": 1.1074507389162562, "grad_norm": 0.049510665237903595, "learning_rate": 0.01, "loss": 2.0777, "step": 10791 }, { "epoch": 1.1077586206896552, "grad_norm": 0.044145356863737106, "learning_rate": 0.01, "loss": 2.0994, "step": 10794 }, { "epoch": 1.1080665024630543, "grad_norm": 0.0494622103869915, "learning_rate": 0.01, "loss": 2.103, "step": 10797 }, { "epoch": 1.1083743842364533, "grad_norm": 0.039029188454151154, "learning_rate": 0.01, "loss": 2.1026, "step": 10800 }, { "epoch": 1.1086822660098523, "grad_norm": 0.05786842480301857, "learning_rate": 0.01, "loss": 2.0767, "step": 10803 }, { "epoch": 1.1089901477832513, "grad_norm": 0.07576561719179153, "learning_rate": 0.01, "loss": 2.1078, "step": 10806 }, { "epoch": 1.1092980295566504, "grad_norm": 0.084762342274189, "learning_rate": 0.01, "loss": 2.1027, "step": 10809 }, { "epoch": 1.1096059113300494, "grad_norm": 0.05042179673910141, "learning_rate": 0.01, "loss": 2.0742, "step": 10812 }, { "epoch": 1.1099137931034482, "grad_norm": 0.07194402068853378, "learning_rate": 0.01, "loss": 2.0985, "step": 10815 }, { "epoch": 1.1102216748768472, "grad_norm": 0.13966146111488342, "learning_rate": 0.01, "loss": 2.0924, "step": 10818 }, { "epoch": 1.1105295566502462, "grad_norm": 0.060582250356674194, "learning_rate": 0.01, "loss": 2.1039, "step": 10821 }, { "epoch": 1.1108374384236452, "grad_norm": 0.03663609176874161, "learning_rate": 0.01, "loss": 2.0731, "step": 10824 }, { "epoch": 1.1111453201970443, "grad_norm": 0.09468091279268265, "learning_rate": 0.01, "loss": 2.0961, "step": 10827 }, { "epoch": 1.1114532019704433, "grad_norm": 0.07199615240097046, "learning_rate": 0.01, "loss": 2.0834, "step": 10830 }, { "epoch": 1.1117610837438423, "grad_norm": 0.06624965369701385, "learning_rate": 0.01, "loss": 2.1286, "step": 10833 }, { "epoch": 1.1120689655172413, "grad_norm": 0.0414128340780735, "learning_rate": 0.01, "loss": 2.0922, "step": 10836 }, { "epoch": 1.1123768472906403, "grad_norm": 0.06416642665863037, "learning_rate": 0.01, "loss": 2.0908, "step": 10839 }, { "epoch": 1.1126847290640394, "grad_norm": 0.05309692397713661, "learning_rate": 0.01, "loss": 2.117, "step": 10842 }, { "epoch": 1.1129926108374384, "grad_norm": 0.04576392099261284, "learning_rate": 0.01, "loss": 2.0801, "step": 10845 }, { "epoch": 1.1133004926108374, "grad_norm": 0.0887250304222107, "learning_rate": 0.01, "loss": 2.0815, "step": 10848 }, { "epoch": 1.1136083743842364, "grad_norm": 0.061223480850458145, "learning_rate": 0.01, "loss": 2.0607, "step": 10851 }, { "epoch": 1.1139162561576355, "grad_norm": 0.12983545660972595, "learning_rate": 0.01, "loss": 2.0882, "step": 10854 }, { "epoch": 1.1142241379310345, "grad_norm": 0.09382637590169907, "learning_rate": 0.01, "loss": 2.0838, "step": 10857 }, { "epoch": 1.1145320197044335, "grad_norm": 0.04275491461157799, "learning_rate": 0.01, "loss": 2.0905, "step": 10860 }, { "epoch": 1.1148399014778325, "grad_norm": 0.044315680861473083, "learning_rate": 0.01, "loss": 2.0924, "step": 10863 }, { "epoch": 1.1151477832512315, "grad_norm": 0.05177663639187813, "learning_rate": 0.01, "loss": 2.0985, "step": 10866 }, { "epoch": 1.1154556650246306, "grad_norm": 0.08161107450723648, "learning_rate": 0.01, "loss": 2.105, "step": 10869 }, { "epoch": 1.1157635467980296, "grad_norm": 0.08273576200008392, "learning_rate": 0.01, "loss": 2.0991, "step": 10872 }, { "epoch": 1.1160714285714286, "grad_norm": 0.04973771795630455, "learning_rate": 0.01, "loss": 2.0849, "step": 10875 }, { "epoch": 1.1163793103448276, "grad_norm": 0.036696773022413254, "learning_rate": 0.01, "loss": 2.0651, "step": 10878 }, { "epoch": 1.1166871921182266, "grad_norm": 0.03647401183843613, "learning_rate": 0.01, "loss": 2.0772, "step": 10881 }, { "epoch": 1.1169950738916257, "grad_norm": 0.03360895812511444, "learning_rate": 0.01, "loss": 2.0952, "step": 10884 }, { "epoch": 1.1173029556650247, "grad_norm": 0.037918057292699814, "learning_rate": 0.01, "loss": 2.0776, "step": 10887 }, { "epoch": 1.1176108374384237, "grad_norm": 0.10544890910387039, "learning_rate": 0.01, "loss": 2.1079, "step": 10890 }, { "epoch": 1.1179187192118227, "grad_norm": 0.15091745555400848, "learning_rate": 0.01, "loss": 2.1231, "step": 10893 }, { "epoch": 1.1182266009852218, "grad_norm": 0.07386527210474014, "learning_rate": 0.01, "loss": 2.0922, "step": 10896 }, { "epoch": 1.1185344827586208, "grad_norm": 0.04889804869890213, "learning_rate": 0.01, "loss": 2.1016, "step": 10899 }, { "epoch": 1.1188423645320198, "grad_norm": 0.04805940017104149, "learning_rate": 0.01, "loss": 2.0833, "step": 10902 }, { "epoch": 1.1191502463054188, "grad_norm": 0.040073320269584656, "learning_rate": 0.01, "loss": 2.0943, "step": 10905 }, { "epoch": 1.1194581280788178, "grad_norm": 0.046124961227178574, "learning_rate": 0.01, "loss": 2.0891, "step": 10908 }, { "epoch": 1.1197660098522166, "grad_norm": 0.04982076957821846, "learning_rate": 0.01, "loss": 2.0595, "step": 10911 }, { "epoch": 1.1200738916256157, "grad_norm": 0.036569107323884964, "learning_rate": 0.01, "loss": 2.0602, "step": 10914 }, { "epoch": 1.1203817733990147, "grad_norm": 0.033519893884658813, "learning_rate": 0.01, "loss": 2.1026, "step": 10917 }, { "epoch": 1.1206896551724137, "grad_norm": 0.0513744130730629, "learning_rate": 0.01, "loss": 2.1119, "step": 10920 }, { "epoch": 1.1209975369458127, "grad_norm": 0.08677095174789429, "learning_rate": 0.01, "loss": 2.0791, "step": 10923 }, { "epoch": 1.1213054187192117, "grad_norm": 0.1263512223958969, "learning_rate": 0.01, "loss": 2.0912, "step": 10926 }, { "epoch": 1.1216133004926108, "grad_norm": 0.0737731009721756, "learning_rate": 0.01, "loss": 2.1193, "step": 10929 }, { "epoch": 1.1219211822660098, "grad_norm": 0.045122213661670685, "learning_rate": 0.01, "loss": 2.1029, "step": 10932 }, { "epoch": 1.1222290640394088, "grad_norm": 0.04616571217775345, "learning_rate": 0.01, "loss": 2.062, "step": 10935 }, { "epoch": 1.1225369458128078, "grad_norm": 0.03985420614480972, "learning_rate": 0.01, "loss": 2.0868, "step": 10938 }, { "epoch": 1.1228448275862069, "grad_norm": 0.11042526364326477, "learning_rate": 0.01, "loss": 2.1057, "step": 10941 }, { "epoch": 1.1231527093596059, "grad_norm": 0.08071359992027283, "learning_rate": 0.01, "loss": 2.0796, "step": 10944 }, { "epoch": 1.123460591133005, "grad_norm": 0.049534909427165985, "learning_rate": 0.01, "loss": 2.1055, "step": 10947 }, { "epoch": 1.123768472906404, "grad_norm": 0.08341135829687119, "learning_rate": 0.01, "loss": 2.0948, "step": 10950 }, { "epoch": 1.124076354679803, "grad_norm": 0.03842156007885933, "learning_rate": 0.01, "loss": 2.1051, "step": 10953 }, { "epoch": 1.124384236453202, "grad_norm": 0.04978267103433609, "learning_rate": 0.01, "loss": 2.0927, "step": 10956 }, { "epoch": 1.124692118226601, "grad_norm": 0.04545191302895546, "learning_rate": 0.01, "loss": 2.0847, "step": 10959 }, { "epoch": 1.125, "grad_norm": 0.10103368014097214, "learning_rate": 0.01, "loss": 2.105, "step": 10962 }, { "epoch": 1.125307881773399, "grad_norm": 0.05956938862800598, "learning_rate": 0.01, "loss": 2.1043, "step": 10965 }, { "epoch": 1.125615763546798, "grad_norm": 0.048797741532325745, "learning_rate": 0.01, "loss": 2.1044, "step": 10968 }, { "epoch": 1.125923645320197, "grad_norm": 0.041901495307683945, "learning_rate": 0.01, "loss": 2.0847, "step": 10971 }, { "epoch": 1.126231527093596, "grad_norm": 0.14950989186763763, "learning_rate": 0.01, "loss": 2.0919, "step": 10974 }, { "epoch": 1.126539408866995, "grad_norm": 0.049760669469833374, "learning_rate": 0.01, "loss": 2.0707, "step": 10977 }, { "epoch": 1.1268472906403941, "grad_norm": 0.07016187906265259, "learning_rate": 0.01, "loss": 2.0709, "step": 10980 }, { "epoch": 1.1271551724137931, "grad_norm": 0.057528458535671234, "learning_rate": 0.01, "loss": 2.0759, "step": 10983 }, { "epoch": 1.1274630541871922, "grad_norm": 0.06690733879804611, "learning_rate": 0.01, "loss": 2.102, "step": 10986 }, { "epoch": 1.1277709359605912, "grad_norm": 0.05225450173020363, "learning_rate": 0.01, "loss": 2.0675, "step": 10989 }, { "epoch": 1.1280788177339902, "grad_norm": 0.048363544046878815, "learning_rate": 0.01, "loss": 2.0634, "step": 10992 }, { "epoch": 1.1283866995073892, "grad_norm": 0.05356382206082344, "learning_rate": 0.01, "loss": 2.1003, "step": 10995 }, { "epoch": 1.1286945812807883, "grad_norm": 0.06921149045228958, "learning_rate": 0.01, "loss": 2.0934, "step": 10998 }, { "epoch": 1.1290024630541873, "grad_norm": 0.04210525006055832, "learning_rate": 0.01, "loss": 2.096, "step": 11001 }, { "epoch": 1.1293103448275863, "grad_norm": 0.11790584027767181, "learning_rate": 0.01, "loss": 2.0746, "step": 11004 }, { "epoch": 1.1296182266009853, "grad_norm": 0.08045307546854019, "learning_rate": 0.01, "loss": 2.0929, "step": 11007 }, { "epoch": 1.1299261083743843, "grad_norm": 0.10474243015050888, "learning_rate": 0.01, "loss": 2.1418, "step": 11010 }, { "epoch": 1.1302339901477834, "grad_norm": 0.06073759123682976, "learning_rate": 0.01, "loss": 2.1089, "step": 11013 }, { "epoch": 1.1305418719211824, "grad_norm": 0.057685475796461105, "learning_rate": 0.01, "loss": 2.0959, "step": 11016 }, { "epoch": 1.1308497536945814, "grad_norm": 0.04218476638197899, "learning_rate": 0.01, "loss": 2.0834, "step": 11019 }, { "epoch": 1.1311576354679802, "grad_norm": 0.04814853519201279, "learning_rate": 0.01, "loss": 2.1134, "step": 11022 }, { "epoch": 1.1314655172413792, "grad_norm": 0.1344536989927292, "learning_rate": 0.01, "loss": 2.1121, "step": 11025 }, { "epoch": 1.1317733990147782, "grad_norm": 0.057088855654001236, "learning_rate": 0.01, "loss": 2.0978, "step": 11028 }, { "epoch": 1.1320812807881773, "grad_norm": 0.04567364603281021, "learning_rate": 0.01, "loss": 2.0837, "step": 11031 }, { "epoch": 1.1323891625615763, "grad_norm": 0.07506916671991348, "learning_rate": 0.01, "loss": 2.0926, "step": 11034 }, { "epoch": 1.1326970443349753, "grad_norm": 0.05837171897292137, "learning_rate": 0.01, "loss": 2.0961, "step": 11037 }, { "epoch": 1.1330049261083743, "grad_norm": 0.0457015223801136, "learning_rate": 0.01, "loss": 2.101, "step": 11040 }, { "epoch": 1.1333128078817734, "grad_norm": 0.061310991644859314, "learning_rate": 0.01, "loss": 2.1128, "step": 11043 }, { "epoch": 1.1336206896551724, "grad_norm": 0.05517786741256714, "learning_rate": 0.01, "loss": 2.0844, "step": 11046 }, { "epoch": 1.1339285714285714, "grad_norm": 0.07835637778043747, "learning_rate": 0.01, "loss": 2.0996, "step": 11049 }, { "epoch": 1.1342364532019704, "grad_norm": 0.05821641907095909, "learning_rate": 0.01, "loss": 2.1074, "step": 11052 }, { "epoch": 1.1345443349753694, "grad_norm": 0.04394884407520294, "learning_rate": 0.01, "loss": 2.0799, "step": 11055 }, { "epoch": 1.1348522167487685, "grad_norm": 0.05148720741271973, "learning_rate": 0.01, "loss": 2.0856, "step": 11058 }, { "epoch": 1.1351600985221675, "grad_norm": 0.05766841769218445, "learning_rate": 0.01, "loss": 2.0973, "step": 11061 }, { "epoch": 1.1354679802955665, "grad_norm": 0.09894710779190063, "learning_rate": 0.01, "loss": 2.0831, "step": 11064 }, { "epoch": 1.1357758620689655, "grad_norm": 0.11916875094175339, "learning_rate": 0.01, "loss": 2.1044, "step": 11067 }, { "epoch": 1.1360837438423645, "grad_norm": 0.03926829248666763, "learning_rate": 0.01, "loss": 2.0866, "step": 11070 }, { "epoch": 1.1363916256157636, "grad_norm": 0.05105220153927803, "learning_rate": 0.01, "loss": 2.0911, "step": 11073 }, { "epoch": 1.1366995073891626, "grad_norm": 0.04516123607754707, "learning_rate": 0.01, "loss": 2.0693, "step": 11076 }, { "epoch": 1.1370073891625616, "grad_norm": 0.046173594892024994, "learning_rate": 0.01, "loss": 2.092, "step": 11079 }, { "epoch": 1.1373152709359606, "grad_norm": 0.05173357576131821, "learning_rate": 0.01, "loss": 2.1007, "step": 11082 }, { "epoch": 1.1376231527093597, "grad_norm": 0.06486919522285461, "learning_rate": 0.01, "loss": 2.0775, "step": 11085 }, { "epoch": 1.1379310344827587, "grad_norm": 0.09763675928115845, "learning_rate": 0.01, "loss": 2.0942, "step": 11088 }, { "epoch": 1.1382389162561577, "grad_norm": 0.1281820833683014, "learning_rate": 0.01, "loss": 2.0903, "step": 11091 }, { "epoch": 1.1385467980295567, "grad_norm": 0.05734977498650551, "learning_rate": 0.01, "loss": 2.0891, "step": 11094 }, { "epoch": 1.1388546798029557, "grad_norm": 0.06809762120246887, "learning_rate": 0.01, "loss": 2.0855, "step": 11097 }, { "epoch": 1.1391625615763548, "grad_norm": 0.05105281621217728, "learning_rate": 0.01, "loss": 2.0975, "step": 11100 }, { "epoch": 1.1394704433497538, "grad_norm": 0.07381090521812439, "learning_rate": 0.01, "loss": 2.0762, "step": 11103 }, { "epoch": 1.1397783251231528, "grad_norm": 0.050722070038318634, "learning_rate": 0.01, "loss": 2.0778, "step": 11106 }, { "epoch": 1.1400862068965516, "grad_norm": 0.03850618377327919, "learning_rate": 0.01, "loss": 2.0626, "step": 11109 }, { "epoch": 1.1403940886699506, "grad_norm": 0.08264841884374619, "learning_rate": 0.01, "loss": 2.1258, "step": 11112 }, { "epoch": 1.1407019704433496, "grad_norm": 0.06493505835533142, "learning_rate": 0.01, "loss": 2.0961, "step": 11115 }, { "epoch": 1.1410098522167487, "grad_norm": 0.06895186007022858, "learning_rate": 0.01, "loss": 2.1207, "step": 11118 }, { "epoch": 1.1413177339901477, "grad_norm": 0.042232003062963486, "learning_rate": 0.01, "loss": 2.0724, "step": 11121 }, { "epoch": 1.1416256157635467, "grad_norm": 0.10296539217233658, "learning_rate": 0.01, "loss": 2.1126, "step": 11124 }, { "epoch": 1.1419334975369457, "grad_norm": 0.043095991015434265, "learning_rate": 0.01, "loss": 2.0928, "step": 11127 }, { "epoch": 1.1422413793103448, "grad_norm": 0.046020470559597015, "learning_rate": 0.01, "loss": 2.092, "step": 11130 }, { "epoch": 1.1425492610837438, "grad_norm": 0.04754204675555229, "learning_rate": 0.01, "loss": 2.126, "step": 11133 }, { "epoch": 1.1428571428571428, "grad_norm": 0.03732014447450638, "learning_rate": 0.01, "loss": 2.0637, "step": 11136 }, { "epoch": 1.1431650246305418, "grad_norm": 0.039080169051885605, "learning_rate": 0.01, "loss": 2.0877, "step": 11139 }, { "epoch": 1.1434729064039408, "grad_norm": 0.04575611278414726, "learning_rate": 0.01, "loss": 2.0646, "step": 11142 }, { "epoch": 1.1437807881773399, "grad_norm": 0.11764515191316605, "learning_rate": 0.01, "loss": 2.0964, "step": 11145 }, { "epoch": 1.1440886699507389, "grad_norm": 0.10087098181247711, "learning_rate": 0.01, "loss": 2.069, "step": 11148 }, { "epoch": 1.144396551724138, "grad_norm": 0.05462269112467766, "learning_rate": 0.01, "loss": 2.0902, "step": 11151 }, { "epoch": 1.144704433497537, "grad_norm": 0.06296168267726898, "learning_rate": 0.01, "loss": 2.1257, "step": 11154 }, { "epoch": 1.145012315270936, "grad_norm": 0.041026949882507324, "learning_rate": 0.01, "loss": 2.0672, "step": 11157 }, { "epoch": 1.145320197044335, "grad_norm": 0.05761269852519035, "learning_rate": 0.01, "loss": 2.0833, "step": 11160 }, { "epoch": 1.145628078817734, "grad_norm": 0.12491103261709213, "learning_rate": 0.01, "loss": 2.0681, "step": 11163 }, { "epoch": 1.145935960591133, "grad_norm": 0.09269531071186066, "learning_rate": 0.01, "loss": 2.0618, "step": 11166 }, { "epoch": 1.146243842364532, "grad_norm": 0.05623659864068031, "learning_rate": 0.01, "loss": 2.0861, "step": 11169 }, { "epoch": 1.146551724137931, "grad_norm": 0.04075248911976814, "learning_rate": 0.01, "loss": 2.0513, "step": 11172 }, { "epoch": 1.14685960591133, "grad_norm": 0.04557061940431595, "learning_rate": 0.01, "loss": 2.0876, "step": 11175 }, { "epoch": 1.147167487684729, "grad_norm": 0.05686535686254501, "learning_rate": 0.01, "loss": 2.0746, "step": 11178 }, { "epoch": 1.1474753694581281, "grad_norm": 0.04164785146713257, "learning_rate": 0.01, "loss": 2.0684, "step": 11181 }, { "epoch": 1.1477832512315271, "grad_norm": 0.05453825742006302, "learning_rate": 0.01, "loss": 2.0809, "step": 11184 }, { "epoch": 1.1480911330049262, "grad_norm": 0.15215769410133362, "learning_rate": 0.01, "loss": 2.0806, "step": 11187 }, { "epoch": 1.1483990147783252, "grad_norm": 0.14634385704994202, "learning_rate": 0.01, "loss": 2.11, "step": 11190 }, { "epoch": 1.1487068965517242, "grad_norm": 0.06655893474817276, "learning_rate": 0.01, "loss": 2.0953, "step": 11193 }, { "epoch": 1.1490147783251232, "grad_norm": 0.07074826955795288, "learning_rate": 0.01, "loss": 2.0984, "step": 11196 }, { "epoch": 1.1493226600985222, "grad_norm": 0.044581200927495956, "learning_rate": 0.01, "loss": 2.0816, "step": 11199 }, { "epoch": 1.1496305418719213, "grad_norm": 0.06769565492868423, "learning_rate": 0.01, "loss": 2.0809, "step": 11202 }, { "epoch": 1.1499384236453203, "grad_norm": 0.11437363177537918, "learning_rate": 0.01, "loss": 2.0761, "step": 11205 }, { "epoch": 1.1502463054187193, "grad_norm": 0.040699586272239685, "learning_rate": 0.01, "loss": 2.1028, "step": 11208 }, { "epoch": 1.1505541871921183, "grad_norm": 0.06996385753154755, "learning_rate": 0.01, "loss": 2.0755, "step": 11211 }, { "epoch": 1.1508620689655173, "grad_norm": 0.04621044918894768, "learning_rate": 0.01, "loss": 2.11, "step": 11214 }, { "epoch": 1.1511699507389164, "grad_norm": 0.07714984565973282, "learning_rate": 0.01, "loss": 2.1008, "step": 11217 }, { "epoch": 1.1514778325123154, "grad_norm": 0.11194620281457901, "learning_rate": 0.01, "loss": 2.093, "step": 11220 }, { "epoch": 1.1517857142857142, "grad_norm": 0.08710391074419022, "learning_rate": 0.01, "loss": 2.1017, "step": 11223 }, { "epoch": 1.1520935960591132, "grad_norm": 0.06653989106416702, "learning_rate": 0.01, "loss": 2.1084, "step": 11226 }, { "epoch": 1.1524014778325122, "grad_norm": 0.056591540575027466, "learning_rate": 0.01, "loss": 2.107, "step": 11229 }, { "epoch": 1.1527093596059113, "grad_norm": 0.056475669145584106, "learning_rate": 0.01, "loss": 2.0774, "step": 11232 }, { "epoch": 1.1530172413793103, "grad_norm": 0.08408259600400925, "learning_rate": 0.01, "loss": 2.1193, "step": 11235 }, { "epoch": 1.1533251231527093, "grad_norm": 0.06853178143501282, "learning_rate": 0.01, "loss": 2.0796, "step": 11238 }, { "epoch": 1.1536330049261083, "grad_norm": 0.11699818074703217, "learning_rate": 0.01, "loss": 2.0932, "step": 11241 }, { "epoch": 1.1539408866995073, "grad_norm": 0.06365542113780975, "learning_rate": 0.01, "loss": 2.079, "step": 11244 }, { "epoch": 1.1542487684729064, "grad_norm": 0.040505316108465195, "learning_rate": 0.01, "loss": 2.0688, "step": 11247 }, { "epoch": 1.1545566502463054, "grad_norm": 0.06958888471126556, "learning_rate": 0.01, "loss": 2.0882, "step": 11250 }, { "epoch": 1.1548645320197044, "grad_norm": 0.08968232572078705, "learning_rate": 0.01, "loss": 2.064, "step": 11253 }, { "epoch": 1.1551724137931034, "grad_norm": 0.05143412947654724, "learning_rate": 0.01, "loss": 2.0682, "step": 11256 }, { "epoch": 1.1554802955665024, "grad_norm": 0.1219927966594696, "learning_rate": 0.01, "loss": 2.1038, "step": 11259 }, { "epoch": 1.1557881773399015, "grad_norm": 0.11974184960126877, "learning_rate": 0.01, "loss": 2.1099, "step": 11262 }, { "epoch": 1.1560960591133005, "grad_norm": 0.09557066112756729, "learning_rate": 0.01, "loss": 2.0976, "step": 11265 }, { "epoch": 1.1564039408866995, "grad_norm": 0.10169565677642822, "learning_rate": 0.01, "loss": 2.0904, "step": 11268 }, { "epoch": 1.1567118226600985, "grad_norm": 0.0586596317589283, "learning_rate": 0.01, "loss": 2.0787, "step": 11271 }, { "epoch": 1.1570197044334976, "grad_norm": 0.0423048697412014, "learning_rate": 0.01, "loss": 2.1084, "step": 11274 }, { "epoch": 1.1573275862068966, "grad_norm": 0.07394707947969437, "learning_rate": 0.01, "loss": 2.0802, "step": 11277 }, { "epoch": 1.1576354679802956, "grad_norm": 0.05507487431168556, "learning_rate": 0.01, "loss": 2.0777, "step": 11280 }, { "epoch": 1.1579433497536946, "grad_norm": 0.03251083940267563, "learning_rate": 0.01, "loss": 2.1128, "step": 11283 }, { "epoch": 1.1582512315270936, "grad_norm": 0.04480468109250069, "learning_rate": 0.01, "loss": 2.1162, "step": 11286 }, { "epoch": 1.1585591133004927, "grad_norm": 0.08275749534368515, "learning_rate": 0.01, "loss": 2.0808, "step": 11289 }, { "epoch": 1.1588669950738917, "grad_norm": 0.08199379593133926, "learning_rate": 0.01, "loss": 2.1006, "step": 11292 }, { "epoch": 1.1591748768472907, "grad_norm": 0.1015235111117363, "learning_rate": 0.01, "loss": 2.0926, "step": 11295 }, { "epoch": 1.1594827586206897, "grad_norm": 0.08872919529676437, "learning_rate": 0.01, "loss": 2.0739, "step": 11298 }, { "epoch": 1.1597906403940887, "grad_norm": 0.04640579596161842, "learning_rate": 0.01, "loss": 2.09, "step": 11301 }, { "epoch": 1.1600985221674878, "grad_norm": 0.044142261147499084, "learning_rate": 0.01, "loss": 2.115, "step": 11304 }, { "epoch": 1.1604064039408868, "grad_norm": 0.1030762568116188, "learning_rate": 0.01, "loss": 2.1358, "step": 11307 }, { "epoch": 1.1607142857142858, "grad_norm": 0.06712359189987183, "learning_rate": 0.01, "loss": 2.0504, "step": 11310 }, { "epoch": 1.1610221674876846, "grad_norm": 0.05579240992665291, "learning_rate": 0.01, "loss": 2.0915, "step": 11313 }, { "epoch": 1.1613300492610836, "grad_norm": 0.04237228259444237, "learning_rate": 0.01, "loss": 2.0514, "step": 11316 }, { "epoch": 1.1616379310344827, "grad_norm": 0.08990642428398132, "learning_rate": 0.01, "loss": 2.0753, "step": 11319 }, { "epoch": 1.1619458128078817, "grad_norm": 0.09788185358047485, "learning_rate": 0.01, "loss": 2.0907, "step": 11322 }, { "epoch": 1.1622536945812807, "grad_norm": 0.07207074761390686, "learning_rate": 0.01, "loss": 2.0858, "step": 11325 }, { "epoch": 1.1625615763546797, "grad_norm": 0.07704904675483704, "learning_rate": 0.01, "loss": 2.0938, "step": 11328 }, { "epoch": 1.1628694581280787, "grad_norm": 0.07003269344568253, "learning_rate": 0.01, "loss": 2.1031, "step": 11331 }, { "epoch": 1.1631773399014778, "grad_norm": 0.05584646388888359, "learning_rate": 0.01, "loss": 2.0852, "step": 11334 }, { "epoch": 1.1634852216748768, "grad_norm": 0.10223853588104248, "learning_rate": 0.01, "loss": 2.072, "step": 11337 }, { "epoch": 1.1637931034482758, "grad_norm": 0.13336369395256042, "learning_rate": 0.01, "loss": 2.0902, "step": 11340 }, { "epoch": 1.1641009852216748, "grad_norm": 0.04471458122134209, "learning_rate": 0.01, "loss": 2.0884, "step": 11343 }, { "epoch": 1.1644088669950738, "grad_norm": 0.04342315346002579, "learning_rate": 0.01, "loss": 2.0885, "step": 11346 }, { "epoch": 1.1647167487684729, "grad_norm": 0.05172869563102722, "learning_rate": 0.01, "loss": 2.069, "step": 11349 }, { "epoch": 1.1650246305418719, "grad_norm": 0.04304314777255058, "learning_rate": 0.01, "loss": 2.0915, "step": 11352 }, { "epoch": 1.165332512315271, "grad_norm": 0.14592792093753815, "learning_rate": 0.01, "loss": 2.0841, "step": 11355 }, { "epoch": 1.16564039408867, "grad_norm": 0.05238402634859085, "learning_rate": 0.01, "loss": 2.0772, "step": 11358 }, { "epoch": 1.165948275862069, "grad_norm": 0.04800669103860855, "learning_rate": 0.01, "loss": 2.0739, "step": 11361 }, { "epoch": 1.166256157635468, "grad_norm": 0.06648313999176025, "learning_rate": 0.01, "loss": 2.0956, "step": 11364 }, { "epoch": 1.166564039408867, "grad_norm": 0.03402326628565788, "learning_rate": 0.01, "loss": 2.0847, "step": 11367 }, { "epoch": 1.166871921182266, "grad_norm": 0.05076766759157181, "learning_rate": 0.01, "loss": 2.0833, "step": 11370 }, { "epoch": 1.167179802955665, "grad_norm": 0.07221470773220062, "learning_rate": 0.01, "loss": 2.0778, "step": 11373 }, { "epoch": 1.167487684729064, "grad_norm": 0.04556736722588539, "learning_rate": 0.01, "loss": 2.0957, "step": 11376 }, { "epoch": 1.167795566502463, "grad_norm": 0.03702834993600845, "learning_rate": 0.01, "loss": 2.0843, "step": 11379 }, { "epoch": 1.168103448275862, "grad_norm": 0.046527571976184845, "learning_rate": 0.01, "loss": 2.084, "step": 11382 }, { "epoch": 1.1684113300492611, "grad_norm": 0.09520363062620163, "learning_rate": 0.01, "loss": 2.0924, "step": 11385 }, { "epoch": 1.1687192118226601, "grad_norm": 0.12759263813495636, "learning_rate": 0.01, "loss": 2.1265, "step": 11388 }, { "epoch": 1.1690270935960592, "grad_norm": 0.03981192037463188, "learning_rate": 0.01, "loss": 2.076, "step": 11391 }, { "epoch": 1.1693349753694582, "grad_norm": 0.04739897698163986, "learning_rate": 0.01, "loss": 2.1013, "step": 11394 }, { "epoch": 1.1696428571428572, "grad_norm": 0.04937390610575676, "learning_rate": 0.01, "loss": 2.0832, "step": 11397 }, { "epoch": 1.1699507389162562, "grad_norm": 0.07097122073173523, "learning_rate": 0.01, "loss": 2.0699, "step": 11400 }, { "epoch": 1.1702586206896552, "grad_norm": 0.07773783802986145, "learning_rate": 0.01, "loss": 2.1215, "step": 11403 }, { "epoch": 1.1705665024630543, "grad_norm": 0.04591994732618332, "learning_rate": 0.01, "loss": 2.0657, "step": 11406 }, { "epoch": 1.1708743842364533, "grad_norm": 0.08724237233400345, "learning_rate": 0.01, "loss": 2.0817, "step": 11409 }, { "epoch": 1.1711822660098523, "grad_norm": 0.06528041511774063, "learning_rate": 0.01, "loss": 2.0962, "step": 11412 }, { "epoch": 1.1714901477832513, "grad_norm": 0.0660424679517746, "learning_rate": 0.01, "loss": 2.0904, "step": 11415 }, { "epoch": 1.1717980295566504, "grad_norm": 0.08304266631603241, "learning_rate": 0.01, "loss": 2.0912, "step": 11418 }, { "epoch": 1.1721059113300494, "grad_norm": 0.05073266103863716, "learning_rate": 0.01, "loss": 2.0747, "step": 11421 }, { "epoch": 1.1724137931034484, "grad_norm": 0.07305736094713211, "learning_rate": 0.01, "loss": 2.0852, "step": 11424 }, { "epoch": 1.1727216748768472, "grad_norm": 0.09365279227495193, "learning_rate": 0.01, "loss": 2.0883, "step": 11427 }, { "epoch": 1.1730295566502462, "grad_norm": 0.12279067188501358, "learning_rate": 0.01, "loss": 2.0999, "step": 11430 }, { "epoch": 1.1733374384236452, "grad_norm": 0.0589769072830677, "learning_rate": 0.01, "loss": 2.0948, "step": 11433 }, { "epoch": 1.1736453201970443, "grad_norm": 0.06621567159891129, "learning_rate": 0.01, "loss": 2.0823, "step": 11436 }, { "epoch": 1.1739532019704433, "grad_norm": 0.051341816782951355, "learning_rate": 0.01, "loss": 2.0622, "step": 11439 }, { "epoch": 1.1742610837438423, "grad_norm": 0.06027314066886902, "learning_rate": 0.01, "loss": 2.0798, "step": 11442 }, { "epoch": 1.1745689655172413, "grad_norm": 0.10131573677062988, "learning_rate": 0.01, "loss": 2.0882, "step": 11445 }, { "epoch": 1.1748768472906403, "grad_norm": 0.08082377910614014, "learning_rate": 0.01, "loss": 2.0949, "step": 11448 }, { "epoch": 1.1751847290640394, "grad_norm": 0.07095243781805038, "learning_rate": 0.01, "loss": 2.0655, "step": 11451 }, { "epoch": 1.1754926108374384, "grad_norm": 0.07132910192012787, "learning_rate": 0.01, "loss": 2.0903, "step": 11454 }, { "epoch": 1.1758004926108374, "grad_norm": 0.10488838702440262, "learning_rate": 0.01, "loss": 2.0639, "step": 11457 }, { "epoch": 1.1761083743842364, "grad_norm": 0.12755680084228516, "learning_rate": 0.01, "loss": 2.0989, "step": 11460 }, { "epoch": 1.1764162561576355, "grad_norm": 0.12174911797046661, "learning_rate": 0.01, "loss": 2.1026, "step": 11463 }, { "epoch": 1.1767241379310345, "grad_norm": 0.07873964309692383, "learning_rate": 0.01, "loss": 2.0908, "step": 11466 }, { "epoch": 1.1770320197044335, "grad_norm": 0.04275409132242203, "learning_rate": 0.01, "loss": 2.0899, "step": 11469 }, { "epoch": 1.1773399014778325, "grad_norm": 0.046134103089571, "learning_rate": 0.01, "loss": 2.1064, "step": 11472 }, { "epoch": 1.1776477832512315, "grad_norm": 0.07631804049015045, "learning_rate": 0.01, "loss": 2.0811, "step": 11475 }, { "epoch": 1.1779556650246306, "grad_norm": 0.04843062162399292, "learning_rate": 0.01, "loss": 2.1078, "step": 11478 }, { "epoch": 1.1782635467980296, "grad_norm": 0.04664747416973114, "learning_rate": 0.01, "loss": 2.0807, "step": 11481 }, { "epoch": 1.1785714285714286, "grad_norm": 0.042328983545303345, "learning_rate": 0.01, "loss": 2.0898, "step": 11484 }, { "epoch": 1.1788793103448276, "grad_norm": 0.04443054646253586, "learning_rate": 0.01, "loss": 2.092, "step": 11487 }, { "epoch": 1.1791871921182266, "grad_norm": 0.03439139202237129, "learning_rate": 0.01, "loss": 2.0438, "step": 11490 }, { "epoch": 1.1794950738916257, "grad_norm": 0.1651001274585724, "learning_rate": 0.01, "loss": 2.1068, "step": 11493 }, { "epoch": 1.1798029556650247, "grad_norm": 0.04535198211669922, "learning_rate": 0.01, "loss": 2.0951, "step": 11496 }, { "epoch": 1.1801108374384237, "grad_norm": 0.04346736520528793, "learning_rate": 0.01, "loss": 2.1032, "step": 11499 }, { "epoch": 1.1804187192118227, "grad_norm": 0.08131600171327591, "learning_rate": 0.01, "loss": 2.0709, "step": 11502 }, { "epoch": 1.1807266009852218, "grad_norm": 0.0638989582657814, "learning_rate": 0.01, "loss": 2.0676, "step": 11505 }, { "epoch": 1.1810344827586208, "grad_norm": 0.06305757910013199, "learning_rate": 0.01, "loss": 2.0636, "step": 11508 }, { "epoch": 1.1813423645320198, "grad_norm": 0.048068735748529434, "learning_rate": 0.01, "loss": 2.0803, "step": 11511 }, { "epoch": 1.1816502463054186, "grad_norm": 0.1859566867351532, "learning_rate": 0.01, "loss": 2.1206, "step": 11514 }, { "epoch": 1.1819581280788176, "grad_norm": 0.13449640572071075, "learning_rate": 0.01, "loss": 2.1047, "step": 11517 }, { "epoch": 1.1822660098522166, "grad_norm": 0.09624927490949631, "learning_rate": 0.01, "loss": 2.1338, "step": 11520 }, { "epoch": 1.1825738916256157, "grad_norm": 0.04823729023337364, "learning_rate": 0.01, "loss": 2.1485, "step": 11523 }, { "epoch": 1.1828817733990147, "grad_norm": 0.04005538672208786, "learning_rate": 0.01, "loss": 2.1062, "step": 11526 }, { "epoch": 1.1831896551724137, "grad_norm": 0.035647980868816376, "learning_rate": 0.01, "loss": 2.1002, "step": 11529 }, { "epoch": 1.1834975369458127, "grad_norm": 0.03485687077045441, "learning_rate": 0.01, "loss": 2.085, "step": 11532 }, { "epoch": 1.1838054187192117, "grad_norm": 0.03271855041384697, "learning_rate": 0.01, "loss": 2.1083, "step": 11535 }, { "epoch": 1.1841133004926108, "grad_norm": 0.14434824883937836, "learning_rate": 0.01, "loss": 2.0917, "step": 11538 }, { "epoch": 1.1844211822660098, "grad_norm": 0.16373054683208466, "learning_rate": 0.01, "loss": 2.0889, "step": 11541 }, { "epoch": 1.1847290640394088, "grad_norm": 0.1128426045179367, "learning_rate": 0.01, "loss": 2.0605, "step": 11544 }, { "epoch": 1.1850369458128078, "grad_norm": 0.03807492554187775, "learning_rate": 0.01, "loss": 2.0792, "step": 11547 }, { "epoch": 1.1853448275862069, "grad_norm": 0.0678490698337555, "learning_rate": 0.01, "loss": 2.072, "step": 11550 }, { "epoch": 1.1856527093596059, "grad_norm": 0.04688851907849312, "learning_rate": 0.01, "loss": 2.0648, "step": 11553 }, { "epoch": 1.185960591133005, "grad_norm": 0.03682132810354233, "learning_rate": 0.01, "loss": 2.096, "step": 11556 }, { "epoch": 1.186268472906404, "grad_norm": 0.0797944962978363, "learning_rate": 0.01, "loss": 2.0632, "step": 11559 }, { "epoch": 1.186576354679803, "grad_norm": 0.09593506157398224, "learning_rate": 0.01, "loss": 2.0923, "step": 11562 }, { "epoch": 1.186884236453202, "grad_norm": 0.10455387830734253, "learning_rate": 0.01, "loss": 2.0927, "step": 11565 }, { "epoch": 1.187192118226601, "grad_norm": 0.05642642080783844, "learning_rate": 0.01, "loss": 2.0688, "step": 11568 }, { "epoch": 1.1875, "grad_norm": 0.09467128664255142, "learning_rate": 0.01, "loss": 2.0561, "step": 11571 }, { "epoch": 1.187807881773399, "grad_norm": 0.061598166823387146, "learning_rate": 0.01, "loss": 2.0666, "step": 11574 }, { "epoch": 1.188115763546798, "grad_norm": 0.0875246673822403, "learning_rate": 0.01, "loss": 2.0753, "step": 11577 }, { "epoch": 1.188423645320197, "grad_norm": 0.05889583006501198, "learning_rate": 0.01, "loss": 2.0705, "step": 11580 }, { "epoch": 1.188731527093596, "grad_norm": 0.0796559602022171, "learning_rate": 0.01, "loss": 2.0939, "step": 11583 }, { "epoch": 1.189039408866995, "grad_norm": 0.04127117991447449, "learning_rate": 0.01, "loss": 2.0905, "step": 11586 }, { "epoch": 1.1893472906403941, "grad_norm": 0.06161842495203018, "learning_rate": 0.01, "loss": 2.0954, "step": 11589 }, { "epoch": 1.1896551724137931, "grad_norm": 0.05344879627227783, "learning_rate": 0.01, "loss": 2.0808, "step": 11592 }, { "epoch": 1.1899630541871922, "grad_norm": 0.03660701587796211, "learning_rate": 0.01, "loss": 2.0748, "step": 11595 }, { "epoch": 1.1902709359605912, "grad_norm": 0.04792351648211479, "learning_rate": 0.01, "loss": 2.0833, "step": 11598 }, { "epoch": 1.1905788177339902, "grad_norm": 0.04336618259549141, "learning_rate": 0.01, "loss": 2.1017, "step": 11601 }, { "epoch": 1.1908866995073892, "grad_norm": 0.0654226765036583, "learning_rate": 0.01, "loss": 2.0922, "step": 11604 }, { "epoch": 1.1911945812807883, "grad_norm": 0.08879897743463516, "learning_rate": 0.01, "loss": 2.0842, "step": 11607 }, { "epoch": 1.1915024630541873, "grad_norm": 0.15568268299102783, "learning_rate": 0.01, "loss": 2.0947, "step": 11610 }, { "epoch": 1.1918103448275863, "grad_norm": 0.11712448298931122, "learning_rate": 0.01, "loss": 2.0955, "step": 11613 }, { "epoch": 1.1921182266009853, "grad_norm": 0.04966702312231064, "learning_rate": 0.01, "loss": 2.1093, "step": 11616 }, { "epoch": 1.1924261083743843, "grad_norm": 0.04304838925600052, "learning_rate": 0.01, "loss": 2.0828, "step": 11619 }, { "epoch": 1.1927339901477834, "grad_norm": 0.04981999844312668, "learning_rate": 0.01, "loss": 2.0964, "step": 11622 }, { "epoch": 1.1930418719211824, "grad_norm": 0.045383159071207047, "learning_rate": 0.01, "loss": 2.0967, "step": 11625 }, { "epoch": 1.1933497536945814, "grad_norm": 0.0348484069108963, "learning_rate": 0.01, "loss": 2.0685, "step": 11628 }, { "epoch": 1.1936576354679802, "grad_norm": 0.04802081733942032, "learning_rate": 0.01, "loss": 2.0904, "step": 11631 }, { "epoch": 1.1939655172413792, "grad_norm": 0.0615711510181427, "learning_rate": 0.01, "loss": 2.1153, "step": 11634 }, { "epoch": 1.1942733990147782, "grad_norm": 0.15608100593090057, "learning_rate": 0.01, "loss": 2.0753, "step": 11637 }, { "epoch": 1.1945812807881773, "grad_norm": 0.10449741780757904, "learning_rate": 0.01, "loss": 2.1083, "step": 11640 }, { "epoch": 1.1948891625615763, "grad_norm": 0.062145963311195374, "learning_rate": 0.01, "loss": 2.1104, "step": 11643 }, { "epoch": 1.1951970443349753, "grad_norm": 0.04744469001889229, "learning_rate": 0.01, "loss": 2.0742, "step": 11646 }, { "epoch": 1.1955049261083743, "grad_norm": 0.036814477294683456, "learning_rate": 0.01, "loss": 2.0641, "step": 11649 }, { "epoch": 1.1958128078817734, "grad_norm": 0.037870246917009354, "learning_rate": 0.01, "loss": 2.0906, "step": 11652 }, { "epoch": 1.1961206896551724, "grad_norm": 0.17372412979602814, "learning_rate": 0.01, "loss": 2.1027, "step": 11655 }, { "epoch": 1.1964285714285714, "grad_norm": 0.04681265726685524, "learning_rate": 0.01, "loss": 2.1029, "step": 11658 }, { "epoch": 1.1967364532019704, "grad_norm": 0.058284103870391846, "learning_rate": 0.01, "loss": 2.0829, "step": 11661 }, { "epoch": 1.1970443349753694, "grad_norm": 0.07531574368476868, "learning_rate": 0.01, "loss": 2.0602, "step": 11664 }, { "epoch": 1.1973522167487685, "grad_norm": 0.053437430411577225, "learning_rate": 0.01, "loss": 2.0726, "step": 11667 }, { "epoch": 1.1976600985221675, "grad_norm": 0.047438427805900574, "learning_rate": 0.01, "loss": 2.107, "step": 11670 }, { "epoch": 1.1979679802955665, "grad_norm": 0.04474404826760292, "learning_rate": 0.01, "loss": 2.08, "step": 11673 }, { "epoch": 1.1982758620689655, "grad_norm": 0.15452256798744202, "learning_rate": 0.01, "loss": 2.0788, "step": 11676 }, { "epoch": 1.1985837438423645, "grad_norm": 0.05446213111281395, "learning_rate": 0.01, "loss": 2.0696, "step": 11679 }, { "epoch": 1.1988916256157636, "grad_norm": 0.059178926050662994, "learning_rate": 0.01, "loss": 2.0867, "step": 11682 }, { "epoch": 1.1991995073891626, "grad_norm": 0.05807918682694435, "learning_rate": 0.01, "loss": 2.0675, "step": 11685 }, { "epoch": 1.1995073891625616, "grad_norm": 0.046843890100717545, "learning_rate": 0.01, "loss": 2.0729, "step": 11688 }, { "epoch": 1.1998152709359606, "grad_norm": 0.042494870722293854, "learning_rate": 0.01, "loss": 2.079, "step": 11691 }, { "epoch": 1.2001231527093597, "grad_norm": 0.04506772756576538, "learning_rate": 0.01, "loss": 2.0862, "step": 11694 }, { "epoch": 1.2004310344827587, "grad_norm": 0.04237942770123482, "learning_rate": 0.01, "loss": 2.0712, "step": 11697 }, { "epoch": 1.2007389162561577, "grad_norm": 0.03488307446241379, "learning_rate": 0.01, "loss": 2.1051, "step": 11700 }, { "epoch": 1.2010467980295567, "grad_norm": 0.03693482652306557, "learning_rate": 0.01, "loss": 2.045, "step": 11703 }, { "epoch": 1.2013546798029557, "grad_norm": 0.10410935431718826, "learning_rate": 0.01, "loss": 2.0884, "step": 11706 }, { "epoch": 1.2016625615763548, "grad_norm": 0.11816459894180298, "learning_rate": 0.01, "loss": 2.0687, "step": 11709 }, { "epoch": 1.2019704433497538, "grad_norm": 0.06567800790071487, "learning_rate": 0.01, "loss": 2.063, "step": 11712 }, { "epoch": 1.2022783251231528, "grad_norm": 0.06639432907104492, "learning_rate": 0.01, "loss": 2.078, "step": 11715 }, { "epoch": 1.2025862068965516, "grad_norm": 0.05059380456805229, "learning_rate": 0.01, "loss": 2.1163, "step": 11718 }, { "epoch": 1.2028940886699506, "grad_norm": 0.04076917842030525, "learning_rate": 0.01, "loss": 2.0784, "step": 11721 }, { "epoch": 1.2032019704433496, "grad_norm": 0.05994633212685585, "learning_rate": 0.01, "loss": 2.0819, "step": 11724 }, { "epoch": 1.2035098522167487, "grad_norm": 0.05682201310992241, "learning_rate": 0.01, "loss": 2.0625, "step": 11727 }, { "epoch": 1.2038177339901477, "grad_norm": 0.05393010750412941, "learning_rate": 0.01, "loss": 2.072, "step": 11730 }, { "epoch": 1.2041256157635467, "grad_norm": 0.04697128012776375, "learning_rate": 0.01, "loss": 2.0796, "step": 11733 }, { "epoch": 1.2044334975369457, "grad_norm": 0.04945002868771553, "learning_rate": 0.01, "loss": 2.0666, "step": 11736 }, { "epoch": 1.2047413793103448, "grad_norm": 0.06519649177789688, "learning_rate": 0.01, "loss": 2.0873, "step": 11739 }, { "epoch": 1.2050492610837438, "grad_norm": 0.1188720241189003, "learning_rate": 0.01, "loss": 2.0967, "step": 11742 }, { "epoch": 1.2053571428571428, "grad_norm": 0.1045864149928093, "learning_rate": 0.01, "loss": 2.0834, "step": 11745 }, { "epoch": 1.2056650246305418, "grad_norm": 0.04561993479728699, "learning_rate": 0.01, "loss": 2.0872, "step": 11748 }, { "epoch": 1.2059729064039408, "grad_norm": 0.04972228407859802, "learning_rate": 0.01, "loss": 2.0599, "step": 11751 }, { "epoch": 1.2062807881773399, "grad_norm": 0.05342618376016617, "learning_rate": 0.01, "loss": 2.0606, "step": 11754 }, { "epoch": 1.2065886699507389, "grad_norm": 0.05637587606906891, "learning_rate": 0.01, "loss": 2.0836, "step": 11757 }, { "epoch": 1.206896551724138, "grad_norm": 0.11595457047224045, "learning_rate": 0.01, "loss": 2.0904, "step": 11760 }, { "epoch": 1.207204433497537, "grad_norm": 0.11803465336561203, "learning_rate": 0.01, "loss": 2.0741, "step": 11763 }, { "epoch": 1.207512315270936, "grad_norm": 0.045427508652210236, "learning_rate": 0.01, "loss": 2.0721, "step": 11766 }, { "epoch": 1.207820197044335, "grad_norm": 0.0365883894264698, "learning_rate": 0.01, "loss": 2.0599, "step": 11769 }, { "epoch": 1.208128078817734, "grad_norm": 0.03729262575507164, "learning_rate": 0.01, "loss": 2.087, "step": 11772 }, { "epoch": 1.208435960591133, "grad_norm": 0.05842882767319679, "learning_rate": 0.01, "loss": 2.0762, "step": 11775 }, { "epoch": 1.208743842364532, "grad_norm": 0.07687997072935104, "learning_rate": 0.01, "loss": 2.0691, "step": 11778 }, { "epoch": 1.209051724137931, "grad_norm": 0.06832735985517502, "learning_rate": 0.01, "loss": 2.0974, "step": 11781 }, { "epoch": 1.20935960591133, "grad_norm": 0.10200455039739609, "learning_rate": 0.01, "loss": 2.0866, "step": 11784 }, { "epoch": 1.209667487684729, "grad_norm": 0.10769661515951157, "learning_rate": 0.01, "loss": 2.0761, "step": 11787 }, { "epoch": 1.2099753694581281, "grad_norm": 0.12233126908540726, "learning_rate": 0.01, "loss": 2.0911, "step": 11790 }, { "epoch": 1.2102832512315271, "grad_norm": 0.046646762639284134, "learning_rate": 0.01, "loss": 2.0546, "step": 11793 }, { "epoch": 1.2105911330049262, "grad_norm": 0.030627859756350517, "learning_rate": 0.01, "loss": 2.0805, "step": 11796 }, { "epoch": 1.2108990147783252, "grad_norm": 0.03977693244814873, "learning_rate": 0.01, "loss": 2.0465, "step": 11799 }, { "epoch": 1.2112068965517242, "grad_norm": 0.06213162839412689, "learning_rate": 0.01, "loss": 2.1123, "step": 11802 }, { "epoch": 1.2115147783251232, "grad_norm": 0.04708317294716835, "learning_rate": 0.01, "loss": 2.1315, "step": 11805 }, { "epoch": 1.2118226600985222, "grad_norm": 0.10807790607213974, "learning_rate": 0.01, "loss": 2.0825, "step": 11808 }, { "epoch": 1.2121305418719213, "grad_norm": 0.08579280972480774, "learning_rate": 0.01, "loss": 2.1366, "step": 11811 }, { "epoch": 1.2124384236453203, "grad_norm": 0.05783751606941223, "learning_rate": 0.01, "loss": 2.0682, "step": 11814 }, { "epoch": 1.2127463054187193, "grad_norm": 0.04836808145046234, "learning_rate": 0.01, "loss": 2.0606, "step": 11817 }, { "epoch": 1.2130541871921183, "grad_norm": 0.04026191681623459, "learning_rate": 0.01, "loss": 2.0637, "step": 11820 }, { "epoch": 1.2133620689655173, "grad_norm": 0.03309273347258568, "learning_rate": 0.01, "loss": 2.08, "step": 11823 }, { "epoch": 1.2136699507389164, "grad_norm": 0.10611164569854736, "learning_rate": 0.01, "loss": 2.0787, "step": 11826 }, { "epoch": 1.2139778325123154, "grad_norm": 0.0817422866821289, "learning_rate": 0.01, "loss": 2.069, "step": 11829 }, { "epoch": 1.2142857142857142, "grad_norm": 0.05528125911951065, "learning_rate": 0.01, "loss": 2.0408, "step": 11832 }, { "epoch": 1.2145935960591132, "grad_norm": 0.05016999691724777, "learning_rate": 0.01, "loss": 2.0794, "step": 11835 }, { "epoch": 1.2149014778325122, "grad_norm": 0.06376414746046066, "learning_rate": 0.01, "loss": 2.1097, "step": 11838 }, { "epoch": 1.2152093596059113, "grad_norm": 0.06668904423713684, "learning_rate": 0.01, "loss": 2.0745, "step": 11841 }, { "epoch": 1.2155172413793103, "grad_norm": 0.046260587871074677, "learning_rate": 0.01, "loss": 2.0711, "step": 11844 }, { "epoch": 1.2158251231527093, "grad_norm": 0.039374321699142456, "learning_rate": 0.01, "loss": 2.087, "step": 11847 }, { "epoch": 1.2161330049261083, "grad_norm": 0.10993562638759613, "learning_rate": 0.01, "loss": 2.0615, "step": 11850 }, { "epoch": 1.2164408866995073, "grad_norm": 0.03676668182015419, "learning_rate": 0.01, "loss": 2.0717, "step": 11853 }, { "epoch": 1.2167487684729064, "grad_norm": 0.10777715593576431, "learning_rate": 0.01, "loss": 2.1016, "step": 11856 }, { "epoch": 1.2170566502463054, "grad_norm": 0.07948705554008484, "learning_rate": 0.01, "loss": 2.083, "step": 11859 }, { "epoch": 1.2173645320197044, "grad_norm": 0.11646637320518494, "learning_rate": 0.01, "loss": 2.0552, "step": 11862 }, { "epoch": 1.2176724137931034, "grad_norm": 0.07525186985731125, "learning_rate": 0.01, "loss": 2.0877, "step": 11865 }, { "epoch": 1.2179802955665024, "grad_norm": 0.048124101012945175, "learning_rate": 0.01, "loss": 2.0652, "step": 11868 }, { "epoch": 1.2182881773399015, "grad_norm": 0.04603361710906029, "learning_rate": 0.01, "loss": 2.0922, "step": 11871 }, { "epoch": 1.2185960591133005, "grad_norm": 0.07067687064409256, "learning_rate": 0.01, "loss": 2.0946, "step": 11874 }, { "epoch": 1.2189039408866995, "grad_norm": 0.0959327220916748, "learning_rate": 0.01, "loss": 2.096, "step": 11877 }, { "epoch": 1.2192118226600985, "grad_norm": 0.08565320819616318, "learning_rate": 0.01, "loss": 2.09, "step": 11880 }, { "epoch": 1.2195197044334976, "grad_norm": 0.06728377193212509, "learning_rate": 0.01, "loss": 2.0801, "step": 11883 }, { "epoch": 1.2198275862068966, "grad_norm": 0.03809618949890137, "learning_rate": 0.01, "loss": 2.0668, "step": 11886 }, { "epoch": 1.2201354679802956, "grad_norm": 0.049925826489925385, "learning_rate": 0.01, "loss": 2.0625, "step": 11889 }, { "epoch": 1.2204433497536946, "grad_norm": 0.05949478596448898, "learning_rate": 0.01, "loss": 2.0687, "step": 11892 }, { "epoch": 1.2207512315270936, "grad_norm": 0.08161807060241699, "learning_rate": 0.01, "loss": 2.0789, "step": 11895 }, { "epoch": 1.2210591133004927, "grad_norm": 0.05829952284693718, "learning_rate": 0.01, "loss": 2.0846, "step": 11898 }, { "epoch": 1.2213669950738917, "grad_norm": 0.05801619216799736, "learning_rate": 0.01, "loss": 2.1014, "step": 11901 }, { "epoch": 1.2216748768472907, "grad_norm": 0.04123099148273468, "learning_rate": 0.01, "loss": 2.0867, "step": 11904 }, { "epoch": 1.2219827586206897, "grad_norm": 0.05088057741522789, "learning_rate": 0.01, "loss": 2.0751, "step": 11907 }, { "epoch": 1.2222906403940887, "grad_norm": 0.07357197254896164, "learning_rate": 0.01, "loss": 2.1023, "step": 11910 }, { "epoch": 1.2225985221674878, "grad_norm": 0.060078103095293045, "learning_rate": 0.01, "loss": 2.0784, "step": 11913 }, { "epoch": 1.2229064039408868, "grad_norm": 0.12629617750644684, "learning_rate": 0.01, "loss": 2.0767, "step": 11916 }, { "epoch": 1.2232142857142858, "grad_norm": 0.07202067971229553, "learning_rate": 0.01, "loss": 2.0796, "step": 11919 }, { "epoch": 1.2235221674876846, "grad_norm": 0.06407934427261353, "learning_rate": 0.01, "loss": 2.0926, "step": 11922 }, { "epoch": 1.2238300492610836, "grad_norm": 0.053789231926202774, "learning_rate": 0.01, "loss": 2.092, "step": 11925 }, { "epoch": 1.2241379310344827, "grad_norm": 0.04130502790212631, "learning_rate": 0.01, "loss": 2.051, "step": 11928 }, { "epoch": 1.2244458128078817, "grad_norm": 0.05235166475176811, "learning_rate": 0.01, "loss": 2.0829, "step": 11931 }, { "epoch": 1.2247536945812807, "grad_norm": 0.04508119449019432, "learning_rate": 0.01, "loss": 2.0857, "step": 11934 }, { "epoch": 1.2250615763546797, "grad_norm": 0.03570512309670448, "learning_rate": 0.01, "loss": 2.0734, "step": 11937 }, { "epoch": 1.2253694581280787, "grad_norm": 0.04690218344330788, "learning_rate": 0.01, "loss": 2.0988, "step": 11940 }, { "epoch": 1.2256773399014778, "grad_norm": 0.10231764614582062, "learning_rate": 0.01, "loss": 2.0716, "step": 11943 }, { "epoch": 1.2259852216748768, "grad_norm": 0.05221893638372421, "learning_rate": 0.01, "loss": 2.0693, "step": 11946 }, { "epoch": 1.2262931034482758, "grad_norm": 0.0647406056523323, "learning_rate": 0.01, "loss": 2.0809, "step": 11949 }, { "epoch": 1.2266009852216748, "grad_norm": 0.06388009339570999, "learning_rate": 0.01, "loss": 2.0968, "step": 11952 }, { "epoch": 1.2269088669950738, "grad_norm": 0.06904192268848419, "learning_rate": 0.01, "loss": 2.0917, "step": 11955 }, { "epoch": 1.2272167487684729, "grad_norm": 0.08780385553836823, "learning_rate": 0.01, "loss": 2.0881, "step": 11958 }, { "epoch": 1.2275246305418719, "grad_norm": 0.037958092987537384, "learning_rate": 0.01, "loss": 2.0813, "step": 11961 }, { "epoch": 1.227832512315271, "grad_norm": 0.04035305231809616, "learning_rate": 0.01, "loss": 2.0718, "step": 11964 }, { "epoch": 1.22814039408867, "grad_norm": 0.056451354175806046, "learning_rate": 0.01, "loss": 2.0653, "step": 11967 }, { "epoch": 1.228448275862069, "grad_norm": 0.06248374283313751, "learning_rate": 0.01, "loss": 2.0794, "step": 11970 }, { "epoch": 1.228756157635468, "grad_norm": 0.05662978067994118, "learning_rate": 0.01, "loss": 2.0873, "step": 11973 }, { "epoch": 1.229064039408867, "grad_norm": 0.06416438519954681, "learning_rate": 0.01, "loss": 2.0933, "step": 11976 }, { "epoch": 1.229371921182266, "grad_norm": 0.04529969021677971, "learning_rate": 0.01, "loss": 2.0892, "step": 11979 }, { "epoch": 1.229679802955665, "grad_norm": 0.03636370226740837, "learning_rate": 0.01, "loss": 2.0968, "step": 11982 }, { "epoch": 1.229987684729064, "grad_norm": 0.03992651402950287, "learning_rate": 0.01, "loss": 2.0606, "step": 11985 }, { "epoch": 1.230295566502463, "grad_norm": 0.19436125457286835, "learning_rate": 0.01, "loss": 2.0779, "step": 11988 }, { "epoch": 1.230603448275862, "grad_norm": 0.15459048748016357, "learning_rate": 0.01, "loss": 2.0801, "step": 11991 }, { "epoch": 1.2309113300492611, "grad_norm": 0.11131371557712555, "learning_rate": 0.01, "loss": 2.0629, "step": 11994 }, { "epoch": 1.2312192118226601, "grad_norm": 0.06876586377620697, "learning_rate": 0.01, "loss": 2.1081, "step": 11997 }, { "epoch": 1.2315270935960592, "grad_norm": 0.03379599004983902, "learning_rate": 0.01, "loss": 2.0977, "step": 12000 }, { "epoch": 1.2318349753694582, "grad_norm": 0.06905510276556015, "learning_rate": 0.01, "loss": 2.0545, "step": 12003 }, { "epoch": 1.2321428571428572, "grad_norm": 0.05859539657831192, "learning_rate": 0.01, "loss": 2.0751, "step": 12006 }, { "epoch": 1.2324507389162562, "grad_norm": 0.1200842559337616, "learning_rate": 0.01, "loss": 2.1045, "step": 12009 }, { "epoch": 1.2327586206896552, "grad_norm": 0.09969060868024826, "learning_rate": 0.01, "loss": 2.0641, "step": 12012 }, { "epoch": 1.2330665024630543, "grad_norm": 0.08915867656469345, "learning_rate": 0.01, "loss": 2.0585, "step": 12015 }, { "epoch": 1.2333743842364533, "grad_norm": 0.10951671004295349, "learning_rate": 0.01, "loss": 2.1007, "step": 12018 }, { "epoch": 1.2336822660098523, "grad_norm": 0.15262556076049805, "learning_rate": 0.01, "loss": 2.0643, "step": 12021 }, { "epoch": 1.2339901477832513, "grad_norm": 0.05622226372361183, "learning_rate": 0.01, "loss": 2.0796, "step": 12024 }, { "epoch": 1.2342980295566504, "grad_norm": 0.05918841436505318, "learning_rate": 0.01, "loss": 2.0911, "step": 12027 }, { "epoch": 1.2346059113300494, "grad_norm": 0.04867622256278992, "learning_rate": 0.01, "loss": 2.0872, "step": 12030 }, { "epoch": 1.2349137931034484, "grad_norm": 0.04389597848057747, "learning_rate": 0.01, "loss": 2.0882, "step": 12033 }, { "epoch": 1.2352216748768472, "grad_norm": 0.1209108904004097, "learning_rate": 0.01, "loss": 2.0394, "step": 12036 }, { "epoch": 1.2355295566502462, "grad_norm": 0.08446931838989258, "learning_rate": 0.01, "loss": 2.0744, "step": 12039 }, { "epoch": 1.2358374384236452, "grad_norm": 0.07141686230897903, "learning_rate": 0.01, "loss": 2.1099, "step": 12042 }, { "epoch": 1.2361453201970443, "grad_norm": 0.1216801181435585, "learning_rate": 0.01, "loss": 2.0754, "step": 12045 }, { "epoch": 1.2364532019704433, "grad_norm": 0.10539086163043976, "learning_rate": 0.01, "loss": 2.0887, "step": 12048 }, { "epoch": 1.2367610837438423, "grad_norm": 0.09336747229099274, "learning_rate": 0.01, "loss": 2.0637, "step": 12051 }, { "epoch": 1.2370689655172413, "grad_norm": 0.091059610247612, "learning_rate": 0.01, "loss": 2.057, "step": 12054 }, { "epoch": 1.2373768472906403, "grad_norm": 0.08291159570217133, "learning_rate": 0.01, "loss": 2.1039, "step": 12057 }, { "epoch": 1.2376847290640394, "grad_norm": 0.07626821845769882, "learning_rate": 0.01, "loss": 2.0784, "step": 12060 }, { "epoch": 1.2379926108374384, "grad_norm": 0.05197496339678764, "learning_rate": 0.01, "loss": 2.0968, "step": 12063 }, { "epoch": 1.2383004926108374, "grad_norm": 0.061275266110897064, "learning_rate": 0.01, "loss": 2.095, "step": 12066 }, { "epoch": 1.2386083743842364, "grad_norm": 0.04282483085989952, "learning_rate": 0.01, "loss": 2.083, "step": 12069 }, { "epoch": 1.2389162561576355, "grad_norm": 0.037066422402858734, "learning_rate": 0.01, "loss": 2.0653, "step": 12072 }, { "epoch": 1.2392241379310345, "grad_norm": 0.0467105396091938, "learning_rate": 0.01, "loss": 2.0913, "step": 12075 }, { "epoch": 1.2395320197044335, "grad_norm": 0.053995974361896515, "learning_rate": 0.01, "loss": 2.0754, "step": 12078 }, { "epoch": 1.2398399014778325, "grad_norm": 0.08583737164735794, "learning_rate": 0.01, "loss": 2.089, "step": 12081 }, { "epoch": 1.2401477832512315, "grad_norm": 0.07264076173305511, "learning_rate": 0.01, "loss": 2.07, "step": 12084 }, { "epoch": 1.2404556650246306, "grad_norm": 0.062001802027225494, "learning_rate": 0.01, "loss": 2.0969, "step": 12087 }, { "epoch": 1.2407635467980296, "grad_norm": 0.05311381444334984, "learning_rate": 0.01, "loss": 2.0833, "step": 12090 }, { "epoch": 1.2410714285714286, "grad_norm": 0.04272656887769699, "learning_rate": 0.01, "loss": 2.0883, "step": 12093 }, { "epoch": 1.2413793103448276, "grad_norm": 0.10696172714233398, "learning_rate": 0.01, "loss": 2.0925, "step": 12096 }, { "epoch": 1.2416871921182266, "grad_norm": 0.08625493943691254, "learning_rate": 0.01, "loss": 2.0616, "step": 12099 }, { "epoch": 1.2419950738916257, "grad_norm": 0.06818173080682755, "learning_rate": 0.01, "loss": 2.0943, "step": 12102 }, { "epoch": 1.2423029556650247, "grad_norm": 0.050731681287288666, "learning_rate": 0.01, "loss": 2.0802, "step": 12105 }, { "epoch": 1.2426108374384237, "grad_norm": 0.08426883816719055, "learning_rate": 0.01, "loss": 2.0825, "step": 12108 }, { "epoch": 1.2429187192118227, "grad_norm": 0.09432832151651382, "learning_rate": 0.01, "loss": 2.0834, "step": 12111 }, { "epoch": 1.2432266009852218, "grad_norm": 0.06951441615819931, "learning_rate": 0.01, "loss": 2.0951, "step": 12114 }, { "epoch": 1.2435344827586208, "grad_norm": 0.06427393108606339, "learning_rate": 0.01, "loss": 2.0706, "step": 12117 }, { "epoch": 1.2438423645320198, "grad_norm": 0.03967609629034996, "learning_rate": 0.01, "loss": 2.0897, "step": 12120 }, { "epoch": 1.2441502463054186, "grad_norm": 0.036665160208940506, "learning_rate": 0.01, "loss": 2.0791, "step": 12123 }, { "epoch": 1.2444581280788176, "grad_norm": 0.072290800511837, "learning_rate": 0.01, "loss": 2.0889, "step": 12126 }, { "epoch": 1.2447660098522166, "grad_norm": 0.07136868685483932, "learning_rate": 0.01, "loss": 2.0688, "step": 12129 }, { "epoch": 1.2450738916256157, "grad_norm": 0.15400493144989014, "learning_rate": 0.01, "loss": 2.0821, "step": 12132 }, { "epoch": 1.2453817733990147, "grad_norm": 0.07114578038454056, "learning_rate": 0.01, "loss": 2.0719, "step": 12135 }, { "epoch": 1.2456896551724137, "grad_norm": 0.043961767107248306, "learning_rate": 0.01, "loss": 2.0877, "step": 12138 }, { "epoch": 1.2459975369458127, "grad_norm": 0.056267060339450836, "learning_rate": 0.01, "loss": 2.0688, "step": 12141 }, { "epoch": 1.2463054187192117, "grad_norm": 0.035889722406864166, "learning_rate": 0.01, "loss": 2.0783, "step": 12144 }, { "epoch": 1.2466133004926108, "grad_norm": 0.1781640499830246, "learning_rate": 0.01, "loss": 2.0574, "step": 12147 }, { "epoch": 1.2469211822660098, "grad_norm": 0.0891503393650055, "learning_rate": 0.01, "loss": 2.0957, "step": 12150 }, { "epoch": 1.2472290640394088, "grad_norm": 0.047431472688913345, "learning_rate": 0.01, "loss": 2.0696, "step": 12153 }, { "epoch": 1.2475369458128078, "grad_norm": 0.04693286865949631, "learning_rate": 0.01, "loss": 2.0438, "step": 12156 }, { "epoch": 1.2478448275862069, "grad_norm": 0.0382777564227581, "learning_rate": 0.01, "loss": 2.0851, "step": 12159 }, { "epoch": 1.2481527093596059, "grad_norm": 0.04085429012775421, "learning_rate": 0.01, "loss": 2.0846, "step": 12162 }, { "epoch": 1.248460591133005, "grad_norm": 0.05329781025648117, "learning_rate": 0.01, "loss": 2.0732, "step": 12165 }, { "epoch": 1.248768472906404, "grad_norm": 0.06961992383003235, "learning_rate": 0.01, "loss": 2.0372, "step": 12168 }, { "epoch": 1.249076354679803, "grad_norm": 0.05290938913822174, "learning_rate": 0.01, "loss": 2.0692, "step": 12171 }, { "epoch": 1.249384236453202, "grad_norm": 0.1247674971818924, "learning_rate": 0.01, "loss": 2.0679, "step": 12174 }, { "epoch": 1.249692118226601, "grad_norm": 0.04983863607048988, "learning_rate": 0.01, "loss": 2.0611, "step": 12177 }, { "epoch": 1.25, "grad_norm": 0.08552074432373047, "learning_rate": 0.01, "loss": 2.1084, "step": 12180 }, { "epoch": 1.250307881773399, "grad_norm": 0.1376069337129593, "learning_rate": 0.01, "loss": 2.0997, "step": 12183 }, { "epoch": 1.250615763546798, "grad_norm": 0.07097752392292023, "learning_rate": 0.01, "loss": 2.0761, "step": 12186 }, { "epoch": 1.250923645320197, "grad_norm": 0.03953644260764122, "learning_rate": 0.01, "loss": 2.0675, "step": 12189 }, { "epoch": 1.251231527093596, "grad_norm": 0.04611526057124138, "learning_rate": 0.01, "loss": 2.0717, "step": 12192 }, { "epoch": 1.251539408866995, "grad_norm": 0.07573895156383514, "learning_rate": 0.01, "loss": 2.0785, "step": 12195 }, { "epoch": 1.2518472906403941, "grad_norm": 0.07144660502672195, "learning_rate": 0.01, "loss": 2.0815, "step": 12198 }, { "epoch": 1.2521551724137931, "grad_norm": 0.05297645181417465, "learning_rate": 0.01, "loss": 2.093, "step": 12201 }, { "epoch": 1.2524630541871922, "grad_norm": 0.044887710362672806, "learning_rate": 0.01, "loss": 2.0752, "step": 12204 }, { "epoch": 1.2527709359605912, "grad_norm": 0.04305564984679222, "learning_rate": 0.01, "loss": 2.0596, "step": 12207 }, { "epoch": 1.2530788177339902, "grad_norm": 0.057785287499427795, "learning_rate": 0.01, "loss": 2.094, "step": 12210 }, { "epoch": 1.2533866995073892, "grad_norm": 0.04404570534825325, "learning_rate": 0.01, "loss": 2.1037, "step": 12213 }, { "epoch": 1.2536945812807883, "grad_norm": 0.055468104779720306, "learning_rate": 0.01, "loss": 2.0851, "step": 12216 }, { "epoch": 1.2540024630541873, "grad_norm": 0.17121906578540802, "learning_rate": 0.01, "loss": 2.081, "step": 12219 }, { "epoch": 1.2543103448275863, "grad_norm": 0.09411416202783585, "learning_rate": 0.01, "loss": 2.0974, "step": 12222 }, { "epoch": 1.2546182266009853, "grad_norm": 0.07855021953582764, "learning_rate": 0.01, "loss": 2.0733, "step": 12225 }, { "epoch": 1.2549261083743843, "grad_norm": 0.052616432309150696, "learning_rate": 0.01, "loss": 2.0372, "step": 12228 }, { "epoch": 1.2552339901477834, "grad_norm": 0.047992121428251266, "learning_rate": 0.01, "loss": 2.0918, "step": 12231 }, { "epoch": 1.2555418719211824, "grad_norm": 0.04336715489625931, "learning_rate": 0.01, "loss": 2.0511, "step": 12234 }, { "epoch": 1.2558497536945814, "grad_norm": 0.03128316253423691, "learning_rate": 0.01, "loss": 2.0882, "step": 12237 }, { "epoch": 1.2561576354679804, "grad_norm": 0.06315557658672333, "learning_rate": 0.01, "loss": 2.0918, "step": 12240 }, { "epoch": 1.2564655172413794, "grad_norm": 0.0528687946498394, "learning_rate": 0.01, "loss": 2.0556, "step": 12243 }, { "epoch": 1.2567733990147782, "grad_norm": 0.17166706919670105, "learning_rate": 0.01, "loss": 2.068, "step": 12246 }, { "epoch": 1.2570812807881773, "grad_norm": 0.11394128203392029, "learning_rate": 0.01, "loss": 2.1179, "step": 12249 }, { "epoch": 1.2573891625615763, "grad_norm": 0.08554805815219879, "learning_rate": 0.01, "loss": 2.0857, "step": 12252 }, { "epoch": 1.2576970443349753, "grad_norm": 0.05203767865896225, "learning_rate": 0.01, "loss": 2.0975, "step": 12255 }, { "epoch": 1.2580049261083743, "grad_norm": 0.06072428077459335, "learning_rate": 0.01, "loss": 2.0692, "step": 12258 }, { "epoch": 1.2583128078817734, "grad_norm": 0.044136617332696915, "learning_rate": 0.01, "loss": 2.0458, "step": 12261 }, { "epoch": 1.2586206896551724, "grad_norm": 0.038774993270635605, "learning_rate": 0.01, "loss": 2.0835, "step": 12264 }, { "epoch": 1.2589285714285714, "grad_norm": 0.03669529780745506, "learning_rate": 0.01, "loss": 2.0949, "step": 12267 }, { "epoch": 1.2592364532019704, "grad_norm": 0.050722066313028336, "learning_rate": 0.01, "loss": 2.0772, "step": 12270 }, { "epoch": 1.2595443349753694, "grad_norm": 0.12684300541877747, "learning_rate": 0.01, "loss": 2.0814, "step": 12273 }, { "epoch": 1.2598522167487685, "grad_norm": 0.07431039214134216, "learning_rate": 0.01, "loss": 2.1001, "step": 12276 }, { "epoch": 1.2601600985221675, "grad_norm": 0.07050034403800964, "learning_rate": 0.01, "loss": 2.0507, "step": 12279 }, { "epoch": 1.2604679802955665, "grad_norm": 0.0553620308637619, "learning_rate": 0.01, "loss": 2.0757, "step": 12282 }, { "epoch": 1.2607758620689655, "grad_norm": 0.10946903377771378, "learning_rate": 0.01, "loss": 2.0659, "step": 12285 }, { "epoch": 1.2610837438423645, "grad_norm": 0.06346802413463593, "learning_rate": 0.01, "loss": 2.0871, "step": 12288 }, { "epoch": 1.2613916256157636, "grad_norm": 0.09386668354272842, "learning_rate": 0.01, "loss": 2.0702, "step": 12291 }, { "epoch": 1.2616995073891626, "grad_norm": 0.05672946944832802, "learning_rate": 0.01, "loss": 2.0812, "step": 12294 }, { "epoch": 1.2620073891625616, "grad_norm": 0.1079539805650711, "learning_rate": 0.01, "loss": 2.0851, "step": 12297 }, { "epoch": 1.2623152709359606, "grad_norm": 0.043078985065221786, "learning_rate": 0.01, "loss": 2.0846, "step": 12300 }, { "epoch": 1.2626231527093597, "grad_norm": 0.043098706752061844, "learning_rate": 0.01, "loss": 2.0541, "step": 12303 }, { "epoch": 1.2629310344827587, "grad_norm": 0.04908977448940277, "learning_rate": 0.01, "loss": 2.0801, "step": 12306 }, { "epoch": 1.2632389162561577, "grad_norm": 0.09897246211767197, "learning_rate": 0.01, "loss": 2.0899, "step": 12309 }, { "epoch": 1.2635467980295567, "grad_norm": 0.0861278846859932, "learning_rate": 0.01, "loss": 2.0613, "step": 12312 }, { "epoch": 1.2638546798029557, "grad_norm": 0.06973280757665634, "learning_rate": 0.01, "loss": 2.0598, "step": 12315 }, { "epoch": 1.2641625615763548, "grad_norm": 0.048658501356840134, "learning_rate": 0.01, "loss": 2.0838, "step": 12318 }, { "epoch": 1.2644704433497536, "grad_norm": 0.05053295940160751, "learning_rate": 0.01, "loss": 2.0665, "step": 12321 }, { "epoch": 1.2647783251231526, "grad_norm": 0.0536380410194397, "learning_rate": 0.01, "loss": 2.0696, "step": 12324 }, { "epoch": 1.2650862068965516, "grad_norm": 0.13452892005443573, "learning_rate": 0.01, "loss": 2.0928, "step": 12327 }, { "epoch": 1.2653940886699506, "grad_norm": 0.056635159999132156, "learning_rate": 0.01, "loss": 2.0903, "step": 12330 }, { "epoch": 1.2657019704433496, "grad_norm": 0.09460306912660599, "learning_rate": 0.01, "loss": 2.0616, "step": 12333 }, { "epoch": 1.2660098522167487, "grad_norm": 0.09019794315099716, "learning_rate": 0.01, "loss": 2.0808, "step": 12336 }, { "epoch": 1.2663177339901477, "grad_norm": 0.04020017758011818, "learning_rate": 0.01, "loss": 2.0613, "step": 12339 }, { "epoch": 1.2666256157635467, "grad_norm": 0.05063892900943756, "learning_rate": 0.01, "loss": 2.0846, "step": 12342 }, { "epoch": 1.2669334975369457, "grad_norm": 0.06472054123878479, "learning_rate": 0.01, "loss": 2.0884, "step": 12345 }, { "epoch": 1.2672413793103448, "grad_norm": 0.0523315854370594, "learning_rate": 0.01, "loss": 2.0756, "step": 12348 }, { "epoch": 1.2675492610837438, "grad_norm": 0.040240950882434845, "learning_rate": 0.01, "loss": 2.0646, "step": 12351 }, { "epoch": 1.2678571428571428, "grad_norm": 0.061988551169633865, "learning_rate": 0.01, "loss": 2.0899, "step": 12354 }, { "epoch": 1.2681650246305418, "grad_norm": 0.03831657022237778, "learning_rate": 0.01, "loss": 2.0633, "step": 12357 }, { "epoch": 1.2684729064039408, "grad_norm": 0.105617955327034, "learning_rate": 0.01, "loss": 2.0553, "step": 12360 }, { "epoch": 1.2687807881773399, "grad_norm": 0.09372366219758987, "learning_rate": 0.01, "loss": 2.0522, "step": 12363 }, { "epoch": 1.2690886699507389, "grad_norm": 0.10305638611316681, "learning_rate": 0.01, "loss": 2.0543, "step": 12366 }, { "epoch": 1.269396551724138, "grad_norm": 0.07187418639659882, "learning_rate": 0.01, "loss": 2.0775, "step": 12369 }, { "epoch": 1.269704433497537, "grad_norm": 0.03744306415319443, "learning_rate": 0.01, "loss": 2.0771, "step": 12372 }, { "epoch": 1.270012315270936, "grad_norm": 0.03059488907456398, "learning_rate": 0.01, "loss": 2.067, "step": 12375 }, { "epoch": 1.270320197044335, "grad_norm": 0.09767211973667145, "learning_rate": 0.01, "loss": 2.0889, "step": 12378 }, { "epoch": 1.270628078817734, "grad_norm": 0.05093003436923027, "learning_rate": 0.01, "loss": 2.0852, "step": 12381 }, { "epoch": 1.270935960591133, "grad_norm": 0.04648155719041824, "learning_rate": 0.01, "loss": 2.0775, "step": 12384 }, { "epoch": 1.271243842364532, "grad_norm": 0.04745417460799217, "learning_rate": 0.01, "loss": 2.0888, "step": 12387 }, { "epoch": 1.271551724137931, "grad_norm": 0.05171092227101326, "learning_rate": 0.01, "loss": 2.075, "step": 12390 }, { "epoch": 1.27185960591133, "grad_norm": 0.05051850154995918, "learning_rate": 0.01, "loss": 2.0754, "step": 12393 }, { "epoch": 1.272167487684729, "grad_norm": 0.06048591807484627, "learning_rate": 0.01, "loss": 2.1025, "step": 12396 }, { "epoch": 1.2724753694581281, "grad_norm": 0.09673628211021423, "learning_rate": 0.01, "loss": 2.0866, "step": 12399 }, { "epoch": 1.2727832512315271, "grad_norm": 0.11826295405626297, "learning_rate": 0.01, "loss": 2.0757, "step": 12402 }, { "epoch": 1.2730911330049262, "grad_norm": 0.06271769106388092, "learning_rate": 0.01, "loss": 2.0797, "step": 12405 }, { "epoch": 1.2733990147783252, "grad_norm": 0.0463738851249218, "learning_rate": 0.01, "loss": 2.054, "step": 12408 }, { "epoch": 1.2737068965517242, "grad_norm": 0.035649579018354416, "learning_rate": 0.01, "loss": 2.0747, "step": 12411 }, { "epoch": 1.2740147783251232, "grad_norm": 0.08586122840642929, "learning_rate": 0.01, "loss": 2.0692, "step": 12414 }, { "epoch": 1.2743226600985222, "grad_norm": 0.08538860082626343, "learning_rate": 0.01, "loss": 2.0617, "step": 12417 }, { "epoch": 1.2746305418719213, "grad_norm": 0.05974709242582321, "learning_rate": 0.01, "loss": 2.0527, "step": 12420 }, { "epoch": 1.2749384236453203, "grad_norm": 0.057023100554943085, "learning_rate": 0.01, "loss": 2.0751, "step": 12423 }, { "epoch": 1.2752463054187193, "grad_norm": 0.062105972319841385, "learning_rate": 0.01, "loss": 2.0946, "step": 12426 }, { "epoch": 1.2755541871921183, "grad_norm": 0.048221688717603683, "learning_rate": 0.01, "loss": 2.0662, "step": 12429 }, { "epoch": 1.2758620689655173, "grad_norm": 0.04440610110759735, "learning_rate": 0.01, "loss": 2.0852, "step": 12432 }, { "epoch": 1.2761699507389164, "grad_norm": 0.06074054539203644, "learning_rate": 0.01, "loss": 2.0667, "step": 12435 }, { "epoch": 1.2764778325123154, "grad_norm": 0.09477720409631729, "learning_rate": 0.01, "loss": 2.0815, "step": 12438 }, { "epoch": 1.2767857142857144, "grad_norm": 0.08470610529184341, "learning_rate": 0.01, "loss": 2.1023, "step": 12441 }, { "epoch": 1.2770935960591134, "grad_norm": 0.09079881012439728, "learning_rate": 0.01, "loss": 2.0761, "step": 12444 }, { "epoch": 1.2774014778325122, "grad_norm": 0.06132930517196655, "learning_rate": 0.01, "loss": 2.1027, "step": 12447 }, { "epoch": 1.2777093596059113, "grad_norm": 0.04702606052160263, "learning_rate": 0.01, "loss": 2.1051, "step": 12450 }, { "epoch": 1.2780172413793103, "grad_norm": 0.06621818989515305, "learning_rate": 0.01, "loss": 2.0835, "step": 12453 }, { "epoch": 1.2783251231527093, "grad_norm": 0.047375794500112534, "learning_rate": 0.01, "loss": 2.0982, "step": 12456 }, { "epoch": 1.2786330049261083, "grad_norm": 0.03766035661101341, "learning_rate": 0.01, "loss": 2.0468, "step": 12459 }, { "epoch": 1.2789408866995073, "grad_norm": 0.10769324004650116, "learning_rate": 0.01, "loss": 2.0848, "step": 12462 }, { "epoch": 1.2792487684729064, "grad_norm": 0.06848851591348648, "learning_rate": 0.01, "loss": 2.0664, "step": 12465 }, { "epoch": 1.2795566502463054, "grad_norm": 0.09864836931228638, "learning_rate": 0.01, "loss": 2.0719, "step": 12468 }, { "epoch": 1.2798645320197044, "grad_norm": 0.04042387008666992, "learning_rate": 0.01, "loss": 2.0672, "step": 12471 }, { "epoch": 1.2801724137931034, "grad_norm": 0.09212526679039001, "learning_rate": 0.01, "loss": 2.0789, "step": 12474 }, { "epoch": 1.2804802955665024, "grad_norm": 0.08713985234498978, "learning_rate": 0.01, "loss": 2.0765, "step": 12477 }, { "epoch": 1.2807881773399015, "grad_norm": 0.04133505001664162, "learning_rate": 0.01, "loss": 2.0809, "step": 12480 }, { "epoch": 1.2810960591133005, "grad_norm": 0.07466418296098709, "learning_rate": 0.01, "loss": 2.0499, "step": 12483 }, { "epoch": 1.2814039408866995, "grad_norm": 0.08685484528541565, "learning_rate": 0.01, "loss": 2.0689, "step": 12486 }, { "epoch": 1.2817118226600985, "grad_norm": 0.13300663232803345, "learning_rate": 0.01, "loss": 2.0761, "step": 12489 }, { "epoch": 1.2820197044334976, "grad_norm": 0.1024186760187149, "learning_rate": 0.01, "loss": 2.0763, "step": 12492 }, { "epoch": 1.2823275862068966, "grad_norm": 0.05908042937517166, "learning_rate": 0.01, "loss": 2.0485, "step": 12495 }, { "epoch": 1.2826354679802956, "grad_norm": 0.03715427219867706, "learning_rate": 0.01, "loss": 2.0925, "step": 12498 }, { "epoch": 1.2829433497536946, "grad_norm": 0.05033004283905029, "learning_rate": 0.01, "loss": 2.0875, "step": 12501 }, { "epoch": 1.2832512315270936, "grad_norm": 0.04851456359028816, "learning_rate": 0.01, "loss": 2.0547, "step": 12504 }, { "epoch": 1.2835591133004927, "grad_norm": 0.08153282105922699, "learning_rate": 0.01, "loss": 2.0828, "step": 12507 }, { "epoch": 1.2838669950738917, "grad_norm": 0.07549238950014114, "learning_rate": 0.01, "loss": 2.0681, "step": 12510 }, { "epoch": 1.2841748768472907, "grad_norm": 0.08571973443031311, "learning_rate": 0.01, "loss": 2.0556, "step": 12513 }, { "epoch": 1.2844827586206897, "grad_norm": 0.1036754697561264, "learning_rate": 0.01, "loss": 2.0712, "step": 12516 }, { "epoch": 1.2847906403940887, "grad_norm": 0.04329349100589752, "learning_rate": 0.01, "loss": 2.0711, "step": 12519 }, { "epoch": 1.2850985221674878, "grad_norm": 0.03718428686261177, "learning_rate": 0.01, "loss": 2.0812, "step": 12522 }, { "epoch": 1.2854064039408866, "grad_norm": 0.07977878302335739, "learning_rate": 0.01, "loss": 2.0562, "step": 12525 }, { "epoch": 1.2857142857142856, "grad_norm": 0.11540202796459198, "learning_rate": 0.01, "loss": 2.0666, "step": 12528 }, { "epoch": 1.2860221674876846, "grad_norm": 0.07631656527519226, "learning_rate": 0.01, "loss": 2.0694, "step": 12531 }, { "epoch": 1.2863300492610836, "grad_norm": 0.051738426089286804, "learning_rate": 0.01, "loss": 2.0862, "step": 12534 }, { "epoch": 1.2866379310344827, "grad_norm": 0.05070396512746811, "learning_rate": 0.01, "loss": 2.0538, "step": 12537 }, { "epoch": 1.2869458128078817, "grad_norm": 0.039027947932481766, "learning_rate": 0.01, "loss": 2.0487, "step": 12540 }, { "epoch": 1.2872536945812807, "grad_norm": 0.052646003663539886, "learning_rate": 0.01, "loss": 2.0745, "step": 12543 }, { "epoch": 1.2875615763546797, "grad_norm": 0.06035429984331131, "learning_rate": 0.01, "loss": 2.0775, "step": 12546 }, { "epoch": 1.2878694581280787, "grad_norm": 0.0818098783493042, "learning_rate": 0.01, "loss": 2.0685, "step": 12549 }, { "epoch": 1.2881773399014778, "grad_norm": 0.030897030606865883, "learning_rate": 0.01, "loss": 2.0701, "step": 12552 }, { "epoch": 1.2884852216748768, "grad_norm": 0.042795561254024506, "learning_rate": 0.01, "loss": 2.0673, "step": 12555 }, { "epoch": 1.2887931034482758, "grad_norm": 0.11560031026601791, "learning_rate": 0.01, "loss": 2.0637, "step": 12558 }, { "epoch": 1.2891009852216748, "grad_norm": 0.03919963166117668, "learning_rate": 0.01, "loss": 2.0547, "step": 12561 }, { "epoch": 1.2894088669950738, "grad_norm": 0.03816407918930054, "learning_rate": 0.01, "loss": 2.0576, "step": 12564 }, { "epoch": 1.2897167487684729, "grad_norm": 0.04641805216670036, "learning_rate": 0.01, "loss": 2.0779, "step": 12567 }, { "epoch": 1.2900246305418719, "grad_norm": 0.16832102835178375, "learning_rate": 0.01, "loss": 2.0658, "step": 12570 }, { "epoch": 1.290332512315271, "grad_norm": 0.06651032716035843, "learning_rate": 0.01, "loss": 2.0602, "step": 12573 }, { "epoch": 1.29064039408867, "grad_norm": 0.05333925411105156, "learning_rate": 0.01, "loss": 2.0842, "step": 12576 }, { "epoch": 1.290948275862069, "grad_norm": 0.03173685073852539, "learning_rate": 0.01, "loss": 2.0834, "step": 12579 }, { "epoch": 1.291256157635468, "grad_norm": 0.0836583599448204, "learning_rate": 0.01, "loss": 2.0605, "step": 12582 }, { "epoch": 1.291564039408867, "grad_norm": 0.09913724660873413, "learning_rate": 0.01, "loss": 2.0577, "step": 12585 }, { "epoch": 1.291871921182266, "grad_norm": 0.04268624261021614, "learning_rate": 0.01, "loss": 2.1027, "step": 12588 }, { "epoch": 1.292179802955665, "grad_norm": 0.03744608163833618, "learning_rate": 0.01, "loss": 2.0506, "step": 12591 }, { "epoch": 1.292487684729064, "grad_norm": 0.11833969503641129, "learning_rate": 0.01, "loss": 2.0606, "step": 12594 }, { "epoch": 1.292795566502463, "grad_norm": 0.11814229190349579, "learning_rate": 0.01, "loss": 2.0641, "step": 12597 }, { "epoch": 1.293103448275862, "grad_norm": 0.04583375155925751, "learning_rate": 0.01, "loss": 2.0898, "step": 12600 }, { "epoch": 1.2934113300492611, "grad_norm": 0.11559072881937027, "learning_rate": 0.01, "loss": 2.0657, "step": 12603 }, { "epoch": 1.2937192118226601, "grad_norm": 0.050608474761247635, "learning_rate": 0.01, "loss": 2.0879, "step": 12606 }, { "epoch": 1.2940270935960592, "grad_norm": 0.04284593090415001, "learning_rate": 0.01, "loss": 2.0545, "step": 12609 }, { "epoch": 1.2943349753694582, "grad_norm": 0.042168114334344864, "learning_rate": 0.01, "loss": 2.0645, "step": 12612 }, { "epoch": 1.2946428571428572, "grad_norm": 0.06251826882362366, "learning_rate": 0.01, "loss": 2.0621, "step": 12615 }, { "epoch": 1.2949507389162562, "grad_norm": 0.11159554868936539, "learning_rate": 0.01, "loss": 2.0718, "step": 12618 }, { "epoch": 1.2952586206896552, "grad_norm": 0.0715017095208168, "learning_rate": 0.01, "loss": 2.0844, "step": 12621 }, { "epoch": 1.2955665024630543, "grad_norm": 0.12078917771577835, "learning_rate": 0.01, "loss": 2.0604, "step": 12624 }, { "epoch": 1.2958743842364533, "grad_norm": 0.11613093316555023, "learning_rate": 0.01, "loss": 2.0753, "step": 12627 }, { "epoch": 1.2961822660098523, "grad_norm": 0.053543299436569214, "learning_rate": 0.01, "loss": 2.0736, "step": 12630 }, { "epoch": 1.2964901477832513, "grad_norm": 0.05464399605989456, "learning_rate": 0.01, "loss": 2.0845, "step": 12633 }, { "epoch": 1.2967980295566504, "grad_norm": 0.0487944521009922, "learning_rate": 0.01, "loss": 2.0859, "step": 12636 }, { "epoch": 1.2971059113300494, "grad_norm": 0.05266605690121651, "learning_rate": 0.01, "loss": 2.0532, "step": 12639 }, { "epoch": 1.2974137931034484, "grad_norm": 0.07863074541091919, "learning_rate": 0.01, "loss": 2.0496, "step": 12642 }, { "epoch": 1.2977216748768474, "grad_norm": 0.05371072143316269, "learning_rate": 0.01, "loss": 2.1016, "step": 12645 }, { "epoch": 1.2980295566502464, "grad_norm": 0.05592924728989601, "learning_rate": 0.01, "loss": 2.0832, "step": 12648 }, { "epoch": 1.2983374384236452, "grad_norm": 0.0675397738814354, "learning_rate": 0.01, "loss": 2.0784, "step": 12651 }, { "epoch": 1.2986453201970443, "grad_norm": 0.04399113729596138, "learning_rate": 0.01, "loss": 2.0567, "step": 12654 }, { "epoch": 1.2989532019704433, "grad_norm": 0.04609301686286926, "learning_rate": 0.01, "loss": 2.0683, "step": 12657 }, { "epoch": 1.2992610837438423, "grad_norm": 0.05637912079691887, "learning_rate": 0.01, "loss": 2.0655, "step": 12660 }, { "epoch": 1.2995689655172413, "grad_norm": 0.1115126758813858, "learning_rate": 0.01, "loss": 2.0643, "step": 12663 }, { "epoch": 1.2998768472906403, "grad_norm": 0.07762522250413895, "learning_rate": 0.01, "loss": 2.0853, "step": 12666 }, { "epoch": 1.3001847290640394, "grad_norm": 0.06717406958341599, "learning_rate": 0.01, "loss": 2.0881, "step": 12669 }, { "epoch": 1.3004926108374384, "grad_norm": 0.0727803036570549, "learning_rate": 0.01, "loss": 2.0606, "step": 12672 }, { "epoch": 1.3008004926108374, "grad_norm": 0.06588024646043777, "learning_rate": 0.01, "loss": 2.0508, "step": 12675 }, { "epoch": 1.3011083743842364, "grad_norm": 0.09718842804431915, "learning_rate": 0.01, "loss": 2.0793, "step": 12678 }, { "epoch": 1.3014162561576355, "grad_norm": 0.05804411321878433, "learning_rate": 0.01, "loss": 2.0825, "step": 12681 }, { "epoch": 1.3017241379310345, "grad_norm": 0.07549803704023361, "learning_rate": 0.01, "loss": 2.0546, "step": 12684 }, { "epoch": 1.3020320197044335, "grad_norm": 0.04496621713042259, "learning_rate": 0.01, "loss": 2.0483, "step": 12687 }, { "epoch": 1.3023399014778325, "grad_norm": 0.1283668428659439, "learning_rate": 0.01, "loss": 2.0639, "step": 12690 }, { "epoch": 1.3026477832512315, "grad_norm": 0.1276516169309616, "learning_rate": 0.01, "loss": 2.062, "step": 12693 }, { "epoch": 1.3029556650246306, "grad_norm": 0.12865981459617615, "learning_rate": 0.01, "loss": 2.0697, "step": 12696 }, { "epoch": 1.3032635467980296, "grad_norm": 0.05869213864207268, "learning_rate": 0.01, "loss": 2.0955, "step": 12699 }, { "epoch": 1.3035714285714286, "grad_norm": 0.042082637548446655, "learning_rate": 0.01, "loss": 2.0847, "step": 12702 }, { "epoch": 1.3038793103448276, "grad_norm": 0.11474558711051941, "learning_rate": 0.01, "loss": 2.0767, "step": 12705 }, { "epoch": 1.3041871921182266, "grad_norm": 0.12426330894231796, "learning_rate": 0.01, "loss": 2.0787, "step": 12708 }, { "epoch": 1.3044950738916257, "grad_norm": 0.06731969118118286, "learning_rate": 0.01, "loss": 2.064, "step": 12711 }, { "epoch": 1.3048029556650247, "grad_norm": 0.049505047500133514, "learning_rate": 0.01, "loss": 2.0848, "step": 12714 }, { "epoch": 1.3051108374384237, "grad_norm": 0.04460617154836655, "learning_rate": 0.01, "loss": 2.0958, "step": 12717 }, { "epoch": 1.3054187192118227, "grad_norm": 0.05461740121245384, "learning_rate": 0.01, "loss": 2.0724, "step": 12720 }, { "epoch": 1.3057266009852218, "grad_norm": 0.04074997082352638, "learning_rate": 0.01, "loss": 2.0453, "step": 12723 }, { "epoch": 1.3060344827586206, "grad_norm": 0.04925557225942612, "learning_rate": 0.01, "loss": 2.0707, "step": 12726 }, { "epoch": 1.3063423645320196, "grad_norm": 0.06570678949356079, "learning_rate": 0.01, "loss": 2.0703, "step": 12729 }, { "epoch": 1.3066502463054186, "grad_norm": 0.076143778860569, "learning_rate": 0.01, "loss": 2.0639, "step": 12732 }, { "epoch": 1.3069581280788176, "grad_norm": 0.06691130995750427, "learning_rate": 0.01, "loss": 2.0858, "step": 12735 }, { "epoch": 1.3072660098522166, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0622, "step": 12738 }, { "epoch": 1.3075738916256157, "grad_norm": 0.08008868247270584, "learning_rate": 0.01, "loss": 2.085, "step": 12741 }, { "epoch": 1.3078817733990147, "grad_norm": 0.10713468492031097, "learning_rate": 0.01, "loss": 2.0642, "step": 12744 }, { "epoch": 1.3081896551724137, "grad_norm": 0.101436547935009, "learning_rate": 0.01, "loss": 2.0835, "step": 12747 }, { "epoch": 1.3084975369458127, "grad_norm": 0.0552450455725193, "learning_rate": 0.01, "loss": 2.0553, "step": 12750 }, { "epoch": 1.3088054187192117, "grad_norm": 0.08755996823310852, "learning_rate": 0.01, "loss": 2.0564, "step": 12753 }, { "epoch": 1.3091133004926108, "grad_norm": 0.03980748727917671, "learning_rate": 0.01, "loss": 2.084, "step": 12756 }, { "epoch": 1.3094211822660098, "grad_norm": 0.0971774309873581, "learning_rate": 0.01, "loss": 2.0858, "step": 12759 }, { "epoch": 1.3097290640394088, "grad_norm": 0.05845404043793678, "learning_rate": 0.01, "loss": 2.0792, "step": 12762 }, { "epoch": 1.3100369458128078, "grad_norm": 0.08498022705316544, "learning_rate": 0.01, "loss": 2.0932, "step": 12765 }, { "epoch": 1.3103448275862069, "grad_norm": 0.05135398730635643, "learning_rate": 0.01, "loss": 2.0796, "step": 12768 }, { "epoch": 1.3106527093596059, "grad_norm": 0.048872511833906174, "learning_rate": 0.01, "loss": 2.0675, "step": 12771 }, { "epoch": 1.310960591133005, "grad_norm": 0.0529983788728714, "learning_rate": 0.01, "loss": 2.0573, "step": 12774 }, { "epoch": 1.311268472906404, "grad_norm": 0.03580658137798309, "learning_rate": 0.01, "loss": 2.0741, "step": 12777 }, { "epoch": 1.311576354679803, "grad_norm": 0.03904234617948532, "learning_rate": 0.01, "loss": 2.0691, "step": 12780 }, { "epoch": 1.311884236453202, "grad_norm": 0.04073551669716835, "learning_rate": 0.01, "loss": 2.0694, "step": 12783 }, { "epoch": 1.312192118226601, "grad_norm": 0.058973487466573715, "learning_rate": 0.01, "loss": 2.0381, "step": 12786 }, { "epoch": 1.3125, "grad_norm": 0.05673586577177048, "learning_rate": 0.01, "loss": 2.0691, "step": 12789 }, { "epoch": 1.312807881773399, "grad_norm": 0.13680770993232727, "learning_rate": 0.01, "loss": 2.0673, "step": 12792 }, { "epoch": 1.313115763546798, "grad_norm": 0.0454241968691349, "learning_rate": 0.01, "loss": 2.0766, "step": 12795 }, { "epoch": 1.313423645320197, "grad_norm": 0.04074293375015259, "learning_rate": 0.01, "loss": 2.0593, "step": 12798 }, { "epoch": 1.313731527093596, "grad_norm": 0.04174893721938133, "learning_rate": 0.01, "loss": 2.0537, "step": 12801 }, { "epoch": 1.314039408866995, "grad_norm": 0.06716062128543854, "learning_rate": 0.01, "loss": 2.0642, "step": 12804 }, { "epoch": 1.3143472906403941, "grad_norm": 0.09866861253976822, "learning_rate": 0.01, "loss": 2.0889, "step": 12807 }, { "epoch": 1.3146551724137931, "grad_norm": 0.13097235560417175, "learning_rate": 0.01, "loss": 2.0744, "step": 12810 }, { "epoch": 1.3149630541871922, "grad_norm": 0.07859724014997482, "learning_rate": 0.01, "loss": 2.0899, "step": 12813 }, { "epoch": 1.3152709359605912, "grad_norm": 0.06141912192106247, "learning_rate": 0.01, "loss": 2.0666, "step": 12816 }, { "epoch": 1.3155788177339902, "grad_norm": 0.05112985149025917, "learning_rate": 0.01, "loss": 2.0658, "step": 12819 }, { "epoch": 1.3158866995073892, "grad_norm": 0.03482404351234436, "learning_rate": 0.01, "loss": 2.0802, "step": 12822 }, { "epoch": 1.3161945812807883, "grad_norm": 0.05562854930758476, "learning_rate": 0.01, "loss": 2.0659, "step": 12825 }, { "epoch": 1.3165024630541873, "grad_norm": 0.04841645434498787, "learning_rate": 0.01, "loss": 2.058, "step": 12828 }, { "epoch": 1.3168103448275863, "grad_norm": 0.0756571963429451, "learning_rate": 0.01, "loss": 2.0283, "step": 12831 }, { "epoch": 1.3171182266009853, "grad_norm": 0.09575197845697403, "learning_rate": 0.01, "loss": 2.0709, "step": 12834 }, { "epoch": 1.3174261083743843, "grad_norm": 0.07003197073936462, "learning_rate": 0.01, "loss": 2.0409, "step": 12837 }, { "epoch": 1.3177339901477834, "grad_norm": 0.12592460215091705, "learning_rate": 0.01, "loss": 2.051, "step": 12840 }, { "epoch": 1.3180418719211824, "grad_norm": 0.07086621969938278, "learning_rate": 0.01, "loss": 2.0856, "step": 12843 }, { "epoch": 1.3183497536945814, "grad_norm": 0.07367062568664551, "learning_rate": 0.01, "loss": 2.0759, "step": 12846 }, { "epoch": 1.3186576354679804, "grad_norm": 0.06731852889060974, "learning_rate": 0.01, "loss": 2.077, "step": 12849 }, { "epoch": 1.3189655172413794, "grad_norm": 0.07499510049819946, "learning_rate": 0.01, "loss": 2.072, "step": 12852 }, { "epoch": 1.3192733990147782, "grad_norm": 0.07604499906301498, "learning_rate": 0.01, "loss": 2.0717, "step": 12855 }, { "epoch": 1.3195812807881773, "grad_norm": 0.0770401731133461, "learning_rate": 0.01, "loss": 2.0894, "step": 12858 }, { "epoch": 1.3198891625615763, "grad_norm": 0.06753168255090714, "learning_rate": 0.01, "loss": 2.0546, "step": 12861 }, { "epoch": 1.3201970443349753, "grad_norm": 0.14175836741924286, "learning_rate": 0.01, "loss": 2.0811, "step": 12864 }, { "epoch": 1.3205049261083743, "grad_norm": 0.04258207604289055, "learning_rate": 0.01, "loss": 2.0831, "step": 12867 }, { "epoch": 1.3208128078817734, "grad_norm": 0.03372815623879433, "learning_rate": 0.01, "loss": 2.0827, "step": 12870 }, { "epoch": 1.3211206896551724, "grad_norm": 0.040649689733982086, "learning_rate": 0.01, "loss": 2.067, "step": 12873 }, { "epoch": 1.3214285714285714, "grad_norm": 0.060684412717819214, "learning_rate": 0.01, "loss": 2.0701, "step": 12876 }, { "epoch": 1.3217364532019704, "grad_norm": 0.036452196538448334, "learning_rate": 0.01, "loss": 2.0726, "step": 12879 }, { "epoch": 1.3220443349753694, "grad_norm": 0.09457682818174362, "learning_rate": 0.01, "loss": 2.0709, "step": 12882 }, { "epoch": 1.3223522167487685, "grad_norm": 0.049776870757341385, "learning_rate": 0.01, "loss": 2.0354, "step": 12885 }, { "epoch": 1.3226600985221675, "grad_norm": 0.05794042348861694, "learning_rate": 0.01, "loss": 2.0781, "step": 12888 }, { "epoch": 1.3229679802955665, "grad_norm": 0.052100833505392075, "learning_rate": 0.01, "loss": 2.0801, "step": 12891 }, { "epoch": 1.3232758620689655, "grad_norm": 0.06558441370725632, "learning_rate": 0.01, "loss": 2.076, "step": 12894 }, { "epoch": 1.3235837438423645, "grad_norm": 0.1107778549194336, "learning_rate": 0.01, "loss": 2.0859, "step": 12897 }, { "epoch": 1.3238916256157636, "grad_norm": 0.052982959896326065, "learning_rate": 0.01, "loss": 2.0749, "step": 12900 }, { "epoch": 1.3241995073891626, "grad_norm": 0.0976911410689354, "learning_rate": 0.01, "loss": 2.067, "step": 12903 }, { "epoch": 1.3245073891625616, "grad_norm": 0.05620484799146652, "learning_rate": 0.01, "loss": 2.0892, "step": 12906 }, { "epoch": 1.3248152709359606, "grad_norm": 0.12753431499004364, "learning_rate": 0.01, "loss": 2.073, "step": 12909 }, { "epoch": 1.3251231527093597, "grad_norm": 0.14311249554157257, "learning_rate": 0.01, "loss": 2.0641, "step": 12912 }, { "epoch": 1.3254310344827587, "grad_norm": 0.049242276698350906, "learning_rate": 0.01, "loss": 2.0701, "step": 12915 }, { "epoch": 1.3257389162561577, "grad_norm": 0.07875422388315201, "learning_rate": 0.01, "loss": 2.0889, "step": 12918 }, { "epoch": 1.3260467980295567, "grad_norm": 0.036451369524002075, "learning_rate": 0.01, "loss": 2.0634, "step": 12921 }, { "epoch": 1.3263546798029557, "grad_norm": 0.03659353405237198, "learning_rate": 0.01, "loss": 2.0725, "step": 12924 }, { "epoch": 1.3266625615763548, "grad_norm": 0.049371130764484406, "learning_rate": 0.01, "loss": 2.0531, "step": 12927 }, { "epoch": 1.3269704433497536, "grad_norm": 0.05998126044869423, "learning_rate": 0.01, "loss": 2.0894, "step": 12930 }, { "epoch": 1.3272783251231526, "grad_norm": 0.08878383040428162, "learning_rate": 0.01, "loss": 2.0627, "step": 12933 }, { "epoch": 1.3275862068965516, "grad_norm": 0.059183619916439056, "learning_rate": 0.01, "loss": 2.0829, "step": 12936 }, { "epoch": 1.3278940886699506, "grad_norm": 0.05783310905098915, "learning_rate": 0.01, "loss": 2.0972, "step": 12939 }, { "epoch": 1.3282019704433496, "grad_norm": 0.07206647843122482, "learning_rate": 0.01, "loss": 2.0667, "step": 12942 }, { "epoch": 1.3285098522167487, "grad_norm": 0.07303550839424133, "learning_rate": 0.01, "loss": 2.0745, "step": 12945 }, { "epoch": 1.3288177339901477, "grad_norm": 0.04319525510072708, "learning_rate": 0.01, "loss": 2.0673, "step": 12948 }, { "epoch": 1.3291256157635467, "grad_norm": 0.044222913682460785, "learning_rate": 0.01, "loss": 2.0856, "step": 12951 }, { "epoch": 1.3294334975369457, "grad_norm": 0.08791283518075943, "learning_rate": 0.01, "loss": 2.0841, "step": 12954 }, { "epoch": 1.3297413793103448, "grad_norm": 0.05172525718808174, "learning_rate": 0.01, "loss": 2.0828, "step": 12957 }, { "epoch": 1.3300492610837438, "grad_norm": 0.053524646908044815, "learning_rate": 0.01, "loss": 2.0504, "step": 12960 }, { "epoch": 1.3303571428571428, "grad_norm": 0.03858666867017746, "learning_rate": 0.01, "loss": 2.1045, "step": 12963 }, { "epoch": 1.3306650246305418, "grad_norm": 0.037037089467048645, "learning_rate": 0.01, "loss": 2.0742, "step": 12966 }, { "epoch": 1.3309729064039408, "grad_norm": 0.04609520733356476, "learning_rate": 0.01, "loss": 2.0474, "step": 12969 }, { "epoch": 1.3312807881773399, "grad_norm": 0.03773853927850723, "learning_rate": 0.01, "loss": 2.0573, "step": 12972 }, { "epoch": 1.3315886699507389, "grad_norm": 0.040222879499197006, "learning_rate": 0.01, "loss": 2.073, "step": 12975 }, { "epoch": 1.331896551724138, "grad_norm": 0.11522398144006729, "learning_rate": 0.01, "loss": 2.0523, "step": 12978 }, { "epoch": 1.332204433497537, "grad_norm": 0.0496886670589447, "learning_rate": 0.01, "loss": 2.0873, "step": 12981 }, { "epoch": 1.332512315270936, "grad_norm": 0.0955866202712059, "learning_rate": 0.01, "loss": 2.0735, "step": 12984 }, { "epoch": 1.332820197044335, "grad_norm": 0.06260306388139725, "learning_rate": 0.01, "loss": 2.0696, "step": 12987 }, { "epoch": 1.333128078817734, "grad_norm": 0.049784719944000244, "learning_rate": 0.01, "loss": 2.0807, "step": 12990 }, { "epoch": 1.333435960591133, "grad_norm": 0.04571852833032608, "learning_rate": 0.01, "loss": 2.0648, "step": 12993 }, { "epoch": 1.333743842364532, "grad_norm": 0.11032246053218842, "learning_rate": 0.01, "loss": 2.0777, "step": 12996 }, { "epoch": 1.334051724137931, "grad_norm": 0.04030182585120201, "learning_rate": 0.01, "loss": 2.0491, "step": 12999 }, { "epoch": 1.33435960591133, "grad_norm": 0.06681946665048599, "learning_rate": 0.01, "loss": 2.051, "step": 13002 }, { "epoch": 1.334667487684729, "grad_norm": 0.04532696306705475, "learning_rate": 0.01, "loss": 2.0322, "step": 13005 }, { "epoch": 1.3349753694581281, "grad_norm": 0.03890594094991684, "learning_rate": 0.01, "loss": 2.0792, "step": 13008 }, { "epoch": 1.3352832512315271, "grad_norm": 0.08290864527225494, "learning_rate": 0.01, "loss": 2.0903, "step": 13011 }, { "epoch": 1.3355911330049262, "grad_norm": 0.09402919560670853, "learning_rate": 0.01, "loss": 2.0678, "step": 13014 }, { "epoch": 1.3358990147783252, "grad_norm": 0.06945643573999405, "learning_rate": 0.01, "loss": 2.0823, "step": 13017 }, { "epoch": 1.3362068965517242, "grad_norm": 0.0540471225976944, "learning_rate": 0.01, "loss": 2.0705, "step": 13020 }, { "epoch": 1.3365147783251232, "grad_norm": 0.04104168713092804, "learning_rate": 0.01, "loss": 2.0808, "step": 13023 }, { "epoch": 1.3368226600985222, "grad_norm": 0.04578167945146561, "learning_rate": 0.01, "loss": 2.0821, "step": 13026 }, { "epoch": 1.3371305418719213, "grad_norm": 0.07289981096982956, "learning_rate": 0.01, "loss": 2.0686, "step": 13029 }, { "epoch": 1.3374384236453203, "grad_norm": 0.09114310890436172, "learning_rate": 0.01, "loss": 2.0435, "step": 13032 }, { "epoch": 1.3377463054187193, "grad_norm": 0.06305088102817535, "learning_rate": 0.01, "loss": 2.0593, "step": 13035 }, { "epoch": 1.3380541871921183, "grad_norm": 0.06495746225118637, "learning_rate": 0.01, "loss": 2.0728, "step": 13038 }, { "epoch": 1.3383620689655173, "grad_norm": 0.05586539953947067, "learning_rate": 0.01, "loss": 2.0747, "step": 13041 }, { "epoch": 1.3386699507389164, "grad_norm": 0.04566524177789688, "learning_rate": 0.01, "loss": 2.0576, "step": 13044 }, { "epoch": 1.3389778325123154, "grad_norm": 0.060839373618364334, "learning_rate": 0.01, "loss": 2.0717, "step": 13047 }, { "epoch": 1.3392857142857144, "grad_norm": 0.11224903166294098, "learning_rate": 0.01, "loss": 2.0878, "step": 13050 }, { "epoch": 1.3395935960591134, "grad_norm": 0.09223728626966476, "learning_rate": 0.01, "loss": 2.0531, "step": 13053 }, { "epoch": 1.3399014778325122, "grad_norm": 0.0413731187582016, "learning_rate": 0.01, "loss": 2.0495, "step": 13056 }, { "epoch": 1.3402093596059113, "grad_norm": 0.050937358289957047, "learning_rate": 0.01, "loss": 2.0774, "step": 13059 }, { "epoch": 1.3405172413793103, "grad_norm": 0.0407971516251564, "learning_rate": 0.01, "loss": 2.0766, "step": 13062 }, { "epoch": 1.3408251231527093, "grad_norm": 0.0623883455991745, "learning_rate": 0.01, "loss": 2.0691, "step": 13065 }, { "epoch": 1.3411330049261083, "grad_norm": 0.09325427561998367, "learning_rate": 0.01, "loss": 2.0731, "step": 13068 }, { "epoch": 1.3414408866995073, "grad_norm": 0.06965765357017517, "learning_rate": 0.01, "loss": 2.0653, "step": 13071 }, { "epoch": 1.3417487684729064, "grad_norm": 0.12671297788619995, "learning_rate": 0.01, "loss": 2.1028, "step": 13074 }, { "epoch": 1.3420566502463054, "grad_norm": 0.04154878482222557, "learning_rate": 0.01, "loss": 2.0783, "step": 13077 }, { "epoch": 1.3423645320197044, "grad_norm": 0.04698561131954193, "learning_rate": 0.01, "loss": 2.0799, "step": 13080 }, { "epoch": 1.3426724137931034, "grad_norm": 0.031127501279115677, "learning_rate": 0.01, "loss": 2.0756, "step": 13083 }, { "epoch": 1.3429802955665024, "grad_norm": 0.05258537083864212, "learning_rate": 0.01, "loss": 2.0821, "step": 13086 }, { "epoch": 1.3432881773399015, "grad_norm": 0.06848637759685516, "learning_rate": 0.01, "loss": 2.0629, "step": 13089 }, { "epoch": 1.3435960591133005, "grad_norm": 0.07738485932350159, "learning_rate": 0.01, "loss": 2.0674, "step": 13092 }, { "epoch": 1.3439039408866995, "grad_norm": 0.09635680168867111, "learning_rate": 0.01, "loss": 2.0782, "step": 13095 }, { "epoch": 1.3442118226600985, "grad_norm": 0.04388611391186714, "learning_rate": 0.01, "loss": 2.0575, "step": 13098 }, { "epoch": 1.3445197044334976, "grad_norm": 0.0776490792632103, "learning_rate": 0.01, "loss": 2.0753, "step": 13101 }, { "epoch": 1.3448275862068966, "grad_norm": 0.11331035196781158, "learning_rate": 0.01, "loss": 2.053, "step": 13104 }, { "epoch": 1.3451354679802956, "grad_norm": 0.04267279431223869, "learning_rate": 0.01, "loss": 2.0812, "step": 13107 }, { "epoch": 1.3454433497536946, "grad_norm": 0.05454112961888313, "learning_rate": 0.01, "loss": 2.0711, "step": 13110 }, { "epoch": 1.3457512315270936, "grad_norm": 0.07470305263996124, "learning_rate": 0.01, "loss": 2.0878, "step": 13113 }, { "epoch": 1.3460591133004927, "grad_norm": 0.057337477803230286, "learning_rate": 0.01, "loss": 2.0607, "step": 13116 }, { "epoch": 1.3463669950738917, "grad_norm": 0.09155120700597763, "learning_rate": 0.01, "loss": 2.0898, "step": 13119 }, { "epoch": 1.3466748768472907, "grad_norm": 0.09644894301891327, "learning_rate": 0.01, "loss": 2.0905, "step": 13122 }, { "epoch": 1.3469827586206897, "grad_norm": 0.0579628124833107, "learning_rate": 0.01, "loss": 2.0834, "step": 13125 }, { "epoch": 1.3472906403940887, "grad_norm": 0.09968624264001846, "learning_rate": 0.01, "loss": 2.0797, "step": 13128 }, { "epoch": 1.3475985221674878, "grad_norm": 0.04834052175283432, "learning_rate": 0.01, "loss": 2.0552, "step": 13131 }, { "epoch": 1.3479064039408866, "grad_norm": 0.05561887100338936, "learning_rate": 0.01, "loss": 2.0564, "step": 13134 }, { "epoch": 1.3482142857142856, "grad_norm": 0.14990638196468353, "learning_rate": 0.01, "loss": 2.0738, "step": 13137 }, { "epoch": 1.3485221674876846, "grad_norm": 0.07530777156352997, "learning_rate": 0.01, "loss": 2.0765, "step": 13140 }, { "epoch": 1.3488300492610836, "grad_norm": 0.09080106765031815, "learning_rate": 0.01, "loss": 2.083, "step": 13143 }, { "epoch": 1.3491379310344827, "grad_norm": 0.042014699429273605, "learning_rate": 0.01, "loss": 2.0608, "step": 13146 }, { "epoch": 1.3494458128078817, "grad_norm": 0.08905219286680222, "learning_rate": 0.01, "loss": 2.0657, "step": 13149 }, { "epoch": 1.3497536945812807, "grad_norm": 0.1093059629201889, "learning_rate": 0.01, "loss": 2.102, "step": 13152 }, { "epoch": 1.3500615763546797, "grad_norm": 0.09834617376327515, "learning_rate": 0.01, "loss": 2.0601, "step": 13155 }, { "epoch": 1.3503694581280787, "grad_norm": 0.0754542201757431, "learning_rate": 0.01, "loss": 2.103, "step": 13158 }, { "epoch": 1.3506773399014778, "grad_norm": 0.09639342129230499, "learning_rate": 0.01, "loss": 2.0743, "step": 13161 }, { "epoch": 1.3509852216748768, "grad_norm": 0.05084405094385147, "learning_rate": 0.01, "loss": 2.0898, "step": 13164 }, { "epoch": 1.3512931034482758, "grad_norm": 0.04796381667256355, "learning_rate": 0.01, "loss": 2.0788, "step": 13167 }, { "epoch": 1.3516009852216748, "grad_norm": 0.05373486131429672, "learning_rate": 0.01, "loss": 2.0632, "step": 13170 }, { "epoch": 1.3519088669950738, "grad_norm": 0.05145580321550369, "learning_rate": 0.01, "loss": 2.0383, "step": 13173 }, { "epoch": 1.3522167487684729, "grad_norm": 0.03861214593052864, "learning_rate": 0.01, "loss": 2.0678, "step": 13176 }, { "epoch": 1.3525246305418719, "grad_norm": 0.04394346475601196, "learning_rate": 0.01, "loss": 2.0601, "step": 13179 }, { "epoch": 1.352832512315271, "grad_norm": 0.08851804584264755, "learning_rate": 0.01, "loss": 2.0374, "step": 13182 }, { "epoch": 1.35314039408867, "grad_norm": 0.059799451380968094, "learning_rate": 0.01, "loss": 2.0735, "step": 13185 }, { "epoch": 1.353448275862069, "grad_norm": 0.13764169812202454, "learning_rate": 0.01, "loss": 2.1001, "step": 13188 }, { "epoch": 1.353756157635468, "grad_norm": 0.05652278661727905, "learning_rate": 0.01, "loss": 2.0771, "step": 13191 }, { "epoch": 1.354064039408867, "grad_norm": 0.04755775257945061, "learning_rate": 0.01, "loss": 2.0467, "step": 13194 }, { "epoch": 1.354371921182266, "grad_norm": 0.058131635189056396, "learning_rate": 0.01, "loss": 2.0884, "step": 13197 }, { "epoch": 1.354679802955665, "grad_norm": 0.041266053915023804, "learning_rate": 0.01, "loss": 2.0684, "step": 13200 }, { "epoch": 1.354987684729064, "grad_norm": 0.034990034997463226, "learning_rate": 0.01, "loss": 2.0576, "step": 13203 }, { "epoch": 1.355295566502463, "grad_norm": 0.13107064366340637, "learning_rate": 0.01, "loss": 2.0616, "step": 13206 }, { "epoch": 1.355603448275862, "grad_norm": 0.05397200584411621, "learning_rate": 0.01, "loss": 2.0953, "step": 13209 }, { "epoch": 1.3559113300492611, "grad_norm": 0.04137737303972244, "learning_rate": 0.01, "loss": 2.0538, "step": 13212 }, { "epoch": 1.3562192118226601, "grad_norm": 0.05001407861709595, "learning_rate": 0.01, "loss": 2.0809, "step": 13215 }, { "epoch": 1.3565270935960592, "grad_norm": 0.10387953370809555, "learning_rate": 0.01, "loss": 2.0818, "step": 13218 }, { "epoch": 1.3568349753694582, "grad_norm": 0.052998363971710205, "learning_rate": 0.01, "loss": 2.0711, "step": 13221 }, { "epoch": 1.3571428571428572, "grad_norm": 0.06805765628814697, "learning_rate": 0.01, "loss": 2.0604, "step": 13224 }, { "epoch": 1.3574507389162562, "grad_norm": 0.06597940623760223, "learning_rate": 0.01, "loss": 2.0701, "step": 13227 }, { "epoch": 1.3577586206896552, "grad_norm": 0.10083628445863724, "learning_rate": 0.01, "loss": 2.086, "step": 13230 }, { "epoch": 1.3580665024630543, "grad_norm": 0.05467986315488815, "learning_rate": 0.01, "loss": 2.072, "step": 13233 }, { "epoch": 1.3583743842364533, "grad_norm": 0.08951261639595032, "learning_rate": 0.01, "loss": 2.1256, "step": 13236 }, { "epoch": 1.3586822660098523, "grad_norm": 0.052532244473695755, "learning_rate": 0.01, "loss": 2.0555, "step": 13239 }, { "epoch": 1.3589901477832513, "grad_norm": 0.038159146904945374, "learning_rate": 0.01, "loss": 2.062, "step": 13242 }, { "epoch": 1.3592980295566504, "grad_norm": 0.09895820915699005, "learning_rate": 0.01, "loss": 2.0559, "step": 13245 }, { "epoch": 1.3596059113300494, "grad_norm": 0.07522387057542801, "learning_rate": 0.01, "loss": 2.0634, "step": 13248 }, { "epoch": 1.3599137931034484, "grad_norm": 0.04762687534093857, "learning_rate": 0.01, "loss": 2.0848, "step": 13251 }, { "epoch": 1.3602216748768474, "grad_norm": 0.0716032013297081, "learning_rate": 0.01, "loss": 2.0783, "step": 13254 }, { "epoch": 1.3605295566502464, "grad_norm": 0.08518968522548676, "learning_rate": 0.01, "loss": 2.0818, "step": 13257 }, { "epoch": 1.3608374384236452, "grad_norm": 0.06927520781755447, "learning_rate": 0.01, "loss": 2.0728, "step": 13260 }, { "epoch": 1.3611453201970443, "grad_norm": 0.10368376970291138, "learning_rate": 0.01, "loss": 2.0419, "step": 13263 }, { "epoch": 1.3614532019704433, "grad_norm": 0.04249117895960808, "learning_rate": 0.01, "loss": 2.0695, "step": 13266 }, { "epoch": 1.3617610837438423, "grad_norm": 0.06504488736391068, "learning_rate": 0.01, "loss": 2.0658, "step": 13269 }, { "epoch": 1.3620689655172413, "grad_norm": 0.03990466147661209, "learning_rate": 0.01, "loss": 2.0796, "step": 13272 }, { "epoch": 1.3623768472906403, "grad_norm": 0.042559072375297546, "learning_rate": 0.01, "loss": 2.0643, "step": 13275 }, { "epoch": 1.3626847290640394, "grad_norm": 0.046650230884552, "learning_rate": 0.01, "loss": 2.0756, "step": 13278 }, { "epoch": 1.3629926108374384, "grad_norm": 0.08641167730093002, "learning_rate": 0.01, "loss": 2.0638, "step": 13281 }, { "epoch": 1.3633004926108374, "grad_norm": 0.11438708007335663, "learning_rate": 0.01, "loss": 2.0559, "step": 13284 }, { "epoch": 1.3636083743842364, "grad_norm": 0.05360870808362961, "learning_rate": 0.01, "loss": 2.0534, "step": 13287 }, { "epoch": 1.3639162561576355, "grad_norm": 0.07226021587848663, "learning_rate": 0.01, "loss": 2.0569, "step": 13290 }, { "epoch": 1.3642241379310345, "grad_norm": 0.04532739892601967, "learning_rate": 0.01, "loss": 2.0504, "step": 13293 }, { "epoch": 1.3645320197044335, "grad_norm": 0.06119906157255173, "learning_rate": 0.01, "loss": 2.0507, "step": 13296 }, { "epoch": 1.3648399014778325, "grad_norm": 0.05576052516698837, "learning_rate": 0.01, "loss": 2.0454, "step": 13299 }, { "epoch": 1.3651477832512315, "grad_norm": 0.038748834282159805, "learning_rate": 0.01, "loss": 2.0632, "step": 13302 }, { "epoch": 1.3654556650246306, "grad_norm": 0.09733711183071136, "learning_rate": 0.01, "loss": 2.0733, "step": 13305 }, { "epoch": 1.3657635467980296, "grad_norm": 0.043375931680202484, "learning_rate": 0.01, "loss": 2.0629, "step": 13308 }, { "epoch": 1.3660714285714286, "grad_norm": 0.11930018663406372, "learning_rate": 0.01, "loss": 2.0931, "step": 13311 }, { "epoch": 1.3663793103448276, "grad_norm": 0.06754540652036667, "learning_rate": 0.01, "loss": 2.0575, "step": 13314 }, { "epoch": 1.3666871921182266, "grad_norm": 0.07226148992776871, "learning_rate": 0.01, "loss": 2.0786, "step": 13317 }, { "epoch": 1.3669950738916257, "grad_norm": 0.08159705251455307, "learning_rate": 0.01, "loss": 2.0594, "step": 13320 }, { "epoch": 1.3673029556650247, "grad_norm": 0.044994477182626724, "learning_rate": 0.01, "loss": 2.0782, "step": 13323 }, { "epoch": 1.3676108374384237, "grad_norm": 0.05308050662279129, "learning_rate": 0.01, "loss": 2.0645, "step": 13326 }, { "epoch": 1.3679187192118227, "grad_norm": 0.09141236543655396, "learning_rate": 0.01, "loss": 2.0801, "step": 13329 }, { "epoch": 1.3682266009852218, "grad_norm": 0.040702883154153824, "learning_rate": 0.01, "loss": 2.0497, "step": 13332 }, { "epoch": 1.3685344827586206, "grad_norm": 0.06111524999141693, "learning_rate": 0.01, "loss": 2.0633, "step": 13335 }, { "epoch": 1.3688423645320196, "grad_norm": 0.10802275687456131, "learning_rate": 0.01, "loss": 2.0541, "step": 13338 }, { "epoch": 1.3691502463054186, "grad_norm": 0.09049344807863235, "learning_rate": 0.01, "loss": 2.0636, "step": 13341 }, { "epoch": 1.3694581280788176, "grad_norm": 0.055894456803798676, "learning_rate": 0.01, "loss": 2.0675, "step": 13344 }, { "epoch": 1.3697660098522166, "grad_norm": 0.054729919880628586, "learning_rate": 0.01, "loss": 2.0674, "step": 13347 }, { "epoch": 1.3700738916256157, "grad_norm": 0.05745011568069458, "learning_rate": 0.01, "loss": 2.0707, "step": 13350 }, { "epoch": 1.3703817733990147, "grad_norm": 0.06573651731014252, "learning_rate": 0.01, "loss": 2.0493, "step": 13353 }, { "epoch": 1.3706896551724137, "grad_norm": 0.20495210587978363, "learning_rate": 0.01, "loss": 2.0798, "step": 13356 }, { "epoch": 1.3709975369458127, "grad_norm": 0.10678639262914658, "learning_rate": 0.01, "loss": 2.0499, "step": 13359 }, { "epoch": 1.3713054187192117, "grad_norm": 0.10948281735181808, "learning_rate": 0.01, "loss": 2.087, "step": 13362 }, { "epoch": 1.3716133004926108, "grad_norm": 0.07788719981908798, "learning_rate": 0.01, "loss": 2.0745, "step": 13365 }, { "epoch": 1.3719211822660098, "grad_norm": 0.04947768524289131, "learning_rate": 0.01, "loss": 2.0608, "step": 13368 }, { "epoch": 1.3722290640394088, "grad_norm": 0.04789843037724495, "learning_rate": 0.01, "loss": 2.0607, "step": 13371 }, { "epoch": 1.3725369458128078, "grad_norm": 0.05217898637056351, "learning_rate": 0.01, "loss": 2.058, "step": 13374 }, { "epoch": 1.3728448275862069, "grad_norm": 0.04018987715244293, "learning_rate": 0.01, "loss": 2.0782, "step": 13377 }, { "epoch": 1.3731527093596059, "grad_norm": 0.035446904599666595, "learning_rate": 0.01, "loss": 2.0668, "step": 13380 }, { "epoch": 1.373460591133005, "grad_norm": 0.038600854575634, "learning_rate": 0.01, "loss": 2.0865, "step": 13383 }, { "epoch": 1.373768472906404, "grad_norm": 0.055341143161058426, "learning_rate": 0.01, "loss": 2.0591, "step": 13386 }, { "epoch": 1.374076354679803, "grad_norm": 0.11673317849636078, "learning_rate": 0.01, "loss": 2.0897, "step": 13389 }, { "epoch": 1.374384236453202, "grad_norm": 0.06797752529382706, "learning_rate": 0.01, "loss": 2.0676, "step": 13392 }, { "epoch": 1.374692118226601, "grad_norm": 0.041885413229465485, "learning_rate": 0.01, "loss": 2.0245, "step": 13395 }, { "epoch": 1.375, "grad_norm": 0.07391481846570969, "learning_rate": 0.01, "loss": 2.067, "step": 13398 }, { "epoch": 1.375307881773399, "grad_norm": 0.07959283888339996, "learning_rate": 0.01, "loss": 2.0192, "step": 13401 }, { "epoch": 1.375615763546798, "grad_norm": 0.09504754096269608, "learning_rate": 0.01, "loss": 2.0841, "step": 13404 }, { "epoch": 1.375923645320197, "grad_norm": 0.08874071389436722, "learning_rate": 0.01, "loss": 2.0473, "step": 13407 }, { "epoch": 1.376231527093596, "grad_norm": 0.05350710079073906, "learning_rate": 0.01, "loss": 2.0763, "step": 13410 }, { "epoch": 1.376539408866995, "grad_norm": 0.06738609820604324, "learning_rate": 0.01, "loss": 2.0782, "step": 13413 }, { "epoch": 1.3768472906403941, "grad_norm": 0.04335073009133339, "learning_rate": 0.01, "loss": 2.0601, "step": 13416 }, { "epoch": 1.3771551724137931, "grad_norm": 0.056045398116111755, "learning_rate": 0.01, "loss": 2.0588, "step": 13419 }, { "epoch": 1.3774630541871922, "grad_norm": 0.06593155860900879, "learning_rate": 0.01, "loss": 2.0637, "step": 13422 }, { "epoch": 1.3777709359605912, "grad_norm": 0.08942624181509018, "learning_rate": 0.01, "loss": 2.0724, "step": 13425 }, { "epoch": 1.3780788177339902, "grad_norm": 0.10098058730363846, "learning_rate": 0.01, "loss": 2.064, "step": 13428 }, { "epoch": 1.3783866995073892, "grad_norm": 0.04804680123925209, "learning_rate": 0.01, "loss": 2.0684, "step": 13431 }, { "epoch": 1.3786945812807883, "grad_norm": 0.08567364513874054, "learning_rate": 0.01, "loss": 2.0617, "step": 13434 }, { "epoch": 1.3790024630541873, "grad_norm": 0.06027091667056084, "learning_rate": 0.01, "loss": 2.0666, "step": 13437 }, { "epoch": 1.3793103448275863, "grad_norm": 0.08809462189674377, "learning_rate": 0.01, "loss": 2.0862, "step": 13440 }, { "epoch": 1.3796182266009853, "grad_norm": 0.053466469049453735, "learning_rate": 0.01, "loss": 2.0497, "step": 13443 }, { "epoch": 1.3799261083743843, "grad_norm": 0.033619511872529984, "learning_rate": 0.01, "loss": 2.0298, "step": 13446 }, { "epoch": 1.3802339901477834, "grad_norm": 0.04768878221511841, "learning_rate": 0.01, "loss": 2.054, "step": 13449 }, { "epoch": 1.3805418719211824, "grad_norm": 0.07854757457971573, "learning_rate": 0.01, "loss": 2.0538, "step": 13452 }, { "epoch": 1.3808497536945814, "grad_norm": 0.05409559607505798, "learning_rate": 0.01, "loss": 2.082, "step": 13455 }, { "epoch": 1.3811576354679804, "grad_norm": 0.057855118066072464, "learning_rate": 0.01, "loss": 2.0814, "step": 13458 }, { "epoch": 1.3814655172413794, "grad_norm": 0.047502126544713974, "learning_rate": 0.01, "loss": 2.0955, "step": 13461 }, { "epoch": 1.3817733990147782, "grad_norm": 0.040939487516880035, "learning_rate": 0.01, "loss": 2.0431, "step": 13464 }, { "epoch": 1.3820812807881773, "grad_norm": 0.1307850480079651, "learning_rate": 0.01, "loss": 2.0801, "step": 13467 }, { "epoch": 1.3823891625615763, "grad_norm": 0.04386845603585243, "learning_rate": 0.01, "loss": 2.0662, "step": 13470 }, { "epoch": 1.3826970443349753, "grad_norm": 0.08174968510866165, "learning_rate": 0.01, "loss": 2.0544, "step": 13473 }, { "epoch": 1.3830049261083743, "grad_norm": 0.1113237589597702, "learning_rate": 0.01, "loss": 2.0411, "step": 13476 }, { "epoch": 1.3833128078817734, "grad_norm": 0.06756308674812317, "learning_rate": 0.01, "loss": 2.0813, "step": 13479 }, { "epoch": 1.3836206896551724, "grad_norm": 0.05931835621595383, "learning_rate": 0.01, "loss": 2.0544, "step": 13482 }, { "epoch": 1.3839285714285714, "grad_norm": 0.043539129197597504, "learning_rate": 0.01, "loss": 2.0528, "step": 13485 }, { "epoch": 1.3842364532019704, "grad_norm": 0.0510721355676651, "learning_rate": 0.01, "loss": 2.075, "step": 13488 }, { "epoch": 1.3845443349753694, "grad_norm": 0.10811501741409302, "learning_rate": 0.01, "loss": 2.0626, "step": 13491 }, { "epoch": 1.3848522167487685, "grad_norm": 0.08322811871767044, "learning_rate": 0.01, "loss": 2.0882, "step": 13494 }, { "epoch": 1.3851600985221675, "grad_norm": 0.05101979896426201, "learning_rate": 0.01, "loss": 2.0677, "step": 13497 }, { "epoch": 1.3854679802955665, "grad_norm": 0.036535851657390594, "learning_rate": 0.01, "loss": 2.0791, "step": 13500 }, { "epoch": 1.3857758620689655, "grad_norm": 0.05161239951848984, "learning_rate": 0.01, "loss": 2.0513, "step": 13503 }, { "epoch": 1.3860837438423645, "grad_norm": 0.06677153706550598, "learning_rate": 0.01, "loss": 2.0581, "step": 13506 }, { "epoch": 1.3863916256157636, "grad_norm": 0.04239841178059578, "learning_rate": 0.01, "loss": 2.0383, "step": 13509 }, { "epoch": 1.3866995073891626, "grad_norm": 0.04252205416560173, "learning_rate": 0.01, "loss": 2.0875, "step": 13512 }, { "epoch": 1.3870073891625616, "grad_norm": 0.10147176682949066, "learning_rate": 0.01, "loss": 2.0631, "step": 13515 }, { "epoch": 1.3873152709359606, "grad_norm": 0.046371154487133026, "learning_rate": 0.01, "loss": 2.0784, "step": 13518 }, { "epoch": 1.3876231527093597, "grad_norm": 0.0997064933180809, "learning_rate": 0.01, "loss": 2.0578, "step": 13521 }, { "epoch": 1.3879310344827587, "grad_norm": 0.05733582749962807, "learning_rate": 0.01, "loss": 2.0742, "step": 13524 }, { "epoch": 1.3882389162561577, "grad_norm": 0.05061260983347893, "learning_rate": 0.01, "loss": 2.0448, "step": 13527 }, { "epoch": 1.3885467980295567, "grad_norm": 0.04336051642894745, "learning_rate": 0.01, "loss": 2.0846, "step": 13530 }, { "epoch": 1.3888546798029557, "grad_norm": 0.04115337133407593, "learning_rate": 0.01, "loss": 2.0577, "step": 13533 }, { "epoch": 1.3891625615763548, "grad_norm": 0.04914069175720215, "learning_rate": 0.01, "loss": 2.0815, "step": 13536 }, { "epoch": 1.3894704433497536, "grad_norm": 0.0677042305469513, "learning_rate": 0.01, "loss": 2.0754, "step": 13539 }, { "epoch": 1.3897783251231526, "grad_norm": 0.04985208064317703, "learning_rate": 0.01, "loss": 2.0633, "step": 13542 }, { "epoch": 1.3900862068965516, "grad_norm": 0.08042199164628983, "learning_rate": 0.01, "loss": 2.0828, "step": 13545 }, { "epoch": 1.3903940886699506, "grad_norm": 0.03648814186453819, "learning_rate": 0.01, "loss": 2.0669, "step": 13548 }, { "epoch": 1.3907019704433496, "grad_norm": 0.03645399957895279, "learning_rate": 0.01, "loss": 2.0495, "step": 13551 }, { "epoch": 1.3910098522167487, "grad_norm": 0.04866683483123779, "learning_rate": 0.01, "loss": 2.0531, "step": 13554 }, { "epoch": 1.3913177339901477, "grad_norm": 0.07728299498558044, "learning_rate": 0.01, "loss": 2.0671, "step": 13557 }, { "epoch": 1.3916256157635467, "grad_norm": 0.12097810208797455, "learning_rate": 0.01, "loss": 2.0935, "step": 13560 }, { "epoch": 1.3919334975369457, "grad_norm": 0.12485776096582413, "learning_rate": 0.01, "loss": 2.0712, "step": 13563 }, { "epoch": 1.3922413793103448, "grad_norm": 0.053524475544691086, "learning_rate": 0.01, "loss": 2.0595, "step": 13566 }, { "epoch": 1.3925492610837438, "grad_norm": 0.04277713969349861, "learning_rate": 0.01, "loss": 2.0791, "step": 13569 }, { "epoch": 1.3928571428571428, "grad_norm": 0.09847384691238403, "learning_rate": 0.01, "loss": 2.0754, "step": 13572 }, { "epoch": 1.3931650246305418, "grad_norm": 0.03410463035106659, "learning_rate": 0.01, "loss": 2.0556, "step": 13575 }, { "epoch": 1.3934729064039408, "grad_norm": 0.10606678575277328, "learning_rate": 0.01, "loss": 2.0637, "step": 13578 }, { "epoch": 1.3937807881773399, "grad_norm": 0.06554549187421799, "learning_rate": 0.01, "loss": 2.0549, "step": 13581 }, { "epoch": 1.3940886699507389, "grad_norm": 0.07487329095602036, "learning_rate": 0.01, "loss": 2.0645, "step": 13584 }, { "epoch": 1.394396551724138, "grad_norm": 0.07526996731758118, "learning_rate": 0.01, "loss": 2.0733, "step": 13587 }, { "epoch": 1.394704433497537, "grad_norm": 0.0581665076315403, "learning_rate": 0.01, "loss": 2.0491, "step": 13590 }, { "epoch": 1.395012315270936, "grad_norm": 0.057513732463121414, "learning_rate": 0.01, "loss": 2.087, "step": 13593 }, { "epoch": 1.395320197044335, "grad_norm": 0.037192508578300476, "learning_rate": 0.01, "loss": 2.0487, "step": 13596 }, { "epoch": 1.395628078817734, "grad_norm": 0.0965125560760498, "learning_rate": 0.01, "loss": 2.0549, "step": 13599 }, { "epoch": 1.395935960591133, "grad_norm": 0.04594407603144646, "learning_rate": 0.01, "loss": 2.067, "step": 13602 }, { "epoch": 1.396243842364532, "grad_norm": 0.08442319929599762, "learning_rate": 0.01, "loss": 2.0627, "step": 13605 }, { "epoch": 1.396551724137931, "grad_norm": 0.08673713356256485, "learning_rate": 0.01, "loss": 2.054, "step": 13608 }, { "epoch": 1.39685960591133, "grad_norm": 0.07299968600273132, "learning_rate": 0.01, "loss": 2.0672, "step": 13611 }, { "epoch": 1.397167487684729, "grad_norm": 0.052630744874477386, "learning_rate": 0.01, "loss": 2.079, "step": 13614 }, { "epoch": 1.3974753694581281, "grad_norm": 0.0626215934753418, "learning_rate": 0.01, "loss": 2.0732, "step": 13617 }, { "epoch": 1.3977832512315271, "grad_norm": 0.0866907387971878, "learning_rate": 0.01, "loss": 2.0743, "step": 13620 }, { "epoch": 1.3980911330049262, "grad_norm": 0.05650071054697037, "learning_rate": 0.01, "loss": 2.0856, "step": 13623 }, { "epoch": 1.3983990147783252, "grad_norm": 0.07526635378599167, "learning_rate": 0.01, "loss": 2.0707, "step": 13626 }, { "epoch": 1.3987068965517242, "grad_norm": 0.07472112774848938, "learning_rate": 0.01, "loss": 2.0608, "step": 13629 }, { "epoch": 1.3990147783251232, "grad_norm": 0.07251216471195221, "learning_rate": 0.01, "loss": 2.086, "step": 13632 }, { "epoch": 1.3993226600985222, "grad_norm": 0.08701921999454498, "learning_rate": 0.01, "loss": 2.0382, "step": 13635 }, { "epoch": 1.3996305418719213, "grad_norm": 0.033323436975479126, "learning_rate": 0.01, "loss": 2.0716, "step": 13638 }, { "epoch": 1.3999384236453203, "grad_norm": 0.04960713908076286, "learning_rate": 0.01, "loss": 2.0557, "step": 13641 }, { "epoch": 1.4002463054187193, "grad_norm": 0.1418198049068451, "learning_rate": 0.01, "loss": 2.0528, "step": 13644 }, { "epoch": 1.4005541871921183, "grad_norm": 0.06056910380721092, "learning_rate": 0.01, "loss": 2.0825, "step": 13647 }, { "epoch": 1.4008620689655173, "grad_norm": 0.062474992126226425, "learning_rate": 0.01, "loss": 2.0195, "step": 13650 }, { "epoch": 1.4011699507389164, "grad_norm": 0.05380658805370331, "learning_rate": 0.01, "loss": 2.0644, "step": 13653 }, { "epoch": 1.4014778325123154, "grad_norm": 0.046230513602495193, "learning_rate": 0.01, "loss": 2.062, "step": 13656 }, { "epoch": 1.4017857142857144, "grad_norm": 0.05238807573914528, "learning_rate": 0.01, "loss": 2.0976, "step": 13659 }, { "epoch": 1.4020935960591134, "grad_norm": 0.045423392206430435, "learning_rate": 0.01, "loss": 2.0729, "step": 13662 }, { "epoch": 1.4024014778325122, "grad_norm": 0.1077577993273735, "learning_rate": 0.01, "loss": 2.0481, "step": 13665 }, { "epoch": 1.4027093596059113, "grad_norm": 0.04370421916246414, "learning_rate": 0.01, "loss": 2.0634, "step": 13668 }, { "epoch": 1.4030172413793103, "grad_norm": 0.061496302485466, "learning_rate": 0.01, "loss": 2.0449, "step": 13671 }, { "epoch": 1.4033251231527093, "grad_norm": 0.048742834478616714, "learning_rate": 0.01, "loss": 2.0683, "step": 13674 }, { "epoch": 1.4036330049261083, "grad_norm": 0.14942513406276703, "learning_rate": 0.01, "loss": 2.0616, "step": 13677 }, { "epoch": 1.4039408866995073, "grad_norm": 0.04235846549272537, "learning_rate": 0.01, "loss": 2.0611, "step": 13680 }, { "epoch": 1.4042487684729064, "grad_norm": 0.05509978160262108, "learning_rate": 0.01, "loss": 2.0645, "step": 13683 }, { "epoch": 1.4045566502463054, "grad_norm": 0.09692233055830002, "learning_rate": 0.01, "loss": 2.0703, "step": 13686 }, { "epoch": 1.4048645320197044, "grad_norm": 0.11141908913850784, "learning_rate": 0.01, "loss": 2.0724, "step": 13689 }, { "epoch": 1.4051724137931034, "grad_norm": 0.06601562350988388, "learning_rate": 0.01, "loss": 2.0719, "step": 13692 }, { "epoch": 1.4054802955665024, "grad_norm": 0.04997260868549347, "learning_rate": 0.01, "loss": 2.056, "step": 13695 }, { "epoch": 1.4057881773399015, "grad_norm": 0.07198163866996765, "learning_rate": 0.01, "loss": 2.0546, "step": 13698 }, { "epoch": 1.4060960591133005, "grad_norm": 0.03802650794386864, "learning_rate": 0.01, "loss": 2.0696, "step": 13701 }, { "epoch": 1.4064039408866995, "grad_norm": 0.06259030848741531, "learning_rate": 0.01, "loss": 2.0459, "step": 13704 }, { "epoch": 1.4067118226600985, "grad_norm": 0.09554235637187958, "learning_rate": 0.01, "loss": 2.0476, "step": 13707 }, { "epoch": 1.4070197044334976, "grad_norm": 0.056935038417577744, "learning_rate": 0.01, "loss": 2.0522, "step": 13710 }, { "epoch": 1.4073275862068966, "grad_norm": 0.11038411408662796, "learning_rate": 0.01, "loss": 2.0567, "step": 13713 }, { "epoch": 1.4076354679802956, "grad_norm": 0.05257488042116165, "learning_rate": 0.01, "loss": 2.09, "step": 13716 }, { "epoch": 1.4079433497536946, "grad_norm": 0.0573866032063961, "learning_rate": 0.01, "loss": 2.0538, "step": 13719 }, { "epoch": 1.4082512315270936, "grad_norm": 0.04933631047606468, "learning_rate": 0.01, "loss": 2.0435, "step": 13722 }, { "epoch": 1.4085591133004927, "grad_norm": 0.05909980088472366, "learning_rate": 0.01, "loss": 2.0554, "step": 13725 }, { "epoch": 1.4088669950738917, "grad_norm": 0.09598751366138458, "learning_rate": 0.01, "loss": 2.0718, "step": 13728 }, { "epoch": 1.4091748768472907, "grad_norm": 0.05608231574296951, "learning_rate": 0.01, "loss": 2.0621, "step": 13731 }, { "epoch": 1.4094827586206897, "grad_norm": 0.08262834697961807, "learning_rate": 0.01, "loss": 2.0661, "step": 13734 }, { "epoch": 1.4097906403940887, "grad_norm": 0.041144959628582, "learning_rate": 0.01, "loss": 2.0646, "step": 13737 }, { "epoch": 1.4100985221674878, "grad_norm": 0.03748650476336479, "learning_rate": 0.01, "loss": 2.0558, "step": 13740 }, { "epoch": 1.4104064039408866, "grad_norm": 0.04054822400212288, "learning_rate": 0.01, "loss": 2.0564, "step": 13743 }, { "epoch": 1.4107142857142856, "grad_norm": 0.07961263507604599, "learning_rate": 0.01, "loss": 2.0513, "step": 13746 }, { "epoch": 1.4110221674876846, "grad_norm": 0.049971841275691986, "learning_rate": 0.01, "loss": 2.033, "step": 13749 }, { "epoch": 1.4113300492610836, "grad_norm": 0.040059734135866165, "learning_rate": 0.01, "loss": 2.0791, "step": 13752 }, { "epoch": 1.4116379310344827, "grad_norm": 0.0400179885327816, "learning_rate": 0.01, "loss": 2.0496, "step": 13755 }, { "epoch": 1.4119458128078817, "grad_norm": 0.04587862268090248, "learning_rate": 0.01, "loss": 2.0572, "step": 13758 }, { "epoch": 1.4122536945812807, "grad_norm": 0.08982817828655243, "learning_rate": 0.01, "loss": 2.0864, "step": 13761 }, { "epoch": 1.4125615763546797, "grad_norm": 0.05488836392760277, "learning_rate": 0.01, "loss": 2.0529, "step": 13764 }, { "epoch": 1.4128694581280787, "grad_norm": 0.06559593975543976, "learning_rate": 0.01, "loss": 2.0564, "step": 13767 }, { "epoch": 1.4131773399014778, "grad_norm": 0.10647718608379364, "learning_rate": 0.01, "loss": 2.0404, "step": 13770 }, { "epoch": 1.4134852216748768, "grad_norm": 0.05944173410534859, "learning_rate": 0.01, "loss": 2.053, "step": 13773 }, { "epoch": 1.4137931034482758, "grad_norm": 0.05548718199133873, "learning_rate": 0.01, "loss": 2.0534, "step": 13776 }, { "epoch": 1.4141009852216748, "grad_norm": 0.0694265142083168, "learning_rate": 0.01, "loss": 2.0647, "step": 13779 }, { "epoch": 1.4144088669950738, "grad_norm": 0.10526683181524277, "learning_rate": 0.01, "loss": 2.0768, "step": 13782 }, { "epoch": 1.4147167487684729, "grad_norm": 0.08820123970508575, "learning_rate": 0.01, "loss": 2.0693, "step": 13785 }, { "epoch": 1.4150246305418719, "grad_norm": 0.04513731971383095, "learning_rate": 0.01, "loss": 2.0596, "step": 13788 }, { "epoch": 1.415332512315271, "grad_norm": 0.05737076327204704, "learning_rate": 0.01, "loss": 2.0698, "step": 13791 }, { "epoch": 1.41564039408867, "grad_norm": 0.0431799478828907, "learning_rate": 0.01, "loss": 2.0603, "step": 13794 }, { "epoch": 1.415948275862069, "grad_norm": 0.09012471139431, "learning_rate": 0.01, "loss": 2.0634, "step": 13797 }, { "epoch": 1.416256157635468, "grad_norm": 0.05895904824137688, "learning_rate": 0.01, "loss": 2.0516, "step": 13800 }, { "epoch": 1.416564039408867, "grad_norm": 0.1610986888408661, "learning_rate": 0.01, "loss": 2.0743, "step": 13803 }, { "epoch": 1.416871921182266, "grad_norm": 0.07852904498577118, "learning_rate": 0.01, "loss": 2.064, "step": 13806 }, { "epoch": 1.417179802955665, "grad_norm": 0.06620481610298157, "learning_rate": 0.01, "loss": 2.0688, "step": 13809 }, { "epoch": 1.417487684729064, "grad_norm": 0.033222537487745285, "learning_rate": 0.01, "loss": 2.0702, "step": 13812 }, { "epoch": 1.417795566502463, "grad_norm": 0.02942623570561409, "learning_rate": 0.01, "loss": 2.0654, "step": 13815 }, { "epoch": 1.418103448275862, "grad_norm": 0.03543059900403023, "learning_rate": 0.01, "loss": 2.0776, "step": 13818 }, { "epoch": 1.4184113300492611, "grad_norm": 0.13414567708969116, "learning_rate": 0.01, "loss": 2.0621, "step": 13821 }, { "epoch": 1.4187192118226601, "grad_norm": 0.06474481523036957, "learning_rate": 0.01, "loss": 2.0675, "step": 13824 }, { "epoch": 1.4190270935960592, "grad_norm": 0.11285994201898575, "learning_rate": 0.01, "loss": 2.0533, "step": 13827 }, { "epoch": 1.4193349753694582, "grad_norm": 0.05104577913880348, "learning_rate": 0.01, "loss": 2.0835, "step": 13830 }, { "epoch": 1.4196428571428572, "grad_norm": 0.05463656783103943, "learning_rate": 0.01, "loss": 2.0788, "step": 13833 }, { "epoch": 1.4199507389162562, "grad_norm": 0.07886187732219696, "learning_rate": 0.01, "loss": 2.0538, "step": 13836 }, { "epoch": 1.4202586206896552, "grad_norm": 0.06960279494524002, "learning_rate": 0.01, "loss": 2.0927, "step": 13839 }, { "epoch": 1.4205665024630543, "grad_norm": 0.07481426745653152, "learning_rate": 0.01, "loss": 2.0421, "step": 13842 }, { "epoch": 1.4208743842364533, "grad_norm": 0.04317006468772888, "learning_rate": 0.01, "loss": 2.0665, "step": 13845 }, { "epoch": 1.4211822660098523, "grad_norm": 0.10644064098596573, "learning_rate": 0.01, "loss": 2.0434, "step": 13848 }, { "epoch": 1.4214901477832513, "grad_norm": 0.09246213734149933, "learning_rate": 0.01, "loss": 2.0487, "step": 13851 }, { "epoch": 1.4217980295566504, "grad_norm": 0.05824518948793411, "learning_rate": 0.01, "loss": 2.0633, "step": 13854 }, { "epoch": 1.4221059113300494, "grad_norm": 0.06316854059696198, "learning_rate": 0.01, "loss": 2.076, "step": 13857 }, { "epoch": 1.4224137931034484, "grad_norm": 0.058339640498161316, "learning_rate": 0.01, "loss": 2.0702, "step": 13860 }, { "epoch": 1.4227216748768474, "grad_norm": 0.05458427220582962, "learning_rate": 0.01, "loss": 2.0585, "step": 13863 }, { "epoch": 1.4230295566502464, "grad_norm": 0.041047900915145874, "learning_rate": 0.01, "loss": 2.0793, "step": 13866 }, { "epoch": 1.4233374384236452, "grad_norm": 0.04485390707850456, "learning_rate": 0.01, "loss": 2.0723, "step": 13869 }, { "epoch": 1.4236453201970443, "grad_norm": 0.13340137898921967, "learning_rate": 0.01, "loss": 2.0555, "step": 13872 }, { "epoch": 1.4239532019704433, "grad_norm": 0.05519254505634308, "learning_rate": 0.01, "loss": 2.0931, "step": 13875 }, { "epoch": 1.4242610837438423, "grad_norm": 0.07989728450775146, "learning_rate": 0.01, "loss": 2.0435, "step": 13878 }, { "epoch": 1.4245689655172413, "grad_norm": 0.09447802603244781, "learning_rate": 0.01, "loss": 2.061, "step": 13881 }, { "epoch": 1.4248768472906403, "grad_norm": 0.05240226909518242, "learning_rate": 0.01, "loss": 2.0643, "step": 13884 }, { "epoch": 1.4251847290640394, "grad_norm": 0.07171013206243515, "learning_rate": 0.01, "loss": 2.0829, "step": 13887 }, { "epoch": 1.4254926108374384, "grad_norm": 0.05098895728588104, "learning_rate": 0.01, "loss": 2.0716, "step": 13890 }, { "epoch": 1.4258004926108374, "grad_norm": 0.08569507300853729, "learning_rate": 0.01, "loss": 2.0699, "step": 13893 }, { "epoch": 1.4261083743842364, "grad_norm": 0.09055166691541672, "learning_rate": 0.01, "loss": 2.0843, "step": 13896 }, { "epoch": 1.4264162561576355, "grad_norm": 0.03242780640721321, "learning_rate": 0.01, "loss": 2.0433, "step": 13899 }, { "epoch": 1.4267241379310345, "grad_norm": 0.04612202197313309, "learning_rate": 0.01, "loss": 2.0327, "step": 13902 }, { "epoch": 1.4270320197044335, "grad_norm": 0.05800663307309151, "learning_rate": 0.01, "loss": 2.0362, "step": 13905 }, { "epoch": 1.4273399014778325, "grad_norm": 0.04150572046637535, "learning_rate": 0.01, "loss": 2.0504, "step": 13908 }, { "epoch": 1.4276477832512315, "grad_norm": 0.08542584627866745, "learning_rate": 0.01, "loss": 2.0406, "step": 13911 }, { "epoch": 1.4279556650246306, "grad_norm": 0.11966803669929504, "learning_rate": 0.01, "loss": 2.0829, "step": 13914 }, { "epoch": 1.4282635467980296, "grad_norm": 0.12066449970006943, "learning_rate": 0.01, "loss": 2.0657, "step": 13917 }, { "epoch": 1.4285714285714286, "grad_norm": 0.04103751480579376, "learning_rate": 0.01, "loss": 2.0496, "step": 13920 }, { "epoch": 1.4288793103448276, "grad_norm": 0.05432034656405449, "learning_rate": 0.01, "loss": 2.0563, "step": 13923 }, { "epoch": 1.4291871921182266, "grad_norm": 0.03935731574892998, "learning_rate": 0.01, "loss": 2.0463, "step": 13926 }, { "epoch": 1.4294950738916257, "grad_norm": 0.1475706547498703, "learning_rate": 0.01, "loss": 2.0641, "step": 13929 }, { "epoch": 1.4298029556650247, "grad_norm": 0.06562622636556625, "learning_rate": 0.01, "loss": 2.0536, "step": 13932 }, { "epoch": 1.4301108374384237, "grad_norm": 0.051726315170526505, "learning_rate": 0.01, "loss": 2.0506, "step": 13935 }, { "epoch": 1.4304187192118227, "grad_norm": 0.0998329371213913, "learning_rate": 0.01, "loss": 2.0521, "step": 13938 }, { "epoch": 1.4307266009852218, "grad_norm": 0.04965333640575409, "learning_rate": 0.01, "loss": 2.0568, "step": 13941 }, { "epoch": 1.4310344827586206, "grad_norm": 0.04430006071925163, "learning_rate": 0.01, "loss": 2.0735, "step": 13944 }, { "epoch": 1.4313423645320196, "grad_norm": 0.05260150134563446, "learning_rate": 0.01, "loss": 2.0887, "step": 13947 }, { "epoch": 1.4316502463054186, "grad_norm": 0.04135138541460037, "learning_rate": 0.01, "loss": 2.055, "step": 13950 }, { "epoch": 1.4319581280788176, "grad_norm": 0.08347123116254807, "learning_rate": 0.01, "loss": 2.0826, "step": 13953 }, { "epoch": 1.4322660098522166, "grad_norm": 0.12328385561704636, "learning_rate": 0.01, "loss": 2.0787, "step": 13956 }, { "epoch": 1.4325738916256157, "grad_norm": 0.05809056758880615, "learning_rate": 0.01, "loss": 2.0798, "step": 13959 }, { "epoch": 1.4328817733990147, "grad_norm": 0.038590408861637115, "learning_rate": 0.01, "loss": 2.0699, "step": 13962 }, { "epoch": 1.4331896551724137, "grad_norm": 0.11158851534128189, "learning_rate": 0.01, "loss": 2.0707, "step": 13965 }, { "epoch": 1.4334975369458127, "grad_norm": 0.0880589634180069, "learning_rate": 0.01, "loss": 2.0425, "step": 13968 }, { "epoch": 1.4338054187192117, "grad_norm": 0.059966232627630234, "learning_rate": 0.01, "loss": 2.0659, "step": 13971 }, { "epoch": 1.4341133004926108, "grad_norm": 0.04661833122372627, "learning_rate": 0.01, "loss": 2.0736, "step": 13974 }, { "epoch": 1.4344211822660098, "grad_norm": 0.04508896544575691, "learning_rate": 0.01, "loss": 2.0856, "step": 13977 }, { "epoch": 1.4347290640394088, "grad_norm": 0.051987554877996445, "learning_rate": 0.01, "loss": 2.0449, "step": 13980 }, { "epoch": 1.4350369458128078, "grad_norm": 0.04814029112458229, "learning_rate": 0.01, "loss": 2.0587, "step": 13983 }, { "epoch": 1.4353448275862069, "grad_norm": 0.09631717205047607, "learning_rate": 0.01, "loss": 2.0747, "step": 13986 }, { "epoch": 1.4356527093596059, "grad_norm": 0.06581971794366837, "learning_rate": 0.01, "loss": 2.0456, "step": 13989 }, { "epoch": 1.435960591133005, "grad_norm": 0.06483247131109238, "learning_rate": 0.01, "loss": 2.0627, "step": 13992 }, { "epoch": 1.436268472906404, "grad_norm": 0.1000155657529831, "learning_rate": 0.01, "loss": 2.0655, "step": 13995 }, { "epoch": 1.436576354679803, "grad_norm": 0.07297492027282715, "learning_rate": 0.01, "loss": 2.0686, "step": 13998 }, { "epoch": 1.436884236453202, "grad_norm": 0.054907578974962234, "learning_rate": 0.01, "loss": 2.0351, "step": 14001 }, { "epoch": 1.437192118226601, "grad_norm": 0.051127828657627106, "learning_rate": 0.01, "loss": 2.0583, "step": 14004 }, { "epoch": 1.4375, "grad_norm": 0.04157300665974617, "learning_rate": 0.01, "loss": 2.0548, "step": 14007 }, { "epoch": 1.437807881773399, "grad_norm": 0.07996746897697449, "learning_rate": 0.01, "loss": 2.0625, "step": 14010 }, { "epoch": 1.438115763546798, "grad_norm": 0.0764036774635315, "learning_rate": 0.01, "loss": 2.076, "step": 14013 }, { "epoch": 1.438423645320197, "grad_norm": 0.10736891627311707, "learning_rate": 0.01, "loss": 2.0682, "step": 14016 }, { "epoch": 1.438731527093596, "grad_norm": 0.0598980113863945, "learning_rate": 0.01, "loss": 2.0774, "step": 14019 }, { "epoch": 1.439039408866995, "grad_norm": 0.10858605802059174, "learning_rate": 0.01, "loss": 2.0708, "step": 14022 }, { "epoch": 1.4393472906403941, "grad_norm": 0.03999786823987961, "learning_rate": 0.01, "loss": 2.0521, "step": 14025 }, { "epoch": 1.4396551724137931, "grad_norm": 0.053138673305511475, "learning_rate": 0.01, "loss": 2.0547, "step": 14028 }, { "epoch": 1.4399630541871922, "grad_norm": 0.06477091461420059, "learning_rate": 0.01, "loss": 2.0371, "step": 14031 }, { "epoch": 1.4402709359605912, "grad_norm": 0.035987384617328644, "learning_rate": 0.01, "loss": 2.0897, "step": 14034 }, { "epoch": 1.4405788177339902, "grad_norm": 0.06938667595386505, "learning_rate": 0.01, "loss": 2.0895, "step": 14037 }, { "epoch": 1.4408866995073892, "grad_norm": 0.041746124625205994, "learning_rate": 0.01, "loss": 2.0705, "step": 14040 }, { "epoch": 1.4411945812807883, "grad_norm": 0.04503123462200165, "learning_rate": 0.01, "loss": 2.0528, "step": 14043 }, { "epoch": 1.4415024630541873, "grad_norm": 0.04153100401163101, "learning_rate": 0.01, "loss": 2.0596, "step": 14046 }, { "epoch": 1.4418103448275863, "grad_norm": 0.04992615804076195, "learning_rate": 0.01, "loss": 2.0527, "step": 14049 }, { "epoch": 1.4421182266009853, "grad_norm": 0.0718725174665451, "learning_rate": 0.01, "loss": 2.049, "step": 14052 }, { "epoch": 1.4424261083743843, "grad_norm": 0.13080431520938873, "learning_rate": 0.01, "loss": 2.0457, "step": 14055 }, { "epoch": 1.4427339901477834, "grad_norm": 0.04203762486577034, "learning_rate": 0.01, "loss": 2.0713, "step": 14058 }, { "epoch": 1.4430418719211824, "grad_norm": 0.04111120104789734, "learning_rate": 0.01, "loss": 2.0845, "step": 14061 }, { "epoch": 1.4433497536945814, "grad_norm": 0.044398125261068344, "learning_rate": 0.01, "loss": 2.0679, "step": 14064 }, { "epoch": 1.4436576354679804, "grad_norm": 0.031682152301073074, "learning_rate": 0.01, "loss": 2.0855, "step": 14067 }, { "epoch": 1.4439655172413794, "grad_norm": 0.07848865538835526, "learning_rate": 0.01, "loss": 2.0607, "step": 14070 }, { "epoch": 1.4442733990147782, "grad_norm": 0.08814079314470291, "learning_rate": 0.01, "loss": 2.0468, "step": 14073 }, { "epoch": 1.4445812807881773, "grad_norm": 0.05344429612159729, "learning_rate": 0.01, "loss": 2.0499, "step": 14076 }, { "epoch": 1.4448891625615763, "grad_norm": 0.05509471520781517, "learning_rate": 0.01, "loss": 2.0662, "step": 14079 }, { "epoch": 1.4451970443349753, "grad_norm": 0.08177798241376877, "learning_rate": 0.01, "loss": 2.0509, "step": 14082 }, { "epoch": 1.4455049261083743, "grad_norm": 0.07953787595033646, "learning_rate": 0.01, "loss": 2.0501, "step": 14085 }, { "epoch": 1.4458128078817734, "grad_norm": 0.06984551250934601, "learning_rate": 0.01, "loss": 2.0599, "step": 14088 }, { "epoch": 1.4461206896551724, "grad_norm": 0.07923319190740585, "learning_rate": 0.01, "loss": 2.06, "step": 14091 }, { "epoch": 1.4464285714285714, "grad_norm": 0.04370349645614624, "learning_rate": 0.01, "loss": 2.0839, "step": 14094 }, { "epoch": 1.4467364532019704, "grad_norm": 0.045787643641233444, "learning_rate": 0.01, "loss": 2.0512, "step": 14097 }, { "epoch": 1.4470443349753694, "grad_norm": 0.04126288741827011, "learning_rate": 0.01, "loss": 2.0603, "step": 14100 }, { "epoch": 1.4473522167487685, "grad_norm": 0.039805784821510315, "learning_rate": 0.01, "loss": 2.0561, "step": 14103 }, { "epoch": 1.4476600985221675, "grad_norm": 0.038430992513895035, "learning_rate": 0.01, "loss": 2.0697, "step": 14106 }, { "epoch": 1.4479679802955665, "grad_norm": 0.07664498686790466, "learning_rate": 0.01, "loss": 2.0381, "step": 14109 }, { "epoch": 1.4482758620689655, "grad_norm": 0.04592788219451904, "learning_rate": 0.01, "loss": 2.042, "step": 14112 }, { "epoch": 1.4485837438423645, "grad_norm": 0.06161922961473465, "learning_rate": 0.01, "loss": 2.0446, "step": 14115 }, { "epoch": 1.4488916256157636, "grad_norm": 0.07906373590230942, "learning_rate": 0.01, "loss": 2.0758, "step": 14118 }, { "epoch": 1.4491995073891626, "grad_norm": 0.09529503434896469, "learning_rate": 0.01, "loss": 2.0512, "step": 14121 }, { "epoch": 1.4495073891625616, "grad_norm": 0.05416659638285637, "learning_rate": 0.01, "loss": 2.0484, "step": 14124 }, { "epoch": 1.4498152709359606, "grad_norm": 0.07085006684064865, "learning_rate": 0.01, "loss": 2.0293, "step": 14127 }, { "epoch": 1.4501231527093597, "grad_norm": 0.07722880691289902, "learning_rate": 0.01, "loss": 2.0578, "step": 14130 }, { "epoch": 1.4504310344827587, "grad_norm": 0.06599342823028564, "learning_rate": 0.01, "loss": 2.0556, "step": 14133 }, { "epoch": 1.4507389162561577, "grad_norm": 0.11217498779296875, "learning_rate": 0.01, "loss": 2.068, "step": 14136 }, { "epoch": 1.4510467980295567, "grad_norm": 0.13082845509052277, "learning_rate": 0.01, "loss": 2.0677, "step": 14139 }, { "epoch": 1.4513546798029557, "grad_norm": 0.06812801957130432, "learning_rate": 0.01, "loss": 2.0668, "step": 14142 }, { "epoch": 1.4516625615763548, "grad_norm": 0.043554674834012985, "learning_rate": 0.01, "loss": 2.0537, "step": 14145 }, { "epoch": 1.4519704433497536, "grad_norm": 0.04550860822200775, "learning_rate": 0.01, "loss": 2.0415, "step": 14148 }, { "epoch": 1.4522783251231526, "grad_norm": 0.04838492348790169, "learning_rate": 0.01, "loss": 2.0642, "step": 14151 }, { "epoch": 1.4525862068965516, "grad_norm": 0.08433537930250168, "learning_rate": 0.01, "loss": 2.0549, "step": 14154 }, { "epoch": 1.4528940886699506, "grad_norm": 0.06986009329557419, "learning_rate": 0.01, "loss": 2.057, "step": 14157 }, { "epoch": 1.4532019704433496, "grad_norm": 0.06860263645648956, "learning_rate": 0.01, "loss": 2.0581, "step": 14160 }, { "epoch": 1.4535098522167487, "grad_norm": 0.06218327581882477, "learning_rate": 0.01, "loss": 2.0482, "step": 14163 }, { "epoch": 1.4538177339901477, "grad_norm": 0.10177832096815109, "learning_rate": 0.01, "loss": 2.059, "step": 14166 }, { "epoch": 1.4541256157635467, "grad_norm": 0.047695957124233246, "learning_rate": 0.01, "loss": 2.0372, "step": 14169 }, { "epoch": 1.4544334975369457, "grad_norm": 0.09761510044336319, "learning_rate": 0.01, "loss": 2.0671, "step": 14172 }, { "epoch": 1.4547413793103448, "grad_norm": 0.050296783447265625, "learning_rate": 0.01, "loss": 2.0536, "step": 14175 }, { "epoch": 1.4550492610837438, "grad_norm": 0.13070064783096313, "learning_rate": 0.01, "loss": 2.0579, "step": 14178 }, { "epoch": 1.4553571428571428, "grad_norm": 0.1080620214343071, "learning_rate": 0.01, "loss": 2.0455, "step": 14181 }, { "epoch": 1.4556650246305418, "grad_norm": 0.06132792308926582, "learning_rate": 0.01, "loss": 2.062, "step": 14184 }, { "epoch": 1.4559729064039408, "grad_norm": 0.07258635014295578, "learning_rate": 0.01, "loss": 2.0666, "step": 14187 }, { "epoch": 1.4562807881773399, "grad_norm": 0.05423443764448166, "learning_rate": 0.01, "loss": 2.0676, "step": 14190 }, { "epoch": 1.4565886699507389, "grad_norm": 0.06038088724017143, "learning_rate": 0.01, "loss": 2.0539, "step": 14193 }, { "epoch": 1.456896551724138, "grad_norm": 0.043958742171525955, "learning_rate": 0.01, "loss": 2.0739, "step": 14196 }, { "epoch": 1.457204433497537, "grad_norm": 0.03985238075256348, "learning_rate": 0.01, "loss": 2.0323, "step": 14199 }, { "epoch": 1.457512315270936, "grad_norm": 0.05626663193106651, "learning_rate": 0.01, "loss": 2.0518, "step": 14202 }, { "epoch": 1.457820197044335, "grad_norm": 0.06365952640771866, "learning_rate": 0.01, "loss": 2.042, "step": 14205 }, { "epoch": 1.458128078817734, "grad_norm": 0.0698857232928276, "learning_rate": 0.01, "loss": 2.0676, "step": 14208 }, { "epoch": 1.458435960591133, "grad_norm": 0.08149702101945877, "learning_rate": 0.01, "loss": 2.0659, "step": 14211 }, { "epoch": 1.458743842364532, "grad_norm": 0.055818330496549606, "learning_rate": 0.01, "loss": 2.0633, "step": 14214 }, { "epoch": 1.459051724137931, "grad_norm": 0.046251330524683, "learning_rate": 0.01, "loss": 2.0598, "step": 14217 }, { "epoch": 1.45935960591133, "grad_norm": 0.10986322909593582, "learning_rate": 0.01, "loss": 2.0768, "step": 14220 }, { "epoch": 1.459667487684729, "grad_norm": 0.06735626608133316, "learning_rate": 0.01, "loss": 2.0709, "step": 14223 }, { "epoch": 1.4599753694581281, "grad_norm": 0.1051633432507515, "learning_rate": 0.01, "loss": 2.0807, "step": 14226 }, { "epoch": 1.4602832512315271, "grad_norm": 0.0956743136048317, "learning_rate": 0.01, "loss": 2.0377, "step": 14229 }, { "epoch": 1.4605911330049262, "grad_norm": 0.04349840059876442, "learning_rate": 0.01, "loss": 2.0723, "step": 14232 }, { "epoch": 1.4608990147783252, "grad_norm": 0.0388668030500412, "learning_rate": 0.01, "loss": 2.0612, "step": 14235 }, { "epoch": 1.4612068965517242, "grad_norm": 0.04311763867735863, "learning_rate": 0.01, "loss": 2.0553, "step": 14238 }, { "epoch": 1.4615147783251232, "grad_norm": 0.09116464853286743, "learning_rate": 0.01, "loss": 2.0537, "step": 14241 }, { "epoch": 1.4618226600985222, "grad_norm": 0.08582088351249695, "learning_rate": 0.01, "loss": 2.0588, "step": 14244 }, { "epoch": 1.4621305418719213, "grad_norm": 0.0435602031648159, "learning_rate": 0.01, "loss": 2.0303, "step": 14247 }, { "epoch": 1.4624384236453203, "grad_norm": 0.067762091755867, "learning_rate": 0.01, "loss": 2.0453, "step": 14250 }, { "epoch": 1.4627463054187193, "grad_norm": 0.03980677202343941, "learning_rate": 0.01, "loss": 2.0488, "step": 14253 }, { "epoch": 1.4630541871921183, "grad_norm": 0.08521614968776703, "learning_rate": 0.01, "loss": 2.0617, "step": 14256 }, { "epoch": 1.4633620689655173, "grad_norm": 0.06770948320627213, "learning_rate": 0.01, "loss": 2.0535, "step": 14259 }, { "epoch": 1.4636699507389164, "grad_norm": 0.053458839654922485, "learning_rate": 0.01, "loss": 2.0594, "step": 14262 }, { "epoch": 1.4639778325123154, "grad_norm": 0.06733859330415726, "learning_rate": 0.01, "loss": 2.0422, "step": 14265 }, { "epoch": 1.4642857142857144, "grad_norm": 0.08033892512321472, "learning_rate": 0.01, "loss": 2.0685, "step": 14268 }, { "epoch": 1.4645935960591134, "grad_norm": 0.0832366794347763, "learning_rate": 0.01, "loss": 2.0867, "step": 14271 }, { "epoch": 1.4649014778325122, "grad_norm": 0.055291153490543365, "learning_rate": 0.01, "loss": 2.0618, "step": 14274 }, { "epoch": 1.4652093596059113, "grad_norm": 0.07180801033973694, "learning_rate": 0.01, "loss": 2.0569, "step": 14277 }, { "epoch": 1.4655172413793103, "grad_norm": 0.048950713127851486, "learning_rate": 0.01, "loss": 2.0433, "step": 14280 }, { "epoch": 1.4658251231527093, "grad_norm": 0.05428025498986244, "learning_rate": 0.01, "loss": 2.052, "step": 14283 }, { "epoch": 1.4661330049261083, "grad_norm": 0.06631309539079666, "learning_rate": 0.01, "loss": 2.0615, "step": 14286 }, { "epoch": 1.4664408866995073, "grad_norm": 0.06743253022432327, "learning_rate": 0.01, "loss": 2.0524, "step": 14289 }, { "epoch": 1.4667487684729064, "grad_norm": 0.10901882499456406, "learning_rate": 0.01, "loss": 2.076, "step": 14292 }, { "epoch": 1.4670566502463054, "grad_norm": 0.08234187960624695, "learning_rate": 0.01, "loss": 2.0432, "step": 14295 }, { "epoch": 1.4673645320197044, "grad_norm": 0.07249965518712997, "learning_rate": 0.01, "loss": 2.057, "step": 14298 }, { "epoch": 1.4676724137931034, "grad_norm": 0.0705137550830841, "learning_rate": 0.01, "loss": 2.0616, "step": 14301 }, { "epoch": 1.4679802955665024, "grad_norm": 0.10730472952127457, "learning_rate": 0.01, "loss": 2.0575, "step": 14304 }, { "epoch": 1.4682881773399015, "grad_norm": 0.048364557325839996, "learning_rate": 0.01, "loss": 2.0719, "step": 14307 }, { "epoch": 1.4685960591133005, "grad_norm": 0.03604978322982788, "learning_rate": 0.01, "loss": 2.0608, "step": 14310 }, { "epoch": 1.4689039408866995, "grad_norm": 0.09732489287853241, "learning_rate": 0.01, "loss": 2.0627, "step": 14313 }, { "epoch": 1.4692118226600985, "grad_norm": 0.06590714305639267, "learning_rate": 0.01, "loss": 2.0775, "step": 14316 }, { "epoch": 1.4695197044334976, "grad_norm": 0.075086310505867, "learning_rate": 0.01, "loss": 2.0633, "step": 14319 }, { "epoch": 1.4698275862068966, "grad_norm": 0.10288450121879578, "learning_rate": 0.01, "loss": 2.0607, "step": 14322 }, { "epoch": 1.4701354679802956, "grad_norm": 0.0535271093249321, "learning_rate": 0.01, "loss": 2.0702, "step": 14325 }, { "epoch": 1.4704433497536946, "grad_norm": 0.04609391465783119, "learning_rate": 0.01, "loss": 2.049, "step": 14328 }, { "epoch": 1.4707512315270936, "grad_norm": 0.044252909719944, "learning_rate": 0.01, "loss": 2.0586, "step": 14331 }, { "epoch": 1.4710591133004927, "grad_norm": 0.07837995141744614, "learning_rate": 0.01, "loss": 2.0541, "step": 14334 }, { "epoch": 1.4713669950738917, "grad_norm": 0.06548511236906052, "learning_rate": 0.01, "loss": 2.0575, "step": 14337 }, { "epoch": 1.4716748768472907, "grad_norm": 0.09237763285636902, "learning_rate": 0.01, "loss": 2.0607, "step": 14340 }, { "epoch": 1.4719827586206897, "grad_norm": 0.04163452237844467, "learning_rate": 0.01, "loss": 2.0604, "step": 14343 }, { "epoch": 1.4722906403940887, "grad_norm": 0.13814504444599152, "learning_rate": 0.01, "loss": 2.0667, "step": 14346 }, { "epoch": 1.4725985221674878, "grad_norm": 0.054490260779857635, "learning_rate": 0.01, "loss": 2.0754, "step": 14349 }, { "epoch": 1.4729064039408866, "grad_norm": 0.07470995932817459, "learning_rate": 0.01, "loss": 2.0731, "step": 14352 }, { "epoch": 1.4732142857142856, "grad_norm": 0.12089511752128601, "learning_rate": 0.01, "loss": 2.0671, "step": 14355 }, { "epoch": 1.4735221674876846, "grad_norm": 0.05507595092058182, "learning_rate": 0.01, "loss": 2.0278, "step": 14358 }, { "epoch": 1.4738300492610836, "grad_norm": 0.05130109563469887, "learning_rate": 0.01, "loss": 2.0491, "step": 14361 }, { "epoch": 1.4741379310344827, "grad_norm": 0.04399503022432327, "learning_rate": 0.01, "loss": 2.0669, "step": 14364 }, { "epoch": 1.4744458128078817, "grad_norm": 0.06161755695939064, "learning_rate": 0.01, "loss": 2.0458, "step": 14367 }, { "epoch": 1.4747536945812807, "grad_norm": 0.045603156089782715, "learning_rate": 0.01, "loss": 2.0477, "step": 14370 }, { "epoch": 1.4750615763546797, "grad_norm": 0.06444186717271805, "learning_rate": 0.01, "loss": 2.0514, "step": 14373 }, { "epoch": 1.4753694581280787, "grad_norm": 0.07450753450393677, "learning_rate": 0.01, "loss": 2.0327, "step": 14376 }, { "epoch": 1.4756773399014778, "grad_norm": 0.10367168486118317, "learning_rate": 0.01, "loss": 2.0453, "step": 14379 }, { "epoch": 1.4759852216748768, "grad_norm": 0.08999089151620865, "learning_rate": 0.01, "loss": 2.0588, "step": 14382 }, { "epoch": 1.4762931034482758, "grad_norm": 0.09803617745637894, "learning_rate": 0.01, "loss": 2.0972, "step": 14385 }, { "epoch": 1.4766009852216748, "grad_norm": 0.042447153478860855, "learning_rate": 0.01, "loss": 2.0295, "step": 14388 }, { "epoch": 1.4769088669950738, "grad_norm": 0.04479740187525749, "learning_rate": 0.01, "loss": 2.0545, "step": 14391 }, { "epoch": 1.4772167487684729, "grad_norm": 0.04113270714879036, "learning_rate": 0.01, "loss": 2.0522, "step": 14394 }, { "epoch": 1.4775246305418719, "grad_norm": 0.1087644025683403, "learning_rate": 0.01, "loss": 2.0668, "step": 14397 }, { "epoch": 1.477832512315271, "grad_norm": 0.05737099424004555, "learning_rate": 0.01, "loss": 2.0509, "step": 14400 }, { "epoch": 1.47814039408867, "grad_norm": 0.11025606095790863, "learning_rate": 0.01, "loss": 2.067, "step": 14403 }, { "epoch": 1.478448275862069, "grad_norm": 0.06662195175886154, "learning_rate": 0.01, "loss": 2.0554, "step": 14406 }, { "epoch": 1.478756157635468, "grad_norm": 0.05261904001235962, "learning_rate": 0.01, "loss": 2.0562, "step": 14409 }, { "epoch": 1.479064039408867, "grad_norm": 0.048272691667079926, "learning_rate": 0.01, "loss": 2.0858, "step": 14412 }, { "epoch": 1.479371921182266, "grad_norm": 0.048300545662641525, "learning_rate": 0.01, "loss": 2.0736, "step": 14415 }, { "epoch": 1.479679802955665, "grad_norm": 0.08697368204593658, "learning_rate": 0.01, "loss": 2.0425, "step": 14418 }, { "epoch": 1.479987684729064, "grad_norm": 0.07993713021278381, "learning_rate": 0.01, "loss": 2.0511, "step": 14421 }, { "epoch": 1.480295566502463, "grad_norm": 0.10037390887737274, "learning_rate": 0.01, "loss": 2.0584, "step": 14424 }, { "epoch": 1.480603448275862, "grad_norm": 0.06048484891653061, "learning_rate": 0.01, "loss": 2.0589, "step": 14427 }, { "epoch": 1.4809113300492611, "grad_norm": 0.08982612937688828, "learning_rate": 0.01, "loss": 2.0575, "step": 14430 }, { "epoch": 1.4812192118226601, "grad_norm": 0.06678975373506546, "learning_rate": 0.01, "loss": 2.0544, "step": 14433 }, { "epoch": 1.4815270935960592, "grad_norm": 0.07890944927930832, "learning_rate": 0.01, "loss": 2.0352, "step": 14436 }, { "epoch": 1.4818349753694582, "grad_norm": 0.05838685482740402, "learning_rate": 0.01, "loss": 2.0481, "step": 14439 }, { "epoch": 1.4821428571428572, "grad_norm": 0.06483394652605057, "learning_rate": 0.01, "loss": 2.0425, "step": 14442 }, { "epoch": 1.4824507389162562, "grad_norm": 0.07320713996887207, "learning_rate": 0.01, "loss": 2.0524, "step": 14445 }, { "epoch": 1.4827586206896552, "grad_norm": 0.07484092563390732, "learning_rate": 0.01, "loss": 2.0709, "step": 14448 }, { "epoch": 1.4830665024630543, "grad_norm": 0.07702804356813431, "learning_rate": 0.01, "loss": 2.0483, "step": 14451 }, { "epoch": 1.4833743842364533, "grad_norm": 0.05470692366361618, "learning_rate": 0.01, "loss": 2.0767, "step": 14454 }, { "epoch": 1.4836822660098523, "grad_norm": 0.055773910135030746, "learning_rate": 0.01, "loss": 2.0692, "step": 14457 }, { "epoch": 1.4839901477832513, "grad_norm": 0.03712743893265724, "learning_rate": 0.01, "loss": 2.0723, "step": 14460 }, { "epoch": 1.4842980295566504, "grad_norm": 0.035963475704193115, "learning_rate": 0.01, "loss": 2.0526, "step": 14463 }, { "epoch": 1.4846059113300494, "grad_norm": 0.08578921854496002, "learning_rate": 0.01, "loss": 2.0631, "step": 14466 }, { "epoch": 1.4849137931034484, "grad_norm": 0.08239159733057022, "learning_rate": 0.01, "loss": 2.0654, "step": 14469 }, { "epoch": 1.4852216748768474, "grad_norm": 0.05097891017794609, "learning_rate": 0.01, "loss": 2.0837, "step": 14472 }, { "epoch": 1.4855295566502464, "grad_norm": 0.051847904920578, "learning_rate": 0.01, "loss": 2.059, "step": 14475 }, { "epoch": 1.4858374384236452, "grad_norm": 0.04754810780286789, "learning_rate": 0.01, "loss": 2.0658, "step": 14478 }, { "epoch": 1.4861453201970443, "grad_norm": 0.046647075563669205, "learning_rate": 0.01, "loss": 2.0423, "step": 14481 }, { "epoch": 1.4864532019704433, "grad_norm": 0.06013277545571327, "learning_rate": 0.01, "loss": 2.0254, "step": 14484 }, { "epoch": 1.4867610837438423, "grad_norm": 0.13433513045310974, "learning_rate": 0.01, "loss": 2.0775, "step": 14487 }, { "epoch": 1.4870689655172413, "grad_norm": 0.046518564224243164, "learning_rate": 0.01, "loss": 2.0434, "step": 14490 }, { "epoch": 1.4873768472906403, "grad_norm": 0.09483514726161957, "learning_rate": 0.01, "loss": 2.0839, "step": 14493 }, { "epoch": 1.4876847290640394, "grad_norm": 0.07147302478551865, "learning_rate": 0.01, "loss": 2.0741, "step": 14496 }, { "epoch": 1.4879926108374384, "grad_norm": 0.12423846870660782, "learning_rate": 0.01, "loss": 2.045, "step": 14499 }, { "epoch": 1.4883004926108374, "grad_norm": 0.07726770639419556, "learning_rate": 0.01, "loss": 2.0578, "step": 14502 }, { "epoch": 1.4886083743842364, "grad_norm": 0.059802982956171036, "learning_rate": 0.01, "loss": 2.0526, "step": 14505 }, { "epoch": 1.4889162561576355, "grad_norm": 0.050745993852615356, "learning_rate": 0.01, "loss": 2.049, "step": 14508 }, { "epoch": 1.4892241379310345, "grad_norm": 0.052064161747694016, "learning_rate": 0.01, "loss": 2.0575, "step": 14511 }, { "epoch": 1.4895320197044335, "grad_norm": 0.06646674871444702, "learning_rate": 0.01, "loss": 2.0689, "step": 14514 }, { "epoch": 1.4898399014778325, "grad_norm": 0.043484605848789215, "learning_rate": 0.01, "loss": 2.0655, "step": 14517 }, { "epoch": 1.4901477832512315, "grad_norm": 0.14452145993709564, "learning_rate": 0.01, "loss": 2.0599, "step": 14520 }, { "epoch": 1.4904556650246306, "grad_norm": 0.08289093524217606, "learning_rate": 0.01, "loss": 2.0654, "step": 14523 }, { "epoch": 1.4907635467980296, "grad_norm": 0.05047908052802086, "learning_rate": 0.01, "loss": 2.0409, "step": 14526 }, { "epoch": 1.4910714285714286, "grad_norm": 0.04830252006649971, "learning_rate": 0.01, "loss": 2.0529, "step": 14529 }, { "epoch": 1.4913793103448276, "grad_norm": 0.0430610254406929, "learning_rate": 0.01, "loss": 2.0242, "step": 14532 }, { "epoch": 1.4916871921182266, "grad_norm": 0.04282008111476898, "learning_rate": 0.01, "loss": 2.0494, "step": 14535 }, { "epoch": 1.4919950738916257, "grad_norm": 0.037373676896095276, "learning_rate": 0.01, "loss": 2.0528, "step": 14538 }, { "epoch": 1.4923029556650247, "grad_norm": 0.04186755418777466, "learning_rate": 0.01, "loss": 2.058, "step": 14541 }, { "epoch": 1.4926108374384237, "grad_norm": 0.05514196678996086, "learning_rate": 0.01, "loss": 2.0647, "step": 14544 }, { "epoch": 1.4929187192118227, "grad_norm": 0.07391703873872757, "learning_rate": 0.01, "loss": 2.0812, "step": 14547 }, { "epoch": 1.4932266009852218, "grad_norm": 0.1295444518327713, "learning_rate": 0.01, "loss": 2.0571, "step": 14550 }, { "epoch": 1.4935344827586206, "grad_norm": 0.06389490514993668, "learning_rate": 0.01, "loss": 2.0756, "step": 14553 }, { "epoch": 1.4938423645320196, "grad_norm": 0.09335155785083771, "learning_rate": 0.01, "loss": 2.0904, "step": 14556 }, { "epoch": 1.4941502463054186, "grad_norm": 0.059700366109609604, "learning_rate": 0.01, "loss": 2.0598, "step": 14559 }, { "epoch": 1.4944581280788176, "grad_norm": 0.07785683870315552, "learning_rate": 0.01, "loss": 2.0567, "step": 14562 }, { "epoch": 1.4947660098522166, "grad_norm": 0.11935362964868546, "learning_rate": 0.01, "loss": 2.0536, "step": 14565 }, { "epoch": 1.4950738916256157, "grad_norm": 0.06188122183084488, "learning_rate": 0.01, "loss": 2.0436, "step": 14568 }, { "epoch": 1.4953817733990147, "grad_norm": 0.05302932485938072, "learning_rate": 0.01, "loss": 2.0704, "step": 14571 }, { "epoch": 1.4956896551724137, "grad_norm": 0.03871694207191467, "learning_rate": 0.01, "loss": 2.062, "step": 14574 }, { "epoch": 1.4959975369458127, "grad_norm": 0.03942064568400383, "learning_rate": 0.01, "loss": 2.0725, "step": 14577 }, { "epoch": 1.4963054187192117, "grad_norm": 0.05354088917374611, "learning_rate": 0.01, "loss": 2.054, "step": 14580 }, { "epoch": 1.4966133004926108, "grad_norm": 0.07863521575927734, "learning_rate": 0.01, "loss": 2.0577, "step": 14583 }, { "epoch": 1.4969211822660098, "grad_norm": 0.0440685860812664, "learning_rate": 0.01, "loss": 2.0422, "step": 14586 }, { "epoch": 1.4972290640394088, "grad_norm": 0.0724552571773529, "learning_rate": 0.01, "loss": 2.0707, "step": 14589 }, { "epoch": 1.4975369458128078, "grad_norm": 0.06099352613091469, "learning_rate": 0.01, "loss": 2.0567, "step": 14592 }, { "epoch": 1.4978448275862069, "grad_norm": 0.05534674599766731, "learning_rate": 0.01, "loss": 2.0343, "step": 14595 }, { "epoch": 1.4981527093596059, "grad_norm": 0.07876823097467422, "learning_rate": 0.01, "loss": 2.0686, "step": 14598 }, { "epoch": 1.498460591133005, "grad_norm": 0.07860377430915833, "learning_rate": 0.01, "loss": 2.0506, "step": 14601 }, { "epoch": 1.498768472906404, "grad_norm": 0.054005399346351624, "learning_rate": 0.01, "loss": 2.0429, "step": 14604 }, { "epoch": 1.499076354679803, "grad_norm": 0.10550951957702637, "learning_rate": 0.01, "loss": 2.0407, "step": 14607 }, { "epoch": 1.499384236453202, "grad_norm": 0.056426841765642166, "learning_rate": 0.01, "loss": 2.0589, "step": 14610 }, { "epoch": 1.499692118226601, "grad_norm": 0.09640904515981674, "learning_rate": 0.01, "loss": 2.0556, "step": 14613 }, { "epoch": 1.5, "grad_norm": 0.0822538211941719, "learning_rate": 0.01, "loss": 2.0684, "step": 14616 }, { "epoch": 1.500307881773399, "grad_norm": 0.05105495825409889, "learning_rate": 0.01, "loss": 2.0588, "step": 14619 }, { "epoch": 1.500615763546798, "grad_norm": 0.07851336896419525, "learning_rate": 0.01, "loss": 2.0635, "step": 14622 }, { "epoch": 1.500923645320197, "grad_norm": 0.051046207547187805, "learning_rate": 0.01, "loss": 2.0418, "step": 14625 }, { "epoch": 1.501231527093596, "grad_norm": 0.12335740774869919, "learning_rate": 0.01, "loss": 2.0483, "step": 14628 }, { "epoch": 1.501539408866995, "grad_norm": 0.04044636711478233, "learning_rate": 0.01, "loss": 2.0635, "step": 14631 }, { "epoch": 1.5018472906403941, "grad_norm": 0.0532059408724308, "learning_rate": 0.01, "loss": 2.0567, "step": 14634 }, { "epoch": 1.5021551724137931, "grad_norm": 0.0446847639977932, "learning_rate": 0.01, "loss": 2.0531, "step": 14637 }, { "epoch": 1.5024630541871922, "grad_norm": 0.05464153364300728, "learning_rate": 0.01, "loss": 2.0464, "step": 14640 }, { "epoch": 1.5027709359605912, "grad_norm": 0.08923088759183884, "learning_rate": 0.01, "loss": 2.0787, "step": 14643 }, { "epoch": 1.5030788177339902, "grad_norm": 0.06256496161222458, "learning_rate": 0.01, "loss": 2.0417, "step": 14646 }, { "epoch": 1.5033866995073892, "grad_norm": 0.05338229984045029, "learning_rate": 0.01, "loss": 2.053, "step": 14649 }, { "epoch": 1.5036945812807883, "grad_norm": 0.04416535049676895, "learning_rate": 0.01, "loss": 2.0532, "step": 14652 }, { "epoch": 1.5040024630541873, "grad_norm": 0.07076221704483032, "learning_rate": 0.01, "loss": 2.0677, "step": 14655 }, { "epoch": 1.5043103448275863, "grad_norm": 0.08566464483737946, "learning_rate": 0.01, "loss": 2.0499, "step": 14658 }, { "epoch": 1.5046182266009853, "grad_norm": 0.049552109092473984, "learning_rate": 0.01, "loss": 2.0549, "step": 14661 }, { "epoch": 1.5049261083743843, "grad_norm": 0.11802852898836136, "learning_rate": 0.01, "loss": 2.0657, "step": 14664 }, { "epoch": 1.5052339901477834, "grad_norm": 0.05280107632279396, "learning_rate": 0.01, "loss": 2.0722, "step": 14667 }, { "epoch": 1.5055418719211824, "grad_norm": 0.036458853632211685, "learning_rate": 0.01, "loss": 2.0357, "step": 14670 }, { "epoch": 1.5058497536945814, "grad_norm": 0.0465536043047905, "learning_rate": 0.01, "loss": 2.0765, "step": 14673 }, { "epoch": 1.5061576354679804, "grad_norm": 0.09052444994449615, "learning_rate": 0.01, "loss": 2.0677, "step": 14676 }, { "epoch": 1.5064655172413794, "grad_norm": 0.08750707656145096, "learning_rate": 0.01, "loss": 2.0719, "step": 14679 }, { "epoch": 1.5067733990147785, "grad_norm": 0.07876972109079361, "learning_rate": 0.01, "loss": 2.0539, "step": 14682 }, { "epoch": 1.5070812807881775, "grad_norm": 0.045561011880636215, "learning_rate": 0.01, "loss": 2.0656, "step": 14685 }, { "epoch": 1.5073891625615765, "grad_norm": 0.04548237472772598, "learning_rate": 0.01, "loss": 2.0811, "step": 14688 }, { "epoch": 1.5076970443349755, "grad_norm": 0.04897540062665939, "learning_rate": 0.01, "loss": 2.046, "step": 14691 }, { "epoch": 1.5080049261083743, "grad_norm": 0.08820399641990662, "learning_rate": 0.01, "loss": 2.0521, "step": 14694 }, { "epoch": 1.5083128078817734, "grad_norm": 0.0701432004570961, "learning_rate": 0.01, "loss": 2.0644, "step": 14697 }, { "epoch": 1.5086206896551724, "grad_norm": 0.10921904444694519, "learning_rate": 0.01, "loss": 2.0617, "step": 14700 }, { "epoch": 1.5089285714285714, "grad_norm": 0.08308566361665726, "learning_rate": 0.01, "loss": 2.101, "step": 14703 }, { "epoch": 1.5092364532019704, "grad_norm": 0.12545743584632874, "learning_rate": 0.01, "loss": 2.0495, "step": 14706 }, { "epoch": 1.5095443349753694, "grad_norm": 0.11245466768741608, "learning_rate": 0.01, "loss": 2.0264, "step": 14709 }, { "epoch": 1.5098522167487685, "grad_norm": 0.10128718614578247, "learning_rate": 0.01, "loss": 2.0413, "step": 14712 }, { "epoch": 1.5101600985221675, "grad_norm": 0.07226911187171936, "learning_rate": 0.01, "loss": 2.0487, "step": 14715 }, { "epoch": 1.5104679802955665, "grad_norm": 0.056605782359838486, "learning_rate": 0.01, "loss": 2.0686, "step": 14718 }, { "epoch": 1.5107758620689655, "grad_norm": 0.08795683085918427, "learning_rate": 0.01, "loss": 2.053, "step": 14721 }, { "epoch": 1.5110837438423645, "grad_norm": 0.07311341166496277, "learning_rate": 0.01, "loss": 2.0164, "step": 14724 }, { "epoch": 1.5113916256157636, "grad_norm": 0.07164688408374786, "learning_rate": 0.01, "loss": 2.048, "step": 14727 }, { "epoch": 1.5116995073891626, "grad_norm": 0.04577312618494034, "learning_rate": 0.01, "loss": 2.0523, "step": 14730 }, { "epoch": 1.5120073891625616, "grad_norm": 0.043186552822589874, "learning_rate": 0.01, "loss": 2.0694, "step": 14733 }, { "epoch": 1.5123152709359606, "grad_norm": 0.06101042777299881, "learning_rate": 0.01, "loss": 2.072, "step": 14736 }, { "epoch": 1.5126231527093597, "grad_norm": 0.12651608884334564, "learning_rate": 0.01, "loss": 2.0914, "step": 14739 }, { "epoch": 1.5129310344827587, "grad_norm": 0.039495594799518585, "learning_rate": 0.01, "loss": 2.0331, "step": 14742 }, { "epoch": 1.5132389162561575, "grad_norm": 0.10854054987430573, "learning_rate": 0.01, "loss": 2.0585, "step": 14745 }, { "epoch": 1.5135467980295565, "grad_norm": 0.142778679728508, "learning_rate": 0.01, "loss": 2.0433, "step": 14748 }, { "epoch": 1.5138546798029555, "grad_norm": 0.06473375856876373, "learning_rate": 0.01, "loss": 2.075, "step": 14751 }, { "epoch": 1.5141625615763545, "grad_norm": 0.050436701625585556, "learning_rate": 0.01, "loss": 2.0823, "step": 14754 }, { "epoch": 1.5144704433497536, "grad_norm": 0.057088159024715424, "learning_rate": 0.01, "loss": 2.0578, "step": 14757 }, { "epoch": 1.5147783251231526, "grad_norm": 0.051623161882162094, "learning_rate": 0.01, "loss": 2.0816, "step": 14760 }, { "epoch": 1.5150862068965516, "grad_norm": 0.0770149901509285, "learning_rate": 0.01, "loss": 2.0583, "step": 14763 }, { "epoch": 1.5153940886699506, "grad_norm": 0.06827536970376968, "learning_rate": 0.01, "loss": 2.0782, "step": 14766 }, { "epoch": 1.5157019704433496, "grad_norm": 0.06987358629703522, "learning_rate": 0.01, "loss": 2.0618, "step": 14769 }, { "epoch": 1.5160098522167487, "grad_norm": 0.05388219282031059, "learning_rate": 0.01, "loss": 2.084, "step": 14772 }, { "epoch": 1.5163177339901477, "grad_norm": 0.11866139620542526, "learning_rate": 0.01, "loss": 2.0622, "step": 14775 }, { "epoch": 1.5166256157635467, "grad_norm": 0.12754911184310913, "learning_rate": 0.01, "loss": 2.0387, "step": 14778 }, { "epoch": 1.5169334975369457, "grad_norm": 0.03591502830386162, "learning_rate": 0.01, "loss": 2.043, "step": 14781 }, { "epoch": 1.5172413793103448, "grad_norm": 0.09142038226127625, "learning_rate": 0.01, "loss": 2.05, "step": 14784 }, { "epoch": 1.5175492610837438, "grad_norm": 0.13140954077243805, "learning_rate": 0.01, "loss": 2.0302, "step": 14787 }, { "epoch": 1.5178571428571428, "grad_norm": 0.08330459147691727, "learning_rate": 0.01, "loss": 2.0461, "step": 14790 }, { "epoch": 1.5181650246305418, "grad_norm": 0.0779498815536499, "learning_rate": 0.01, "loss": 2.0662, "step": 14793 }, { "epoch": 1.5184729064039408, "grad_norm": 0.05396762117743492, "learning_rate": 0.01, "loss": 2.0586, "step": 14796 }, { "epoch": 1.5187807881773399, "grad_norm": 0.06744614988565445, "learning_rate": 0.01, "loss": 2.0578, "step": 14799 }, { "epoch": 1.5190886699507389, "grad_norm": 0.04777420684695244, "learning_rate": 0.01, "loss": 2.0645, "step": 14802 }, { "epoch": 1.519396551724138, "grad_norm": 0.044643301516771317, "learning_rate": 0.01, "loss": 2.0691, "step": 14805 }, { "epoch": 1.519704433497537, "grad_norm": 0.05263877660036087, "learning_rate": 0.01, "loss": 2.0475, "step": 14808 }, { "epoch": 1.520012315270936, "grad_norm": 0.07794903963804245, "learning_rate": 0.01, "loss": 2.0685, "step": 14811 }, { "epoch": 1.520320197044335, "grad_norm": 0.03846001625061035, "learning_rate": 0.01, "loss": 2.0547, "step": 14814 }, { "epoch": 1.520628078817734, "grad_norm": 0.03806301951408386, "learning_rate": 0.01, "loss": 2.0701, "step": 14817 }, { "epoch": 1.520935960591133, "grad_norm": 0.08289408683776855, "learning_rate": 0.01, "loss": 2.0712, "step": 14820 }, { "epoch": 1.521243842364532, "grad_norm": 0.04307285323739052, "learning_rate": 0.01, "loss": 2.0926, "step": 14823 }, { "epoch": 1.521551724137931, "grad_norm": 0.04523704573512077, "learning_rate": 0.01, "loss": 2.0613, "step": 14826 }, { "epoch": 1.52185960591133, "grad_norm": 0.0813162624835968, "learning_rate": 0.01, "loss": 2.0516, "step": 14829 }, { "epoch": 1.522167487684729, "grad_norm": 0.08958449214696884, "learning_rate": 0.01, "loss": 2.0534, "step": 14832 }, { "epoch": 1.5224753694581281, "grad_norm": 0.1036042720079422, "learning_rate": 0.01, "loss": 2.051, "step": 14835 }, { "epoch": 1.5227832512315271, "grad_norm": 0.06528764218091965, "learning_rate": 0.01, "loss": 2.0389, "step": 14838 }, { "epoch": 1.5230911330049262, "grad_norm": 0.04857415333390236, "learning_rate": 0.01, "loss": 2.0215, "step": 14841 }, { "epoch": 1.5233990147783252, "grad_norm": 0.11137302964925766, "learning_rate": 0.01, "loss": 2.0706, "step": 14844 }, { "epoch": 1.5237068965517242, "grad_norm": 0.05258537456393242, "learning_rate": 0.01, "loss": 2.053, "step": 14847 }, { "epoch": 1.5240147783251232, "grad_norm": 0.05203690007328987, "learning_rate": 0.01, "loss": 2.0719, "step": 14850 }, { "epoch": 1.5243226600985222, "grad_norm": 0.036557264626026154, "learning_rate": 0.01, "loss": 2.0827, "step": 14853 }, { "epoch": 1.5246305418719213, "grad_norm": 0.05553048849105835, "learning_rate": 0.01, "loss": 2.0526, "step": 14856 }, { "epoch": 1.5249384236453203, "grad_norm": 0.07551626116037369, "learning_rate": 0.01, "loss": 2.0696, "step": 14859 }, { "epoch": 1.5252463054187193, "grad_norm": 0.09335839748382568, "learning_rate": 0.01, "loss": 2.069, "step": 14862 }, { "epoch": 1.5255541871921183, "grad_norm": 0.07123745232820511, "learning_rate": 0.01, "loss": 2.048, "step": 14865 }, { "epoch": 1.5258620689655173, "grad_norm": 0.06792188435792923, "learning_rate": 0.01, "loss": 2.059, "step": 14868 }, { "epoch": 1.5261699507389164, "grad_norm": 0.035666827112436295, "learning_rate": 0.01, "loss": 2.0585, "step": 14871 }, { "epoch": 1.5264778325123154, "grad_norm": 0.039600104093551636, "learning_rate": 0.01, "loss": 2.0447, "step": 14874 }, { "epoch": 1.5267857142857144, "grad_norm": 0.03983796760439873, "learning_rate": 0.01, "loss": 2.0607, "step": 14877 }, { "epoch": 1.5270935960591134, "grad_norm": 0.07013492286205292, "learning_rate": 0.01, "loss": 2.0502, "step": 14880 }, { "epoch": 1.5274014778325125, "grad_norm": 0.07064792513847351, "learning_rate": 0.01, "loss": 2.0328, "step": 14883 }, { "epoch": 1.5277093596059115, "grad_norm": 0.10502810031175613, "learning_rate": 0.01, "loss": 2.0494, "step": 14886 }, { "epoch": 1.5280172413793105, "grad_norm": 0.050288375467061996, "learning_rate": 0.01, "loss": 2.0316, "step": 14889 }, { "epoch": 1.5283251231527095, "grad_norm": 0.07382049411535263, "learning_rate": 0.01, "loss": 2.0414, "step": 14892 }, { "epoch": 1.5286330049261085, "grad_norm": 0.08693026751279831, "learning_rate": 0.01, "loss": 2.0428, "step": 14895 }, { "epoch": 1.5289408866995073, "grad_norm": 0.04283773526549339, "learning_rate": 0.01, "loss": 2.068, "step": 14898 }, { "epoch": 1.5292487684729064, "grad_norm": 0.1044667437672615, "learning_rate": 0.01, "loss": 2.0583, "step": 14901 }, { "epoch": 1.5295566502463054, "grad_norm": 0.06316410005092621, "learning_rate": 0.01, "loss": 2.0372, "step": 14904 }, { "epoch": 1.5298645320197044, "grad_norm": 0.04687780514359474, "learning_rate": 0.01, "loss": 2.0575, "step": 14907 }, { "epoch": 1.5301724137931034, "grad_norm": 0.04785927012562752, "learning_rate": 0.01, "loss": 2.0435, "step": 14910 }, { "epoch": 1.5304802955665024, "grad_norm": 0.03788747265934944, "learning_rate": 0.01, "loss": 2.0483, "step": 14913 }, { "epoch": 1.5307881773399015, "grad_norm": 0.05618858337402344, "learning_rate": 0.01, "loss": 2.031, "step": 14916 }, { "epoch": 1.5310960591133005, "grad_norm": 0.10059016942977905, "learning_rate": 0.01, "loss": 2.0608, "step": 14919 }, { "epoch": 1.5314039408866995, "grad_norm": 0.06718064099550247, "learning_rate": 0.01, "loss": 2.0676, "step": 14922 }, { "epoch": 1.5317118226600985, "grad_norm": 0.09006398916244507, "learning_rate": 0.01, "loss": 2.059, "step": 14925 }, { "epoch": 1.5320197044334976, "grad_norm": 0.036577485501766205, "learning_rate": 0.01, "loss": 2.0425, "step": 14928 }, { "epoch": 1.5323275862068966, "grad_norm": 0.07979925721883774, "learning_rate": 0.01, "loss": 2.0387, "step": 14931 }, { "epoch": 1.5326354679802956, "grad_norm": 0.08286473900079727, "learning_rate": 0.01, "loss": 2.0523, "step": 14934 }, { "epoch": 1.5329433497536946, "grad_norm": 0.042596206068992615, "learning_rate": 0.01, "loss": 2.0692, "step": 14937 }, { "epoch": 1.5332512315270936, "grad_norm": 0.043695200234651566, "learning_rate": 0.01, "loss": 2.0429, "step": 14940 }, { "epoch": 1.5335591133004927, "grad_norm": 0.13041776418685913, "learning_rate": 0.01, "loss": 2.0966, "step": 14943 }, { "epoch": 1.5338669950738915, "grad_norm": 0.06076871603727341, "learning_rate": 0.01, "loss": 2.081, "step": 14946 }, { "epoch": 1.5341748768472905, "grad_norm": 0.08744147419929504, "learning_rate": 0.01, "loss": 2.0698, "step": 14949 }, { "epoch": 1.5344827586206895, "grad_norm": 0.041992440819740295, "learning_rate": 0.01, "loss": 2.0735, "step": 14952 }, { "epoch": 1.5347906403940885, "grad_norm": 0.0773782953619957, "learning_rate": 0.01, "loss": 2.0501, "step": 14955 }, { "epoch": 1.5350985221674875, "grad_norm": 0.04371657967567444, "learning_rate": 0.01, "loss": 2.043, "step": 14958 }, { "epoch": 1.5354064039408866, "grad_norm": 0.04753347858786583, "learning_rate": 0.01, "loss": 2.0399, "step": 14961 }, { "epoch": 1.5357142857142856, "grad_norm": 0.10644037276506424, "learning_rate": 0.01, "loss": 2.0838, "step": 14964 }, { "epoch": 1.5360221674876846, "grad_norm": 0.037067610770463943, "learning_rate": 0.01, "loss": 2.0677, "step": 14967 }, { "epoch": 1.5363300492610836, "grad_norm": 0.06745267659425735, "learning_rate": 0.01, "loss": 2.0464, "step": 14970 }, { "epoch": 1.5366379310344827, "grad_norm": 0.10053039342164993, "learning_rate": 0.01, "loss": 2.0696, "step": 14973 }, { "epoch": 1.5369458128078817, "grad_norm": 0.08785562962293625, "learning_rate": 0.01, "loss": 2.059, "step": 14976 }, { "epoch": 1.5372536945812807, "grad_norm": 0.12240536510944366, "learning_rate": 0.01, "loss": 2.0477, "step": 14979 }, { "epoch": 1.5375615763546797, "grad_norm": 0.08541588485240936, "learning_rate": 0.01, "loss": 2.0581, "step": 14982 }, { "epoch": 1.5378694581280787, "grad_norm": 0.0469081737101078, "learning_rate": 0.01, "loss": 2.0581, "step": 14985 }, { "epoch": 1.5381773399014778, "grad_norm": 0.04396476596593857, "learning_rate": 0.01, "loss": 2.0504, "step": 14988 }, { "epoch": 1.5384852216748768, "grad_norm": 0.033920448273420334, "learning_rate": 0.01, "loss": 2.0513, "step": 14991 }, { "epoch": 1.5387931034482758, "grad_norm": 0.035798102617263794, "learning_rate": 0.01, "loss": 2.0583, "step": 14994 }, { "epoch": 1.5391009852216748, "grad_norm": 0.030788132920861244, "learning_rate": 0.01, "loss": 2.0856, "step": 14997 }, { "epoch": 1.5394088669950738, "grad_norm": 0.06127138063311577, "learning_rate": 0.01, "loss": 2.0485, "step": 15000 }, { "epoch": 1.5397167487684729, "grad_norm": 0.07792042940855026, "learning_rate": 0.01, "loss": 2.0473, "step": 15003 }, { "epoch": 1.5400246305418719, "grad_norm": 0.1915716826915741, "learning_rate": 0.01, "loss": 2.06, "step": 15006 }, { "epoch": 1.540332512315271, "grad_norm": 0.13130734860897064, "learning_rate": 0.01, "loss": 2.0458, "step": 15009 }, { "epoch": 1.54064039408867, "grad_norm": 0.08750183880329132, "learning_rate": 0.01, "loss": 2.0569, "step": 15012 }, { "epoch": 1.540948275862069, "grad_norm": 0.0678631141781807, "learning_rate": 0.01, "loss": 2.0256, "step": 15015 }, { "epoch": 1.541256157635468, "grad_norm": 0.04352593049407005, "learning_rate": 0.01, "loss": 2.0541, "step": 15018 }, { "epoch": 1.541564039408867, "grad_norm": 0.059398628771305084, "learning_rate": 0.01, "loss": 2.0542, "step": 15021 }, { "epoch": 1.541871921182266, "grad_norm": 0.09179355949163437, "learning_rate": 0.01, "loss": 2.0623, "step": 15024 }, { "epoch": 1.542179802955665, "grad_norm": 0.08243024349212646, "learning_rate": 0.01, "loss": 2.0616, "step": 15027 }, { "epoch": 1.542487684729064, "grad_norm": 0.05103360861539841, "learning_rate": 0.01, "loss": 2.0498, "step": 15030 }, { "epoch": 1.542795566502463, "grad_norm": 0.04206395894289017, "learning_rate": 0.01, "loss": 2.0676, "step": 15033 }, { "epoch": 1.543103448275862, "grad_norm": 0.03659799322485924, "learning_rate": 0.01, "loss": 2.0376, "step": 15036 }, { "epoch": 1.5434113300492611, "grad_norm": 0.1279965043067932, "learning_rate": 0.01, "loss": 2.043, "step": 15039 }, { "epoch": 1.5437192118226601, "grad_norm": 0.09509512782096863, "learning_rate": 0.01, "loss": 2.0233, "step": 15042 }, { "epoch": 1.5440270935960592, "grad_norm": 0.07963217794895172, "learning_rate": 0.01, "loss": 2.0632, "step": 15045 }, { "epoch": 1.5443349753694582, "grad_norm": 0.06425557285547256, "learning_rate": 0.01, "loss": 2.0454, "step": 15048 }, { "epoch": 1.5446428571428572, "grad_norm": 0.1166144609451294, "learning_rate": 0.01, "loss": 2.0675, "step": 15051 }, { "epoch": 1.5449507389162562, "grad_norm": 0.0558270663022995, "learning_rate": 0.01, "loss": 2.0495, "step": 15054 }, { "epoch": 1.5452586206896552, "grad_norm": 0.05666494369506836, "learning_rate": 0.01, "loss": 2.0417, "step": 15057 }, { "epoch": 1.5455665024630543, "grad_norm": 0.048931702971458435, "learning_rate": 0.01, "loss": 2.0503, "step": 15060 }, { "epoch": 1.5458743842364533, "grad_norm": 0.10072410106658936, "learning_rate": 0.01, "loss": 2.0432, "step": 15063 }, { "epoch": 1.5461822660098523, "grad_norm": 0.06339754164218903, "learning_rate": 0.01, "loss": 2.048, "step": 15066 }, { "epoch": 1.5464901477832513, "grad_norm": 0.04913650080561638, "learning_rate": 0.01, "loss": 2.07, "step": 15069 }, { "epoch": 1.5467980295566504, "grad_norm": 0.1012924313545227, "learning_rate": 0.01, "loss": 2.0423, "step": 15072 }, { "epoch": 1.5471059113300494, "grad_norm": 0.048015668988227844, "learning_rate": 0.01, "loss": 2.0356, "step": 15075 }, { "epoch": 1.5474137931034484, "grad_norm": 0.09666754305362701, "learning_rate": 0.01, "loss": 2.0701, "step": 15078 }, { "epoch": 1.5477216748768474, "grad_norm": 0.07722094655036926, "learning_rate": 0.01, "loss": 2.0223, "step": 15081 }, { "epoch": 1.5480295566502464, "grad_norm": 0.06525082141160965, "learning_rate": 0.01, "loss": 2.056, "step": 15084 }, { "epoch": 1.5483374384236455, "grad_norm": 0.04979628697037697, "learning_rate": 0.01, "loss": 2.0662, "step": 15087 }, { "epoch": 1.5486453201970445, "grad_norm": 0.05903888866305351, "learning_rate": 0.01, "loss": 2.0551, "step": 15090 }, { "epoch": 1.5489532019704435, "grad_norm": 0.09098793566226959, "learning_rate": 0.01, "loss": 2.0758, "step": 15093 }, { "epoch": 1.5492610837438425, "grad_norm": 0.08262350410223007, "learning_rate": 0.01, "loss": 2.0548, "step": 15096 }, { "epoch": 1.5495689655172413, "grad_norm": 0.057414278388023376, "learning_rate": 0.01, "loss": 2.0887, "step": 15099 }, { "epoch": 1.5498768472906403, "grad_norm": 0.06718642264604568, "learning_rate": 0.01, "loss": 2.0731, "step": 15102 }, { "epoch": 1.5501847290640394, "grad_norm": 0.07351098954677582, "learning_rate": 0.01, "loss": 2.0589, "step": 15105 }, { "epoch": 1.5504926108374384, "grad_norm": 0.03318174555897713, "learning_rate": 0.01, "loss": 2.0545, "step": 15108 }, { "epoch": 1.5508004926108374, "grad_norm": 0.11198091506958008, "learning_rate": 0.01, "loss": 2.0306, "step": 15111 }, { "epoch": 1.5511083743842364, "grad_norm": 0.056512147188186646, "learning_rate": 0.01, "loss": 2.0724, "step": 15114 }, { "epoch": 1.5514162561576355, "grad_norm": 0.08460327982902527, "learning_rate": 0.01, "loss": 2.0537, "step": 15117 }, { "epoch": 1.5517241379310345, "grad_norm": 0.08536583930253983, "learning_rate": 0.01, "loss": 2.0696, "step": 15120 }, { "epoch": 1.5520320197044335, "grad_norm": 0.10857357084751129, "learning_rate": 0.01, "loss": 2.0645, "step": 15123 }, { "epoch": 1.5523399014778325, "grad_norm": 0.04923904314637184, "learning_rate": 0.01, "loss": 2.0339, "step": 15126 }, { "epoch": 1.5526477832512315, "grad_norm": 0.05313669145107269, "learning_rate": 0.01, "loss": 2.044, "step": 15129 }, { "epoch": 1.5529556650246306, "grad_norm": 0.058348409831523895, "learning_rate": 0.01, "loss": 2.0264, "step": 15132 }, { "epoch": 1.5532635467980296, "grad_norm": 0.04621830955147743, "learning_rate": 0.01, "loss": 2.0543, "step": 15135 }, { "epoch": 1.5535714285714286, "grad_norm": 0.079473577439785, "learning_rate": 0.01, "loss": 2.0494, "step": 15138 }, { "epoch": 1.5538793103448276, "grad_norm": 0.10176654160022736, "learning_rate": 0.01, "loss": 2.0569, "step": 15141 }, { "epoch": 1.5541871921182266, "grad_norm": 0.048997897654771805, "learning_rate": 0.01, "loss": 2.0832, "step": 15144 }, { "epoch": 1.5544950738916257, "grad_norm": 0.06281887739896774, "learning_rate": 0.01, "loss": 2.0446, "step": 15147 }, { "epoch": 1.5548029556650245, "grad_norm": 0.07801464200019836, "learning_rate": 0.01, "loss": 2.0608, "step": 15150 }, { "epoch": 1.5551108374384235, "grad_norm": 0.08535271883010864, "learning_rate": 0.01, "loss": 2.0597, "step": 15153 }, { "epoch": 1.5554187192118225, "grad_norm": 0.06536438316106796, "learning_rate": 0.01, "loss": 2.0491, "step": 15156 }, { "epoch": 1.5557266009852215, "grad_norm": 0.045388113707304, "learning_rate": 0.01, "loss": 2.0243, "step": 15159 }, { "epoch": 1.5560344827586206, "grad_norm": 0.041893068701028824, "learning_rate": 0.01, "loss": 2.0373, "step": 15162 }, { "epoch": 1.5563423645320196, "grad_norm": 0.04261694848537445, "learning_rate": 0.01, "loss": 2.028, "step": 15165 }, { "epoch": 1.5566502463054186, "grad_norm": 0.03123985230922699, "learning_rate": 0.01, "loss": 2.0653, "step": 15168 }, { "epoch": 1.5569581280788176, "grad_norm": 0.048562515527009964, "learning_rate": 0.01, "loss": 2.0249, "step": 15171 }, { "epoch": 1.5572660098522166, "grad_norm": 0.1343316286802292, "learning_rate": 0.01, "loss": 2.062, "step": 15174 }, { "epoch": 1.5575738916256157, "grad_norm": 0.10992839932441711, "learning_rate": 0.01, "loss": 2.0374, "step": 15177 }, { "epoch": 1.5578817733990147, "grad_norm": 0.09098651260137558, "learning_rate": 0.01, "loss": 2.0394, "step": 15180 }, { "epoch": 1.5581896551724137, "grad_norm": 0.05405926704406738, "learning_rate": 0.01, "loss": 2.0491, "step": 15183 }, { "epoch": 1.5584975369458127, "grad_norm": 0.04776093736290932, "learning_rate": 0.01, "loss": 2.0775, "step": 15186 }, { "epoch": 1.5588054187192117, "grad_norm": 0.04614724963903427, "learning_rate": 0.01, "loss": 2.0339, "step": 15189 }, { "epoch": 1.5591133004926108, "grad_norm": 0.05032865330576897, "learning_rate": 0.01, "loss": 2.0516, "step": 15192 }, { "epoch": 1.5594211822660098, "grad_norm": 0.051392171531915665, "learning_rate": 0.01, "loss": 2.0405, "step": 15195 }, { "epoch": 1.5597290640394088, "grad_norm": 0.10255944728851318, "learning_rate": 0.01, "loss": 2.0615, "step": 15198 }, { "epoch": 1.5600369458128078, "grad_norm": 0.04222560301423073, "learning_rate": 0.01, "loss": 2.0527, "step": 15201 }, { "epoch": 1.5603448275862069, "grad_norm": 0.045385826379060745, "learning_rate": 0.01, "loss": 2.0556, "step": 15204 }, { "epoch": 1.5606527093596059, "grad_norm": 0.04241577908396721, "learning_rate": 0.01, "loss": 2.0727, "step": 15207 }, { "epoch": 1.560960591133005, "grad_norm": 0.12387125194072723, "learning_rate": 0.01, "loss": 2.0337, "step": 15210 }, { "epoch": 1.561268472906404, "grad_norm": 0.14704599976539612, "learning_rate": 0.01, "loss": 2.0602, "step": 15213 }, { "epoch": 1.561576354679803, "grad_norm": 0.049915559589862823, "learning_rate": 0.01, "loss": 2.0203, "step": 15216 }, { "epoch": 1.561884236453202, "grad_norm": 0.057630617171525955, "learning_rate": 0.01, "loss": 2.0503, "step": 15219 }, { "epoch": 1.562192118226601, "grad_norm": 0.04765351861715317, "learning_rate": 0.01, "loss": 2.0763, "step": 15222 }, { "epoch": 1.5625, "grad_norm": 0.06213679164648056, "learning_rate": 0.01, "loss": 2.0831, "step": 15225 }, { "epoch": 1.562807881773399, "grad_norm": 0.07990710437297821, "learning_rate": 0.01, "loss": 2.0458, "step": 15228 }, { "epoch": 1.563115763546798, "grad_norm": 0.0683673620223999, "learning_rate": 0.01, "loss": 2.0294, "step": 15231 }, { "epoch": 1.563423645320197, "grad_norm": 0.12503905594348907, "learning_rate": 0.01, "loss": 2.0543, "step": 15234 }, { "epoch": 1.563731527093596, "grad_norm": 0.03973531350493431, "learning_rate": 0.01, "loss": 2.0474, "step": 15237 }, { "epoch": 1.564039408866995, "grad_norm": 0.07055282592773438, "learning_rate": 0.01, "loss": 2.0607, "step": 15240 }, { "epoch": 1.5643472906403941, "grad_norm": 0.06088467687368393, "learning_rate": 0.01, "loss": 2.0643, "step": 15243 }, { "epoch": 1.5646551724137931, "grad_norm": 0.06393450498580933, "learning_rate": 0.01, "loss": 2.0369, "step": 15246 }, { "epoch": 1.5649630541871922, "grad_norm": 0.08600255101919174, "learning_rate": 0.01, "loss": 2.072, "step": 15249 }, { "epoch": 1.5652709359605912, "grad_norm": 0.07075429707765579, "learning_rate": 0.01, "loss": 2.0509, "step": 15252 }, { "epoch": 1.5655788177339902, "grad_norm": 0.058057527989149094, "learning_rate": 0.01, "loss": 2.0373, "step": 15255 }, { "epoch": 1.5658866995073892, "grad_norm": 0.04670482128858566, "learning_rate": 0.01, "loss": 2.0447, "step": 15258 }, { "epoch": 1.5661945812807883, "grad_norm": 0.08971681445837021, "learning_rate": 0.01, "loss": 2.0599, "step": 15261 }, { "epoch": 1.5665024630541873, "grad_norm": 0.12580984830856323, "learning_rate": 0.01, "loss": 2.0402, "step": 15264 }, { "epoch": 1.5668103448275863, "grad_norm": 0.05133863538503647, "learning_rate": 0.01, "loss": 2.056, "step": 15267 }, { "epoch": 1.5671182266009853, "grad_norm": 0.07821512222290039, "learning_rate": 0.01, "loss": 2.0458, "step": 15270 }, { "epoch": 1.5674261083743843, "grad_norm": 0.07024712860584259, "learning_rate": 0.01, "loss": 2.0496, "step": 15273 }, { "epoch": 1.5677339901477834, "grad_norm": 0.09332927316427231, "learning_rate": 0.01, "loss": 2.0358, "step": 15276 }, { "epoch": 1.5680418719211824, "grad_norm": 0.06875135749578476, "learning_rate": 0.01, "loss": 2.0459, "step": 15279 }, { "epoch": 1.5683497536945814, "grad_norm": 0.08868546783924103, "learning_rate": 0.01, "loss": 2.0641, "step": 15282 }, { "epoch": 1.5686576354679804, "grad_norm": 0.07729046791791916, "learning_rate": 0.01, "loss": 2.0333, "step": 15285 }, { "epoch": 1.5689655172413794, "grad_norm": 0.07686775177717209, "learning_rate": 0.01, "loss": 2.0446, "step": 15288 }, { "epoch": 1.5692733990147785, "grad_norm": 0.0839667096734047, "learning_rate": 0.01, "loss": 2.0428, "step": 15291 }, { "epoch": 1.5695812807881775, "grad_norm": 0.0704370066523552, "learning_rate": 0.01, "loss": 2.0638, "step": 15294 }, { "epoch": 1.5698891625615765, "grad_norm": 0.05312497168779373, "learning_rate": 0.01, "loss": 2.029, "step": 15297 }, { "epoch": 1.5701970443349755, "grad_norm": 0.049166906625032425, "learning_rate": 0.01, "loss": 2.0544, "step": 15300 }, { "epoch": 1.5705049261083743, "grad_norm": 0.041398897767066956, "learning_rate": 0.01, "loss": 2.0652, "step": 15303 }, { "epoch": 1.5708128078817734, "grad_norm": 0.08617027848958969, "learning_rate": 0.01, "loss": 2.0675, "step": 15306 }, { "epoch": 1.5711206896551724, "grad_norm": 0.0348927266895771, "learning_rate": 0.01, "loss": 2.043, "step": 15309 }, { "epoch": 1.5714285714285714, "grad_norm": 0.060787077993154526, "learning_rate": 0.01, "loss": 2.0439, "step": 15312 }, { "epoch": 1.5717364532019704, "grad_norm": 0.050898227840662, "learning_rate": 0.01, "loss": 2.0475, "step": 15315 }, { "epoch": 1.5720443349753694, "grad_norm": 0.04594309255480766, "learning_rate": 0.01, "loss": 2.0352, "step": 15318 }, { "epoch": 1.5723522167487685, "grad_norm": 0.1161418929696083, "learning_rate": 0.01, "loss": 2.0232, "step": 15321 }, { "epoch": 1.5726600985221675, "grad_norm": 0.05419136583805084, "learning_rate": 0.01, "loss": 2.0417, "step": 15324 }, { "epoch": 1.5729679802955665, "grad_norm": 0.07661257684230804, "learning_rate": 0.01, "loss": 2.0548, "step": 15327 }, { "epoch": 1.5732758620689655, "grad_norm": 0.09760436415672302, "learning_rate": 0.01, "loss": 2.0539, "step": 15330 }, { "epoch": 1.5735837438423645, "grad_norm": 0.07211121916770935, "learning_rate": 0.01, "loss": 2.037, "step": 15333 }, { "epoch": 1.5738916256157636, "grad_norm": 0.08360971510410309, "learning_rate": 0.01, "loss": 2.0465, "step": 15336 }, { "epoch": 1.5741995073891626, "grad_norm": 0.05901337414979935, "learning_rate": 0.01, "loss": 2.0581, "step": 15339 }, { "epoch": 1.5745073891625616, "grad_norm": 0.07543767988681793, "learning_rate": 0.01, "loss": 2.0437, "step": 15342 }, { "epoch": 1.5748152709359606, "grad_norm": 0.04725690186023712, "learning_rate": 0.01, "loss": 2.0751, "step": 15345 }, { "epoch": 1.5751231527093597, "grad_norm": 0.051067035645246506, "learning_rate": 0.01, "loss": 2.0434, "step": 15348 }, { "epoch": 1.5754310344827587, "grad_norm": 0.03145357966423035, "learning_rate": 0.01, "loss": 2.0511, "step": 15351 }, { "epoch": 1.5757389162561575, "grad_norm": 0.09291981905698776, "learning_rate": 0.01, "loss": 2.0604, "step": 15354 }, { "epoch": 1.5760467980295565, "grad_norm": 0.03409574180841446, "learning_rate": 0.01, "loss": 2.0052, "step": 15357 }, { "epoch": 1.5763546798029555, "grad_norm": 0.045993223786354065, "learning_rate": 0.01, "loss": 2.0442, "step": 15360 }, { "epoch": 1.5766625615763545, "grad_norm": 0.03174202889204025, "learning_rate": 0.01, "loss": 2.0525, "step": 15363 }, { "epoch": 1.5769704433497536, "grad_norm": 0.057013627141714096, "learning_rate": 0.01, "loss": 2.0699, "step": 15366 }, { "epoch": 1.5772783251231526, "grad_norm": 0.05778640881180763, "learning_rate": 0.01, "loss": 2.0641, "step": 15369 }, { "epoch": 1.5775862068965516, "grad_norm": 0.11259103566408157, "learning_rate": 0.01, "loss": 2.0496, "step": 15372 }, { "epoch": 1.5778940886699506, "grad_norm": 0.05728684365749359, "learning_rate": 0.01, "loss": 2.0646, "step": 15375 }, { "epoch": 1.5782019704433496, "grad_norm": 0.07037964463233948, "learning_rate": 0.01, "loss": 2.072, "step": 15378 }, { "epoch": 1.5785098522167487, "grad_norm": 0.05147834122180939, "learning_rate": 0.01, "loss": 2.0339, "step": 15381 }, { "epoch": 1.5788177339901477, "grad_norm": 0.0663742870092392, "learning_rate": 0.01, "loss": 2.0534, "step": 15384 }, { "epoch": 1.5791256157635467, "grad_norm": 0.04665178433060646, "learning_rate": 0.01, "loss": 2.0698, "step": 15387 }, { "epoch": 1.5794334975369457, "grad_norm": 0.037410903722047806, "learning_rate": 0.01, "loss": 2.0529, "step": 15390 }, { "epoch": 1.5797413793103448, "grad_norm": 0.03849703446030617, "learning_rate": 0.01, "loss": 2.0484, "step": 15393 }, { "epoch": 1.5800492610837438, "grad_norm": 0.10119396448135376, "learning_rate": 0.01, "loss": 2.0752, "step": 15396 }, { "epoch": 1.5803571428571428, "grad_norm": 0.03294401615858078, "learning_rate": 0.01, "loss": 2.0218, "step": 15399 }, { "epoch": 1.5806650246305418, "grad_norm": 0.06310781091451645, "learning_rate": 0.01, "loss": 2.0311, "step": 15402 }, { "epoch": 1.5809729064039408, "grad_norm": 0.050191253423690796, "learning_rate": 0.01, "loss": 2.0408, "step": 15405 }, { "epoch": 1.5812807881773399, "grad_norm": 0.09952792525291443, "learning_rate": 0.01, "loss": 2.0622, "step": 15408 }, { "epoch": 1.5815886699507389, "grad_norm": 0.12618017196655273, "learning_rate": 0.01, "loss": 2.0696, "step": 15411 }, { "epoch": 1.581896551724138, "grad_norm": 0.19296571612358093, "learning_rate": 0.01, "loss": 2.072, "step": 15414 }, { "epoch": 1.582204433497537, "grad_norm": 0.1124732494354248, "learning_rate": 0.01, "loss": 2.0451, "step": 15417 }, { "epoch": 1.582512315270936, "grad_norm": 0.06556060910224915, "learning_rate": 0.01, "loss": 2.0325, "step": 15420 }, { "epoch": 1.582820197044335, "grad_norm": 0.05607735365629196, "learning_rate": 0.01, "loss": 2.0663, "step": 15423 }, { "epoch": 1.583128078817734, "grad_norm": 0.04731295630335808, "learning_rate": 0.01, "loss": 2.0559, "step": 15426 }, { "epoch": 1.583435960591133, "grad_norm": 0.060452669858932495, "learning_rate": 0.01, "loss": 2.0498, "step": 15429 }, { "epoch": 1.583743842364532, "grad_norm": 0.056996360421180725, "learning_rate": 0.01, "loss": 2.0752, "step": 15432 }, { "epoch": 1.584051724137931, "grad_norm": 0.11382313817739487, "learning_rate": 0.01, "loss": 2.0723, "step": 15435 }, { "epoch": 1.58435960591133, "grad_norm": 0.05176008865237236, "learning_rate": 0.01, "loss": 2.0706, "step": 15438 }, { "epoch": 1.584667487684729, "grad_norm": 0.05329972878098488, "learning_rate": 0.01, "loss": 2.0473, "step": 15441 }, { "epoch": 1.5849753694581281, "grad_norm": 0.05416284501552582, "learning_rate": 0.01, "loss": 2.0585, "step": 15444 }, { "epoch": 1.5852832512315271, "grad_norm": 0.052267350256443024, "learning_rate": 0.01, "loss": 2.0314, "step": 15447 }, { "epoch": 1.5855911330049262, "grad_norm": 0.1025579646229744, "learning_rate": 0.01, "loss": 2.0412, "step": 15450 }, { "epoch": 1.5858990147783252, "grad_norm": 0.06416940689086914, "learning_rate": 0.01, "loss": 2.0312, "step": 15453 }, { "epoch": 1.5862068965517242, "grad_norm": 0.05699596926569939, "learning_rate": 0.01, "loss": 2.0678, "step": 15456 }, { "epoch": 1.5865147783251232, "grad_norm": 0.036711812019348145, "learning_rate": 0.01, "loss": 2.0251, "step": 15459 }, { "epoch": 1.5868226600985222, "grad_norm": 0.1025582030415535, "learning_rate": 0.01, "loss": 2.0387, "step": 15462 }, { "epoch": 1.5871305418719213, "grad_norm": 0.03923096880316734, "learning_rate": 0.01, "loss": 2.067, "step": 15465 }, { "epoch": 1.5874384236453203, "grad_norm": 0.08267144113779068, "learning_rate": 0.01, "loss": 2.0583, "step": 15468 }, { "epoch": 1.5877463054187193, "grad_norm": 0.15374930202960968, "learning_rate": 0.01, "loss": 2.0384, "step": 15471 }, { "epoch": 1.5880541871921183, "grad_norm": 0.10246127098798752, "learning_rate": 0.01, "loss": 2.0696, "step": 15474 }, { "epoch": 1.5883620689655173, "grad_norm": 0.09784136712551117, "learning_rate": 0.01, "loss": 2.0546, "step": 15477 }, { "epoch": 1.5886699507389164, "grad_norm": 0.08747786283493042, "learning_rate": 0.01, "loss": 2.0754, "step": 15480 }, { "epoch": 1.5889778325123154, "grad_norm": 0.0755406990647316, "learning_rate": 0.01, "loss": 2.0477, "step": 15483 }, { "epoch": 1.5892857142857144, "grad_norm": 0.05593521520495415, "learning_rate": 0.01, "loss": 2.0485, "step": 15486 }, { "epoch": 1.5895935960591134, "grad_norm": 0.04462866857647896, "learning_rate": 0.01, "loss": 2.056, "step": 15489 }, { "epoch": 1.5899014778325125, "grad_norm": 0.040571678429841995, "learning_rate": 0.01, "loss": 2.0906, "step": 15492 }, { "epoch": 1.5902093596059115, "grad_norm": 0.038100458681583405, "learning_rate": 0.01, "loss": 2.0735, "step": 15495 }, { "epoch": 1.5905172413793105, "grad_norm": 0.04336906597018242, "learning_rate": 0.01, "loss": 2.0716, "step": 15498 }, { "epoch": 1.5908251231527095, "grad_norm": 0.1424836665391922, "learning_rate": 0.01, "loss": 2.0424, "step": 15501 }, { "epoch": 1.5911330049261085, "grad_norm": 0.09235331416130066, "learning_rate": 0.01, "loss": 2.0461, "step": 15504 }, { "epoch": 1.5914408866995073, "grad_norm": 0.07932816445827484, "learning_rate": 0.01, "loss": 2.0595, "step": 15507 }, { "epoch": 1.5917487684729064, "grad_norm": 0.057297565042972565, "learning_rate": 0.01, "loss": 2.0407, "step": 15510 }, { "epoch": 1.5920566502463054, "grad_norm": 0.05323542281985283, "learning_rate": 0.01, "loss": 2.0568, "step": 15513 }, { "epoch": 1.5923645320197044, "grad_norm": 0.039465416222810745, "learning_rate": 0.01, "loss": 2.0455, "step": 15516 }, { "epoch": 1.5926724137931034, "grad_norm": 0.044614970684051514, "learning_rate": 0.01, "loss": 2.0784, "step": 15519 }, { "epoch": 1.5929802955665024, "grad_norm": 0.044074248522520065, "learning_rate": 0.01, "loss": 2.0391, "step": 15522 }, { "epoch": 1.5932881773399015, "grad_norm": 0.04098647087812424, "learning_rate": 0.01, "loss": 2.0512, "step": 15525 }, { "epoch": 1.5935960591133005, "grad_norm": 0.18400810658931732, "learning_rate": 0.01, "loss": 2.0623, "step": 15528 }, { "epoch": 1.5939039408866995, "grad_norm": 0.10264160484075546, "learning_rate": 0.01, "loss": 2.0546, "step": 15531 }, { "epoch": 1.5942118226600985, "grad_norm": 0.10086511820554733, "learning_rate": 0.01, "loss": 2.0671, "step": 15534 }, { "epoch": 1.5945197044334976, "grad_norm": 0.03823179379105568, "learning_rate": 0.01, "loss": 2.0496, "step": 15537 }, { "epoch": 1.5948275862068966, "grad_norm": 0.0449577271938324, "learning_rate": 0.01, "loss": 2.0635, "step": 15540 }, { "epoch": 1.5951354679802956, "grad_norm": 0.04791559278964996, "learning_rate": 0.01, "loss": 2.0596, "step": 15543 }, { "epoch": 1.5954433497536946, "grad_norm": 0.04523475095629692, "learning_rate": 0.01, "loss": 2.0457, "step": 15546 }, { "epoch": 1.5957512315270936, "grad_norm": 0.10654012113809586, "learning_rate": 0.01, "loss": 2.0172, "step": 15549 }, { "epoch": 1.5960591133004927, "grad_norm": 0.06602972745895386, "learning_rate": 0.01, "loss": 2.0565, "step": 15552 }, { "epoch": 1.5963669950738915, "grad_norm": 0.10605626553297043, "learning_rate": 0.01, "loss": 2.0792, "step": 15555 }, { "epoch": 1.5966748768472905, "grad_norm": 0.05995124578475952, "learning_rate": 0.01, "loss": 2.0367, "step": 15558 }, { "epoch": 1.5969827586206895, "grad_norm": 0.05426995828747749, "learning_rate": 0.01, "loss": 2.0458, "step": 15561 }, { "epoch": 1.5972906403940885, "grad_norm": 0.08749561756849289, "learning_rate": 0.01, "loss": 2.0509, "step": 15564 }, { "epoch": 1.5975985221674875, "grad_norm": 0.0735105574131012, "learning_rate": 0.01, "loss": 2.0548, "step": 15567 }, { "epoch": 1.5979064039408866, "grad_norm": 0.05417585372924805, "learning_rate": 0.01, "loss": 2.0304, "step": 15570 }, { "epoch": 1.5982142857142856, "grad_norm": 0.04170646145939827, "learning_rate": 0.01, "loss": 2.0751, "step": 15573 }, { "epoch": 1.5985221674876846, "grad_norm": 0.05886775627732277, "learning_rate": 0.01, "loss": 2.0419, "step": 15576 }, { "epoch": 1.5988300492610836, "grad_norm": 0.04231201857328415, "learning_rate": 0.01, "loss": 2.0531, "step": 15579 }, { "epoch": 1.5991379310344827, "grad_norm": 0.06620585918426514, "learning_rate": 0.01, "loss": 2.043, "step": 15582 }, { "epoch": 1.5994458128078817, "grad_norm": 0.10536913573741913, "learning_rate": 0.01, "loss": 2.0327, "step": 15585 }, { "epoch": 1.5997536945812807, "grad_norm": 0.14467884600162506, "learning_rate": 0.01, "loss": 2.0491, "step": 15588 }, { "epoch": 1.6000615763546797, "grad_norm": 0.11715273559093475, "learning_rate": 0.01, "loss": 2.0362, "step": 15591 }, { "epoch": 1.6003694581280787, "grad_norm": 0.04978121817111969, "learning_rate": 0.01, "loss": 2.0444, "step": 15594 }, { "epoch": 1.6006773399014778, "grad_norm": 0.06248803436756134, "learning_rate": 0.01, "loss": 2.0527, "step": 15597 }, { "epoch": 1.6009852216748768, "grad_norm": 0.05408048257231712, "learning_rate": 0.01, "loss": 2.0519, "step": 15600 }, { "epoch": 1.6012931034482758, "grad_norm": 0.05805948004126549, "learning_rate": 0.01, "loss": 2.0539, "step": 15603 }, { "epoch": 1.6016009852216748, "grad_norm": 0.03809194639325142, "learning_rate": 0.01, "loss": 2.0515, "step": 15606 }, { "epoch": 1.6019088669950738, "grad_norm": 0.07981141656637192, "learning_rate": 0.01, "loss": 2.0238, "step": 15609 }, { "epoch": 1.6022167487684729, "grad_norm": 0.04769575223326683, "learning_rate": 0.01, "loss": 2.0703, "step": 15612 }, { "epoch": 1.6025246305418719, "grad_norm": 0.09913644194602966, "learning_rate": 0.01, "loss": 2.074, "step": 15615 }, { "epoch": 1.602832512315271, "grad_norm": 0.12298569083213806, "learning_rate": 0.01, "loss": 2.0662, "step": 15618 }, { "epoch": 1.60314039408867, "grad_norm": 0.0525309219956398, "learning_rate": 0.01, "loss": 2.0411, "step": 15621 }, { "epoch": 1.603448275862069, "grad_norm": 0.07430320978164673, "learning_rate": 0.01, "loss": 2.0465, "step": 15624 }, { "epoch": 1.603756157635468, "grad_norm": 0.036753058433532715, "learning_rate": 0.01, "loss": 2.0408, "step": 15627 }, { "epoch": 1.604064039408867, "grad_norm": 0.04560523107647896, "learning_rate": 0.01, "loss": 2.074, "step": 15630 }, { "epoch": 1.604371921182266, "grad_norm": 0.07089810073375702, "learning_rate": 0.01, "loss": 2.0599, "step": 15633 }, { "epoch": 1.604679802955665, "grad_norm": 0.10833004862070084, "learning_rate": 0.01, "loss": 2.066, "step": 15636 }, { "epoch": 1.604987684729064, "grad_norm": 0.06033416837453842, "learning_rate": 0.01, "loss": 2.0893, "step": 15639 }, { "epoch": 1.605295566502463, "grad_norm": 0.06819162517786026, "learning_rate": 0.01, "loss": 2.0432, "step": 15642 }, { "epoch": 1.605603448275862, "grad_norm": 0.08949002623558044, "learning_rate": 0.01, "loss": 2.0891, "step": 15645 }, { "epoch": 1.6059113300492611, "grad_norm": 0.04749004542827606, "learning_rate": 0.01, "loss": 2.0434, "step": 15648 }, { "epoch": 1.6062192118226601, "grad_norm": 0.06903103739023209, "learning_rate": 0.01, "loss": 2.0379, "step": 15651 }, { "epoch": 1.6065270935960592, "grad_norm": 0.10074819624423981, "learning_rate": 0.01, "loss": 2.0657, "step": 15654 }, { "epoch": 1.6068349753694582, "grad_norm": 0.0390753298997879, "learning_rate": 0.01, "loss": 2.0186, "step": 15657 }, { "epoch": 1.6071428571428572, "grad_norm": 0.04776669666171074, "learning_rate": 0.01, "loss": 2.0435, "step": 15660 }, { "epoch": 1.6074507389162562, "grad_norm": 0.1191340908408165, "learning_rate": 0.01, "loss": 2.0519, "step": 15663 }, { "epoch": 1.6077586206896552, "grad_norm": 0.08326657861471176, "learning_rate": 0.01, "loss": 2.0443, "step": 15666 }, { "epoch": 1.6080665024630543, "grad_norm": 0.044734589755535126, "learning_rate": 0.01, "loss": 2.0626, "step": 15669 }, { "epoch": 1.6083743842364533, "grad_norm": 0.047262392938137054, "learning_rate": 0.01, "loss": 2.0351, "step": 15672 }, { "epoch": 1.6086822660098523, "grad_norm": 0.0908563882112503, "learning_rate": 0.01, "loss": 2.0655, "step": 15675 }, { "epoch": 1.6089901477832513, "grad_norm": 0.06681264191865921, "learning_rate": 0.01, "loss": 2.0606, "step": 15678 }, { "epoch": 1.6092980295566504, "grad_norm": 0.09569018334150314, "learning_rate": 0.01, "loss": 2.0454, "step": 15681 }, { "epoch": 1.6096059113300494, "grad_norm": 0.04303963482379913, "learning_rate": 0.01, "loss": 2.0296, "step": 15684 }, { "epoch": 1.6099137931034484, "grad_norm": 0.09924867749214172, "learning_rate": 0.01, "loss": 2.0578, "step": 15687 }, { "epoch": 1.6102216748768474, "grad_norm": 0.041328392922878265, "learning_rate": 0.01, "loss": 2.0538, "step": 15690 }, { "epoch": 1.6105295566502464, "grad_norm": 0.056367840617895126, "learning_rate": 0.01, "loss": 2.0746, "step": 15693 }, { "epoch": 1.6108374384236455, "grad_norm": 0.06074264645576477, "learning_rate": 0.01, "loss": 2.0707, "step": 15696 }, { "epoch": 1.6111453201970445, "grad_norm": 0.06541740894317627, "learning_rate": 0.01, "loss": 2.0615, "step": 15699 }, { "epoch": 1.6114532019704435, "grad_norm": 0.06279835850000381, "learning_rate": 0.01, "loss": 2.0484, "step": 15702 }, { "epoch": 1.6117610837438425, "grad_norm": 0.03825109452009201, "learning_rate": 0.01, "loss": 2.0317, "step": 15705 }, { "epoch": 1.6120689655172413, "grad_norm": 0.03792817145586014, "learning_rate": 0.01, "loss": 2.0246, "step": 15708 }, { "epoch": 1.6123768472906403, "grad_norm": 0.05229473114013672, "learning_rate": 0.01, "loss": 2.0765, "step": 15711 }, { "epoch": 1.6126847290640394, "grad_norm": 0.11285384744405746, "learning_rate": 0.01, "loss": 2.0392, "step": 15714 }, { "epoch": 1.6129926108374384, "grad_norm": 0.07333546876907349, "learning_rate": 0.01, "loss": 2.05, "step": 15717 }, { "epoch": 1.6133004926108374, "grad_norm": 0.07698936760425568, "learning_rate": 0.01, "loss": 2.0757, "step": 15720 }, { "epoch": 1.6136083743842364, "grad_norm": 0.06517963856458664, "learning_rate": 0.01, "loss": 2.032, "step": 15723 }, { "epoch": 1.6139162561576355, "grad_norm": 0.07242800295352936, "learning_rate": 0.01, "loss": 2.0398, "step": 15726 }, { "epoch": 1.6142241379310345, "grad_norm": 0.03956649452447891, "learning_rate": 0.01, "loss": 2.0437, "step": 15729 }, { "epoch": 1.6145320197044335, "grad_norm": 0.10249898582696915, "learning_rate": 0.01, "loss": 2.0716, "step": 15732 }, { "epoch": 1.6148399014778325, "grad_norm": 0.09716839343309402, "learning_rate": 0.01, "loss": 2.0288, "step": 15735 }, { "epoch": 1.6151477832512315, "grad_norm": 0.0809134840965271, "learning_rate": 0.01, "loss": 2.0553, "step": 15738 }, { "epoch": 1.6154556650246306, "grad_norm": 0.07891330122947693, "learning_rate": 0.01, "loss": 2.0622, "step": 15741 }, { "epoch": 1.6157635467980296, "grad_norm": 0.06289231032133102, "learning_rate": 0.01, "loss": 2.0382, "step": 15744 }, { "epoch": 1.6160714285714286, "grad_norm": 0.032251689583063126, "learning_rate": 0.01, "loss": 2.049, "step": 15747 }, { "epoch": 1.6163793103448276, "grad_norm": 0.032203931361436844, "learning_rate": 0.01, "loss": 2.0474, "step": 15750 }, { "epoch": 1.6166871921182266, "grad_norm": 0.042572617530822754, "learning_rate": 0.01, "loss": 2.0604, "step": 15753 }, { "epoch": 1.6169950738916257, "grad_norm": 0.06869769096374512, "learning_rate": 0.01, "loss": 2.0643, "step": 15756 }, { "epoch": 1.6173029556650245, "grad_norm": 0.09649953991174698, "learning_rate": 0.01, "loss": 2.0811, "step": 15759 }, { "epoch": 1.6176108374384235, "grad_norm": 0.060255225747823715, "learning_rate": 0.01, "loss": 2.0543, "step": 15762 }, { "epoch": 1.6179187192118225, "grad_norm": 0.0548517182469368, "learning_rate": 0.01, "loss": 2.0427, "step": 15765 }, { "epoch": 1.6182266009852215, "grad_norm": 0.09392546862363815, "learning_rate": 0.01, "loss": 2.0722, "step": 15768 }, { "epoch": 1.6185344827586206, "grad_norm": 0.052100926637649536, "learning_rate": 0.01, "loss": 2.0469, "step": 15771 }, { "epoch": 1.6188423645320196, "grad_norm": 0.05099212005734444, "learning_rate": 0.01, "loss": 2.0589, "step": 15774 }, { "epoch": 1.6191502463054186, "grad_norm": 0.0486266165971756, "learning_rate": 0.01, "loss": 2.0308, "step": 15777 }, { "epoch": 1.6194581280788176, "grad_norm": 0.044072605669498444, "learning_rate": 0.01, "loss": 2.0877, "step": 15780 }, { "epoch": 1.6197660098522166, "grad_norm": 0.09196856617927551, "learning_rate": 0.01, "loss": 2.0224, "step": 15783 }, { "epoch": 1.6200738916256157, "grad_norm": 0.05948984995484352, "learning_rate": 0.01, "loss": 2.0425, "step": 15786 }, { "epoch": 1.6203817733990147, "grad_norm": 0.043075162917375565, "learning_rate": 0.01, "loss": 2.058, "step": 15789 }, { "epoch": 1.6206896551724137, "grad_norm": 0.06739038228988647, "learning_rate": 0.01, "loss": 2.0356, "step": 15792 }, { "epoch": 1.6209975369458127, "grad_norm": 0.05961238220334053, "learning_rate": 0.01, "loss": 2.0492, "step": 15795 }, { "epoch": 1.6213054187192117, "grad_norm": 0.06527238339185715, "learning_rate": 0.01, "loss": 2.0227, "step": 15798 }, { "epoch": 1.6216133004926108, "grad_norm": 0.09234929084777832, "learning_rate": 0.01, "loss": 2.039, "step": 15801 }, { "epoch": 1.6219211822660098, "grad_norm": 0.08050446212291718, "learning_rate": 0.01, "loss": 2.0769, "step": 15804 }, { "epoch": 1.6222290640394088, "grad_norm": 0.06419754028320312, "learning_rate": 0.01, "loss": 2.0613, "step": 15807 }, { "epoch": 1.6225369458128078, "grad_norm": 0.06302323937416077, "learning_rate": 0.01, "loss": 2.0535, "step": 15810 }, { "epoch": 1.6228448275862069, "grad_norm": 0.051602717489004135, "learning_rate": 0.01, "loss": 2.0462, "step": 15813 }, { "epoch": 1.6231527093596059, "grad_norm": 0.12424405664205551, "learning_rate": 0.01, "loss": 2.0562, "step": 15816 }, { "epoch": 1.623460591133005, "grad_norm": 0.10444232821464539, "learning_rate": 0.01, "loss": 2.0527, "step": 15819 }, { "epoch": 1.623768472906404, "grad_norm": 0.06170908361673355, "learning_rate": 0.01, "loss": 2.0456, "step": 15822 }, { "epoch": 1.624076354679803, "grad_norm": 0.05145244672894478, "learning_rate": 0.01, "loss": 2.0368, "step": 15825 }, { "epoch": 1.624384236453202, "grad_norm": 0.0459282286465168, "learning_rate": 0.01, "loss": 2.0547, "step": 15828 }, { "epoch": 1.624692118226601, "grad_norm": 0.05250949412584305, "learning_rate": 0.01, "loss": 2.0475, "step": 15831 }, { "epoch": 1.625, "grad_norm": 0.03222022205591202, "learning_rate": 0.01, "loss": 2.0347, "step": 15834 }, { "epoch": 1.625307881773399, "grad_norm": 0.05849120765924454, "learning_rate": 0.01, "loss": 2.0351, "step": 15837 }, { "epoch": 1.625615763546798, "grad_norm": 0.04638088122010231, "learning_rate": 0.01, "loss": 2.0222, "step": 15840 }, { "epoch": 1.625923645320197, "grad_norm": 0.046597037464380264, "learning_rate": 0.01, "loss": 2.0577, "step": 15843 }, { "epoch": 1.626231527093596, "grad_norm": 0.10477445274591446, "learning_rate": 0.01, "loss": 2.0528, "step": 15846 }, { "epoch": 1.626539408866995, "grad_norm": 0.03439783677458763, "learning_rate": 0.01, "loss": 2.0631, "step": 15849 }, { "epoch": 1.6268472906403941, "grad_norm": 0.05810544639825821, "learning_rate": 0.01, "loss": 2.0531, "step": 15852 }, { "epoch": 1.6271551724137931, "grad_norm": 0.04557522386312485, "learning_rate": 0.01, "loss": 2.0582, "step": 15855 }, { "epoch": 1.6274630541871922, "grad_norm": 0.04236530885100365, "learning_rate": 0.01, "loss": 2.0531, "step": 15858 }, { "epoch": 1.6277709359605912, "grad_norm": 0.04338948428630829, "learning_rate": 0.01, "loss": 2.0584, "step": 15861 }, { "epoch": 1.6280788177339902, "grad_norm": 0.039782583713531494, "learning_rate": 0.01, "loss": 2.0699, "step": 15864 }, { "epoch": 1.6283866995073892, "grad_norm": 0.06858891248703003, "learning_rate": 0.01, "loss": 2.0477, "step": 15867 }, { "epoch": 1.6286945812807883, "grad_norm": 0.0510399155318737, "learning_rate": 0.01, "loss": 2.0692, "step": 15870 }, { "epoch": 1.6290024630541873, "grad_norm": 0.12568604946136475, "learning_rate": 0.01, "loss": 2.0597, "step": 15873 }, { "epoch": 1.6293103448275863, "grad_norm": 0.09245727956295013, "learning_rate": 0.01, "loss": 2.0365, "step": 15876 }, { "epoch": 1.6296182266009853, "grad_norm": 0.05763734132051468, "learning_rate": 0.01, "loss": 2.0787, "step": 15879 }, { "epoch": 1.6299261083743843, "grad_norm": 0.06099852919578552, "learning_rate": 0.01, "loss": 2.0603, "step": 15882 }, { "epoch": 1.6302339901477834, "grad_norm": 0.05738021805882454, "learning_rate": 0.01, "loss": 2.0405, "step": 15885 }, { "epoch": 1.6305418719211824, "grad_norm": 0.04953853040933609, "learning_rate": 0.01, "loss": 2.0622, "step": 15888 }, { "epoch": 1.6308497536945814, "grad_norm": 0.08572196215391159, "learning_rate": 0.01, "loss": 2.0618, "step": 15891 }, { "epoch": 1.6311576354679804, "grad_norm": 0.09245479106903076, "learning_rate": 0.01, "loss": 2.0453, "step": 15894 }, { "epoch": 1.6314655172413794, "grad_norm": 0.057964712381362915, "learning_rate": 0.01, "loss": 2.0186, "step": 15897 }, { "epoch": 1.6317733990147785, "grad_norm": 0.05189305916428566, "learning_rate": 0.01, "loss": 2.0615, "step": 15900 }, { "epoch": 1.6320812807881775, "grad_norm": 0.07327884435653687, "learning_rate": 0.01, "loss": 2.0175, "step": 15903 }, { "epoch": 1.6323891625615765, "grad_norm": 0.07089177519083023, "learning_rate": 0.01, "loss": 2.0475, "step": 15906 }, { "epoch": 1.6326970443349755, "grad_norm": 0.09783073514699936, "learning_rate": 0.01, "loss": 2.051, "step": 15909 }, { "epoch": 1.6330049261083743, "grad_norm": 0.06617991626262665, "learning_rate": 0.01, "loss": 2.0408, "step": 15912 }, { "epoch": 1.6333128078817734, "grad_norm": 0.10033921152353287, "learning_rate": 0.01, "loss": 2.0308, "step": 15915 }, { "epoch": 1.6336206896551724, "grad_norm": 0.054432835429906845, "learning_rate": 0.01, "loss": 2.0404, "step": 15918 }, { "epoch": 1.6339285714285714, "grad_norm": 0.056940387934446335, "learning_rate": 0.01, "loss": 2.0566, "step": 15921 }, { "epoch": 1.6342364532019704, "grad_norm": 0.12047278136014938, "learning_rate": 0.01, "loss": 2.0464, "step": 15924 }, { "epoch": 1.6345443349753694, "grad_norm": 0.04637087881565094, "learning_rate": 0.01, "loss": 2.0514, "step": 15927 }, { "epoch": 1.6348522167487685, "grad_norm": 0.03925006836652756, "learning_rate": 0.01, "loss": 2.0536, "step": 15930 }, { "epoch": 1.6351600985221675, "grad_norm": 0.04180562496185303, "learning_rate": 0.01, "loss": 2.0206, "step": 15933 }, { "epoch": 1.6354679802955665, "grad_norm": 0.08031155914068222, "learning_rate": 0.01, "loss": 2.0509, "step": 15936 }, { "epoch": 1.6357758620689655, "grad_norm": 0.0812869518995285, "learning_rate": 0.01, "loss": 2.021, "step": 15939 }, { "epoch": 1.6360837438423645, "grad_norm": 0.07887094467878342, "learning_rate": 0.01, "loss": 2.0554, "step": 15942 }, { "epoch": 1.6363916256157636, "grad_norm": 0.12604457139968872, "learning_rate": 0.01, "loss": 2.0236, "step": 15945 }, { "epoch": 1.6366995073891626, "grad_norm": 0.1262006163597107, "learning_rate": 0.01, "loss": 2.0806, "step": 15948 }, { "epoch": 1.6370073891625616, "grad_norm": 0.07335629314184189, "learning_rate": 0.01, "loss": 2.0339, "step": 15951 }, { "epoch": 1.6373152709359606, "grad_norm": 0.043172985315322876, "learning_rate": 0.01, "loss": 2.0455, "step": 15954 }, { "epoch": 1.6376231527093597, "grad_norm": 0.07475942373275757, "learning_rate": 0.01, "loss": 2.0842, "step": 15957 }, { "epoch": 1.6379310344827587, "grad_norm": 0.06113087013363838, "learning_rate": 0.01, "loss": 2.0526, "step": 15960 }, { "epoch": 1.6382389162561575, "grad_norm": 0.08709672093391418, "learning_rate": 0.01, "loss": 2.0886, "step": 15963 }, { "epoch": 1.6385467980295565, "grad_norm": 0.05810529738664627, "learning_rate": 0.01, "loss": 2.043, "step": 15966 }, { "epoch": 1.6388546798029555, "grad_norm": 0.0831620916724205, "learning_rate": 0.01, "loss": 2.0599, "step": 15969 }, { "epoch": 1.6391625615763545, "grad_norm": 0.040577951818704605, "learning_rate": 0.01, "loss": 2.0676, "step": 15972 }, { "epoch": 1.6394704433497536, "grad_norm": 0.03428385406732559, "learning_rate": 0.01, "loss": 2.0491, "step": 15975 }, { "epoch": 1.6397783251231526, "grad_norm": 0.04800771176815033, "learning_rate": 0.01, "loss": 2.0371, "step": 15978 }, { "epoch": 1.6400862068965516, "grad_norm": 0.05769934877753258, "learning_rate": 0.01, "loss": 2.0468, "step": 15981 }, { "epoch": 1.6403940886699506, "grad_norm": 0.08842117339372635, "learning_rate": 0.01, "loss": 2.037, "step": 15984 }, { "epoch": 1.6407019704433496, "grad_norm": 0.09740414470434189, "learning_rate": 0.01, "loss": 2.0423, "step": 15987 }, { "epoch": 1.6410098522167487, "grad_norm": 0.11128890514373779, "learning_rate": 0.01, "loss": 2.0423, "step": 15990 }, { "epoch": 1.6413177339901477, "grad_norm": 0.03690354898571968, "learning_rate": 0.01, "loss": 2.0412, "step": 15993 }, { "epoch": 1.6416256157635467, "grad_norm": 0.07311075925827026, "learning_rate": 0.01, "loss": 2.036, "step": 15996 }, { "epoch": 1.6419334975369457, "grad_norm": 0.045825451612472534, "learning_rate": 0.01, "loss": 2.0491, "step": 15999 }, { "epoch": 1.6422413793103448, "grad_norm": 0.09123300760984421, "learning_rate": 0.01, "loss": 2.0574, "step": 16002 }, { "epoch": 1.6425492610837438, "grad_norm": 0.0702185183763504, "learning_rate": 0.01, "loss": 2.0715, "step": 16005 }, { "epoch": 1.6428571428571428, "grad_norm": 0.0355604812502861, "learning_rate": 0.01, "loss": 2.0461, "step": 16008 }, { "epoch": 1.6431650246305418, "grad_norm": 0.03151632100343704, "learning_rate": 0.01, "loss": 2.0205, "step": 16011 }, { "epoch": 1.6434729064039408, "grad_norm": 0.04302441328763962, "learning_rate": 0.01, "loss": 2.0824, "step": 16014 }, { "epoch": 1.6437807881773399, "grad_norm": 0.06012306734919548, "learning_rate": 0.01, "loss": 2.0494, "step": 16017 }, { "epoch": 1.6440886699507389, "grad_norm": 0.04698712378740311, "learning_rate": 0.01, "loss": 2.0364, "step": 16020 }, { "epoch": 1.644396551724138, "grad_norm": 0.03930363059043884, "learning_rate": 0.01, "loss": 2.0612, "step": 16023 }, { "epoch": 1.644704433497537, "grad_norm": 0.0881473496556282, "learning_rate": 0.01, "loss": 2.0724, "step": 16026 }, { "epoch": 1.645012315270936, "grad_norm": 0.04207085818052292, "learning_rate": 0.01, "loss": 2.0524, "step": 16029 }, { "epoch": 1.645320197044335, "grad_norm": 0.04729215428233147, "learning_rate": 0.01, "loss": 2.0538, "step": 16032 }, { "epoch": 1.645628078817734, "grad_norm": 0.050990305840969086, "learning_rate": 0.01, "loss": 2.0473, "step": 16035 }, { "epoch": 1.645935960591133, "grad_norm": 0.05049813538789749, "learning_rate": 0.01, "loss": 2.0609, "step": 16038 }, { "epoch": 1.646243842364532, "grad_norm": 0.07787630707025528, "learning_rate": 0.01, "loss": 2.0505, "step": 16041 }, { "epoch": 1.646551724137931, "grad_norm": 0.06656081229448318, "learning_rate": 0.01, "loss": 2.0365, "step": 16044 }, { "epoch": 1.64685960591133, "grad_norm": 0.08293109387159348, "learning_rate": 0.01, "loss": 2.062, "step": 16047 }, { "epoch": 1.647167487684729, "grad_norm": 0.0775810182094574, "learning_rate": 0.01, "loss": 2.0466, "step": 16050 }, { "epoch": 1.6474753694581281, "grad_norm": 0.07917825132608414, "learning_rate": 0.01, "loss": 2.067, "step": 16053 }, { "epoch": 1.6477832512315271, "grad_norm": 0.07658717036247253, "learning_rate": 0.01, "loss": 2.0323, "step": 16056 }, { "epoch": 1.6480911330049262, "grad_norm": 0.07735300809144974, "learning_rate": 0.01, "loss": 2.0481, "step": 16059 }, { "epoch": 1.6483990147783252, "grad_norm": 0.07964644581079483, "learning_rate": 0.01, "loss": 2.0469, "step": 16062 }, { "epoch": 1.6487068965517242, "grad_norm": 0.0601799339056015, "learning_rate": 0.01, "loss": 2.0453, "step": 16065 }, { "epoch": 1.6490147783251232, "grad_norm": 0.1039920225739479, "learning_rate": 0.01, "loss": 2.0474, "step": 16068 }, { "epoch": 1.6493226600985222, "grad_norm": 0.055755455046892166, "learning_rate": 0.01, "loss": 2.0615, "step": 16071 }, { "epoch": 1.6496305418719213, "grad_norm": 0.0998646542429924, "learning_rate": 0.01, "loss": 2.0675, "step": 16074 }, { "epoch": 1.6499384236453203, "grad_norm": 0.04582648724317551, "learning_rate": 0.01, "loss": 2.0277, "step": 16077 }, { "epoch": 1.6502463054187193, "grad_norm": 0.08638078719377518, "learning_rate": 0.01, "loss": 2.0473, "step": 16080 }, { "epoch": 1.6505541871921183, "grad_norm": 0.053813617676496506, "learning_rate": 0.01, "loss": 2.0488, "step": 16083 }, { "epoch": 1.6508620689655173, "grad_norm": 0.08186789602041245, "learning_rate": 0.01, "loss": 2.07, "step": 16086 }, { "epoch": 1.6511699507389164, "grad_norm": 0.037794895470142365, "learning_rate": 0.01, "loss": 2.0554, "step": 16089 }, { "epoch": 1.6514778325123154, "grad_norm": 0.1052238717675209, "learning_rate": 0.01, "loss": 2.0614, "step": 16092 }, { "epoch": 1.6517857142857144, "grad_norm": 0.07596205919981003, "learning_rate": 0.01, "loss": 2.0358, "step": 16095 }, { "epoch": 1.6520935960591134, "grad_norm": 0.047295670956373215, "learning_rate": 0.01, "loss": 2.0488, "step": 16098 }, { "epoch": 1.6524014778325125, "grad_norm": 0.05572659894824028, "learning_rate": 0.01, "loss": 2.0468, "step": 16101 }, { "epoch": 1.6527093596059115, "grad_norm": 0.0429069958627224, "learning_rate": 0.01, "loss": 2.0681, "step": 16104 }, { "epoch": 1.6530172413793105, "grad_norm": 0.055060967803001404, "learning_rate": 0.01, "loss": 2.0347, "step": 16107 }, { "epoch": 1.6533251231527095, "grad_norm": 0.05243745073676109, "learning_rate": 0.01, "loss": 2.0696, "step": 16110 }, { "epoch": 1.6536330049261085, "grad_norm": 0.052228983491659164, "learning_rate": 0.01, "loss": 2.06, "step": 16113 }, { "epoch": 1.6539408866995073, "grad_norm": 0.065925233066082, "learning_rate": 0.01, "loss": 2.0707, "step": 16116 }, { "epoch": 1.6542487684729064, "grad_norm": 0.05819106101989746, "learning_rate": 0.01, "loss": 2.0137, "step": 16119 }, { "epoch": 1.6545566502463054, "grad_norm": 0.04320794716477394, "learning_rate": 0.01, "loss": 2.0691, "step": 16122 }, { "epoch": 1.6548645320197044, "grad_norm": 0.04202846437692642, "learning_rate": 0.01, "loss": 2.0456, "step": 16125 }, { "epoch": 1.6551724137931034, "grad_norm": 0.12747296690940857, "learning_rate": 0.01, "loss": 2.0419, "step": 16128 }, { "epoch": 1.6554802955665024, "grad_norm": 0.07199030369520187, "learning_rate": 0.01, "loss": 2.0347, "step": 16131 }, { "epoch": 1.6557881773399015, "grad_norm": 0.085335373878479, "learning_rate": 0.01, "loss": 2.0383, "step": 16134 }, { "epoch": 1.6560960591133005, "grad_norm": 0.061818841844797134, "learning_rate": 0.01, "loss": 2.0631, "step": 16137 }, { "epoch": 1.6564039408866995, "grad_norm": 0.06255804747343063, "learning_rate": 0.01, "loss": 2.0492, "step": 16140 }, { "epoch": 1.6567118226600985, "grad_norm": 0.08308485150337219, "learning_rate": 0.01, "loss": 2.0814, "step": 16143 }, { "epoch": 1.6570197044334976, "grad_norm": 0.06358073651790619, "learning_rate": 0.01, "loss": 2.048, "step": 16146 }, { "epoch": 1.6573275862068966, "grad_norm": 0.085427425801754, "learning_rate": 0.01, "loss": 2.0433, "step": 16149 }, { "epoch": 1.6576354679802956, "grad_norm": 0.043243568390607834, "learning_rate": 0.01, "loss": 2.0432, "step": 16152 }, { "epoch": 1.6579433497536946, "grad_norm": 0.06593325734138489, "learning_rate": 0.01, "loss": 2.0469, "step": 16155 }, { "epoch": 1.6582512315270936, "grad_norm": 0.14644569158554077, "learning_rate": 0.01, "loss": 2.0689, "step": 16158 }, { "epoch": 1.6585591133004927, "grad_norm": 0.1211152896285057, "learning_rate": 0.01, "loss": 2.0505, "step": 16161 }, { "epoch": 1.6588669950738915, "grad_norm": 0.11020830273628235, "learning_rate": 0.01, "loss": 2.0572, "step": 16164 }, { "epoch": 1.6591748768472905, "grad_norm": 0.08850467950105667, "learning_rate": 0.01, "loss": 2.0424, "step": 16167 }, { "epoch": 1.6594827586206895, "grad_norm": 0.050562698394060135, "learning_rate": 0.01, "loss": 2.0542, "step": 16170 }, { "epoch": 1.6597906403940885, "grad_norm": 0.048076871782541275, "learning_rate": 0.01, "loss": 2.0433, "step": 16173 }, { "epoch": 1.6600985221674875, "grad_norm": 0.03727034851908684, "learning_rate": 0.01, "loss": 2.0333, "step": 16176 }, { "epoch": 1.6604064039408866, "grad_norm": 0.048614371567964554, "learning_rate": 0.01, "loss": 2.0552, "step": 16179 }, { "epoch": 1.6607142857142856, "grad_norm": 0.05649641901254654, "learning_rate": 0.01, "loss": 2.0536, "step": 16182 }, { "epoch": 1.6610221674876846, "grad_norm": 0.05329003930091858, "learning_rate": 0.01, "loss": 2.0386, "step": 16185 }, { "epoch": 1.6613300492610836, "grad_norm": 0.06444583833217621, "learning_rate": 0.01, "loss": 2.055, "step": 16188 }, { "epoch": 1.6616379310344827, "grad_norm": 0.045777902007102966, "learning_rate": 0.01, "loss": 2.0476, "step": 16191 }, { "epoch": 1.6619458128078817, "grad_norm": 0.04831868037581444, "learning_rate": 0.01, "loss": 2.0582, "step": 16194 }, { "epoch": 1.6622536945812807, "grad_norm": 0.10648196935653687, "learning_rate": 0.01, "loss": 2.0579, "step": 16197 }, { "epoch": 1.6625615763546797, "grad_norm": 0.08369257301092148, "learning_rate": 0.01, "loss": 2.0505, "step": 16200 }, { "epoch": 1.6628694581280787, "grad_norm": 0.13716475665569305, "learning_rate": 0.01, "loss": 2.0383, "step": 16203 }, { "epoch": 1.6631773399014778, "grad_norm": 0.05025027319788933, "learning_rate": 0.01, "loss": 2.0549, "step": 16206 }, { "epoch": 1.6634852216748768, "grad_norm": 0.03850054368376732, "learning_rate": 0.01, "loss": 2.0412, "step": 16209 }, { "epoch": 1.6637931034482758, "grad_norm": 0.046656832098960876, "learning_rate": 0.01, "loss": 2.0595, "step": 16212 }, { "epoch": 1.6641009852216748, "grad_norm": 0.03826647624373436, "learning_rate": 0.01, "loss": 2.0352, "step": 16215 }, { "epoch": 1.6644088669950738, "grad_norm": 0.061087023466825485, "learning_rate": 0.01, "loss": 2.0357, "step": 16218 }, { "epoch": 1.6647167487684729, "grad_norm": 0.03787006065249443, "learning_rate": 0.01, "loss": 2.0226, "step": 16221 }, { "epoch": 1.6650246305418719, "grad_norm": 0.09619265049695969, "learning_rate": 0.01, "loss": 2.0399, "step": 16224 }, { "epoch": 1.665332512315271, "grad_norm": 0.04012330621480942, "learning_rate": 0.01, "loss": 2.044, "step": 16227 }, { "epoch": 1.66564039408867, "grad_norm": 0.062126293778419495, "learning_rate": 0.01, "loss": 2.0726, "step": 16230 }, { "epoch": 1.665948275862069, "grad_norm": 0.050277624279260635, "learning_rate": 0.01, "loss": 2.0219, "step": 16233 }, { "epoch": 1.666256157635468, "grad_norm": 0.03983129933476448, "learning_rate": 0.01, "loss": 2.0554, "step": 16236 }, { "epoch": 1.666564039408867, "grad_norm": 0.13119915127754211, "learning_rate": 0.01, "loss": 2.0682, "step": 16239 }, { "epoch": 1.666871921182266, "grad_norm": 0.0525536946952343, "learning_rate": 0.01, "loss": 2.0524, "step": 16242 }, { "epoch": 1.667179802955665, "grad_norm": 0.056762780994176865, "learning_rate": 0.01, "loss": 2.0293, "step": 16245 }, { "epoch": 1.667487684729064, "grad_norm": 0.08652041852474213, "learning_rate": 0.01, "loss": 2.0574, "step": 16248 }, { "epoch": 1.667795566502463, "grad_norm": 0.14455944299697876, "learning_rate": 0.01, "loss": 2.0406, "step": 16251 }, { "epoch": 1.668103448275862, "grad_norm": 0.03951118513941765, "learning_rate": 0.01, "loss": 2.0368, "step": 16254 }, { "epoch": 1.6684113300492611, "grad_norm": 0.040585123002529144, "learning_rate": 0.01, "loss": 2.017, "step": 16257 }, { "epoch": 1.6687192118226601, "grad_norm": 0.05393810570240021, "learning_rate": 0.01, "loss": 2.0679, "step": 16260 }, { "epoch": 1.6690270935960592, "grad_norm": 0.050093088299036026, "learning_rate": 0.01, "loss": 2.0546, "step": 16263 }, { "epoch": 1.6693349753694582, "grad_norm": 0.04196159914135933, "learning_rate": 0.01, "loss": 2.0488, "step": 16266 }, { "epoch": 1.6696428571428572, "grad_norm": 0.03978092968463898, "learning_rate": 0.01, "loss": 2.0453, "step": 16269 }, { "epoch": 1.6699507389162562, "grad_norm": 0.05054232105612755, "learning_rate": 0.01, "loss": 2.0337, "step": 16272 }, { "epoch": 1.6702586206896552, "grad_norm": 0.0746975764632225, "learning_rate": 0.01, "loss": 2.0636, "step": 16275 }, { "epoch": 1.6705665024630543, "grad_norm": 0.05685516446828842, "learning_rate": 0.01, "loss": 2.0591, "step": 16278 }, { "epoch": 1.6708743842364533, "grad_norm": 0.031971871852874756, "learning_rate": 0.01, "loss": 2.0657, "step": 16281 }, { "epoch": 1.6711822660098523, "grad_norm": 0.03947863727807999, "learning_rate": 0.01, "loss": 2.0333, "step": 16284 }, { "epoch": 1.6714901477832513, "grad_norm": 0.11271070688962936, "learning_rate": 0.01, "loss": 2.0421, "step": 16287 }, { "epoch": 1.6717980295566504, "grad_norm": 0.05308755114674568, "learning_rate": 0.01, "loss": 2.0536, "step": 16290 }, { "epoch": 1.6721059113300494, "grad_norm": 0.042826078832149506, "learning_rate": 0.01, "loss": 2.0694, "step": 16293 }, { "epoch": 1.6724137931034484, "grad_norm": 0.0458630695939064, "learning_rate": 0.01, "loss": 2.0312, "step": 16296 }, { "epoch": 1.6727216748768474, "grad_norm": 0.05401900038123131, "learning_rate": 0.01, "loss": 2.0613, "step": 16299 }, { "epoch": 1.6730295566502464, "grad_norm": 0.05380195751786232, "learning_rate": 0.01, "loss": 2.0336, "step": 16302 }, { "epoch": 1.6733374384236455, "grad_norm": 0.038716450333595276, "learning_rate": 0.01, "loss": 2.0548, "step": 16305 }, { "epoch": 1.6736453201970445, "grad_norm": 0.04034694656729698, "learning_rate": 0.01, "loss": 2.0421, "step": 16308 }, { "epoch": 1.6739532019704435, "grad_norm": 0.06753403693437576, "learning_rate": 0.01, "loss": 2.0324, "step": 16311 }, { "epoch": 1.6742610837438425, "grad_norm": 0.10001173615455627, "learning_rate": 0.01, "loss": 2.0467, "step": 16314 }, { "epoch": 1.6745689655172413, "grad_norm": 0.04366351664066315, "learning_rate": 0.01, "loss": 2.0622, "step": 16317 }, { "epoch": 1.6748768472906403, "grad_norm": 0.07137630879878998, "learning_rate": 0.01, "loss": 2.0333, "step": 16320 }, { "epoch": 1.6751847290640394, "grad_norm": 0.049938492476940155, "learning_rate": 0.01, "loss": 2.0426, "step": 16323 }, { "epoch": 1.6754926108374384, "grad_norm": 0.03337172046303749, "learning_rate": 0.01, "loss": 2.0462, "step": 16326 }, { "epoch": 1.6758004926108374, "grad_norm": 0.07407473772764206, "learning_rate": 0.01, "loss": 2.0705, "step": 16329 }, { "epoch": 1.6761083743842364, "grad_norm": 0.07006946206092834, "learning_rate": 0.01, "loss": 2.0383, "step": 16332 }, { "epoch": 1.6764162561576355, "grad_norm": 0.05342825874686241, "learning_rate": 0.01, "loss": 2.0481, "step": 16335 }, { "epoch": 1.6767241379310345, "grad_norm": 0.052405234426259995, "learning_rate": 0.01, "loss": 2.0138, "step": 16338 }, { "epoch": 1.6770320197044335, "grad_norm": 0.20231324434280396, "learning_rate": 0.01, "loss": 2.0472, "step": 16341 }, { "epoch": 1.6773399014778325, "grad_norm": 0.07893595844507217, "learning_rate": 0.01, "loss": 2.0415, "step": 16344 }, { "epoch": 1.6776477832512315, "grad_norm": 0.06872416287660599, "learning_rate": 0.01, "loss": 2.0376, "step": 16347 }, { "epoch": 1.6779556650246306, "grad_norm": 0.041687123477458954, "learning_rate": 0.01, "loss": 2.0442, "step": 16350 }, { "epoch": 1.6782635467980296, "grad_norm": 0.04184873029589653, "learning_rate": 0.01, "loss": 2.0769, "step": 16353 }, { "epoch": 1.6785714285714286, "grad_norm": 0.036598458886146545, "learning_rate": 0.01, "loss": 2.0255, "step": 16356 }, { "epoch": 1.6788793103448276, "grad_norm": 0.062203384935855865, "learning_rate": 0.01, "loss": 2.0582, "step": 16359 }, { "epoch": 1.6791871921182266, "grad_norm": 0.04513971135020256, "learning_rate": 0.01, "loss": 2.0475, "step": 16362 }, { "epoch": 1.6794950738916257, "grad_norm": 0.043875399976968765, "learning_rate": 0.01, "loss": 2.0455, "step": 16365 }, { "epoch": 1.6798029556650245, "grad_norm": 0.030207300558686256, "learning_rate": 0.01, "loss": 2.0733, "step": 16368 }, { "epoch": 1.6801108374384235, "grad_norm": 0.07749854028224945, "learning_rate": 0.01, "loss": 2.0402, "step": 16371 }, { "epoch": 1.6804187192118225, "grad_norm": 0.10269973427057266, "learning_rate": 0.01, "loss": 2.0342, "step": 16374 }, { "epoch": 1.6807266009852215, "grad_norm": 0.043558500707149506, "learning_rate": 0.01, "loss": 2.0429, "step": 16377 }, { "epoch": 1.6810344827586206, "grad_norm": 0.0490686409175396, "learning_rate": 0.01, "loss": 2.0381, "step": 16380 }, { "epoch": 1.6813423645320196, "grad_norm": 0.062107689678668976, "learning_rate": 0.01, "loss": 2.0592, "step": 16383 }, { "epoch": 1.6816502463054186, "grad_norm": 0.0856776013970375, "learning_rate": 0.01, "loss": 2.0666, "step": 16386 }, { "epoch": 1.6819581280788176, "grad_norm": 0.11694356054067612, "learning_rate": 0.01, "loss": 2.0545, "step": 16389 }, { "epoch": 1.6822660098522166, "grad_norm": 0.07279752194881439, "learning_rate": 0.01, "loss": 2.0348, "step": 16392 }, { "epoch": 1.6825738916256157, "grad_norm": 0.06813056766986847, "learning_rate": 0.01, "loss": 2.0549, "step": 16395 }, { "epoch": 1.6828817733990147, "grad_norm": 0.045916907489299774, "learning_rate": 0.01, "loss": 2.0714, "step": 16398 }, { "epoch": 1.6831896551724137, "grad_norm": 0.04464447498321533, "learning_rate": 0.01, "loss": 2.0655, "step": 16401 }, { "epoch": 1.6834975369458127, "grad_norm": 0.04815223440527916, "learning_rate": 0.01, "loss": 2.0633, "step": 16404 }, { "epoch": 1.6838054187192117, "grad_norm": 0.06025001034140587, "learning_rate": 0.01, "loss": 2.0325, "step": 16407 }, { "epoch": 1.6841133004926108, "grad_norm": 0.05691540613770485, "learning_rate": 0.01, "loss": 2.07, "step": 16410 }, { "epoch": 1.6844211822660098, "grad_norm": 0.04643694683909416, "learning_rate": 0.01, "loss": 2.0478, "step": 16413 }, { "epoch": 1.6847290640394088, "grad_norm": 0.03540325164794922, "learning_rate": 0.01, "loss": 2.0739, "step": 16416 }, { "epoch": 1.6850369458128078, "grad_norm": 0.034472569823265076, "learning_rate": 0.01, "loss": 2.0441, "step": 16419 }, { "epoch": 1.6853448275862069, "grad_norm": 0.04316902533173561, "learning_rate": 0.01, "loss": 2.0422, "step": 16422 }, { "epoch": 1.6856527093596059, "grad_norm": 0.04943558946251869, "learning_rate": 0.01, "loss": 2.0377, "step": 16425 }, { "epoch": 1.685960591133005, "grad_norm": 0.11482315510511398, "learning_rate": 0.01, "loss": 2.0668, "step": 16428 }, { "epoch": 1.686268472906404, "grad_norm": 0.10594377666711807, "learning_rate": 0.01, "loss": 2.0513, "step": 16431 }, { "epoch": 1.686576354679803, "grad_norm": 0.09860610961914062, "learning_rate": 0.01, "loss": 2.0456, "step": 16434 }, { "epoch": 1.686884236453202, "grad_norm": 0.06849053502082825, "learning_rate": 0.01, "loss": 2.0645, "step": 16437 }, { "epoch": 1.687192118226601, "grad_norm": 0.05089464411139488, "learning_rate": 0.01, "loss": 2.0383, "step": 16440 }, { "epoch": 1.6875, "grad_norm": 0.04762034863233566, "learning_rate": 0.01, "loss": 2.0443, "step": 16443 }, { "epoch": 1.687807881773399, "grad_norm": 0.09014497697353363, "learning_rate": 0.01, "loss": 2.0736, "step": 16446 }, { "epoch": 1.688115763546798, "grad_norm": 0.06832917779684067, "learning_rate": 0.01, "loss": 2.0677, "step": 16449 }, { "epoch": 1.688423645320197, "grad_norm": 0.0529920794069767, "learning_rate": 0.01, "loss": 2.0423, "step": 16452 }, { "epoch": 1.688731527093596, "grad_norm": 0.03208652511239052, "learning_rate": 0.01, "loss": 2.0561, "step": 16455 }, { "epoch": 1.689039408866995, "grad_norm": 0.13702784478664398, "learning_rate": 0.01, "loss": 2.0393, "step": 16458 }, { "epoch": 1.6893472906403941, "grad_norm": 0.05972970649600029, "learning_rate": 0.01, "loss": 2.0795, "step": 16461 }, { "epoch": 1.6896551724137931, "grad_norm": 0.043536797165870667, "learning_rate": 0.01, "loss": 2.0622, "step": 16464 }, { "epoch": 1.6899630541871922, "grad_norm": 0.0556536540389061, "learning_rate": 0.01, "loss": 2.0523, "step": 16467 }, { "epoch": 1.6902709359605912, "grad_norm": 0.06583042442798615, "learning_rate": 0.01, "loss": 2.0568, "step": 16470 }, { "epoch": 1.6905788177339902, "grad_norm": 0.0535028837621212, "learning_rate": 0.01, "loss": 2.0427, "step": 16473 }, { "epoch": 1.6908866995073892, "grad_norm": 0.09974632412195206, "learning_rate": 0.01, "loss": 2.0589, "step": 16476 }, { "epoch": 1.6911945812807883, "grad_norm": 0.058350350707769394, "learning_rate": 0.01, "loss": 2.0392, "step": 16479 }, { "epoch": 1.6915024630541873, "grad_norm": 0.10049036890268326, "learning_rate": 0.01, "loss": 2.0643, "step": 16482 }, { "epoch": 1.6918103448275863, "grad_norm": 0.061119675636291504, "learning_rate": 0.01, "loss": 2.0455, "step": 16485 }, { "epoch": 1.6921182266009853, "grad_norm": 0.07189033925533295, "learning_rate": 0.01, "loss": 2.0629, "step": 16488 }, { "epoch": 1.6924261083743843, "grad_norm": 0.08962611109018326, "learning_rate": 0.01, "loss": 2.0586, "step": 16491 }, { "epoch": 1.6927339901477834, "grad_norm": 0.05600450560450554, "learning_rate": 0.01, "loss": 2.0434, "step": 16494 }, { "epoch": 1.6930418719211824, "grad_norm": 0.1281098574399948, "learning_rate": 0.01, "loss": 2.0241, "step": 16497 }, { "epoch": 1.6933497536945814, "grad_norm": 0.036696117371320724, "learning_rate": 0.01, "loss": 2.065, "step": 16500 }, { "epoch": 1.6936576354679804, "grad_norm": 0.12428770959377289, "learning_rate": 0.01, "loss": 2.0479, "step": 16503 }, { "epoch": 1.6939655172413794, "grad_norm": 0.07593953609466553, "learning_rate": 0.01, "loss": 2.0357, "step": 16506 }, { "epoch": 1.6942733990147785, "grad_norm": 0.0686376765370369, "learning_rate": 0.01, "loss": 2.05, "step": 16509 }, { "epoch": 1.6945812807881775, "grad_norm": 0.044805269688367844, "learning_rate": 0.01, "loss": 2.0514, "step": 16512 }, { "epoch": 1.6948891625615765, "grad_norm": 0.04698259010910988, "learning_rate": 0.01, "loss": 2.0505, "step": 16515 }, { "epoch": 1.6951970443349755, "grad_norm": 0.04546966403722763, "learning_rate": 0.01, "loss": 2.0265, "step": 16518 }, { "epoch": 1.6955049261083743, "grad_norm": 0.07239431142807007, "learning_rate": 0.01, "loss": 2.0403, "step": 16521 }, { "epoch": 1.6958128078817734, "grad_norm": 0.08790195733308792, "learning_rate": 0.01, "loss": 2.0721, "step": 16524 }, { "epoch": 1.6961206896551724, "grad_norm": 0.05445432290434837, "learning_rate": 0.01, "loss": 2.039, "step": 16527 }, { "epoch": 1.6964285714285714, "grad_norm": 0.048141270875930786, "learning_rate": 0.01, "loss": 2.0191, "step": 16530 }, { "epoch": 1.6967364532019704, "grad_norm": 0.05230564624071121, "learning_rate": 0.01, "loss": 2.0646, "step": 16533 }, { "epoch": 1.6970443349753694, "grad_norm": 0.1007009968161583, "learning_rate": 0.01, "loss": 2.0751, "step": 16536 }, { "epoch": 1.6973522167487685, "grad_norm": 0.03878286853432655, "learning_rate": 0.01, "loss": 2.0257, "step": 16539 }, { "epoch": 1.6976600985221675, "grad_norm": 0.08503543585538864, "learning_rate": 0.01, "loss": 2.0516, "step": 16542 }, { "epoch": 1.6979679802955665, "grad_norm": 0.06239473819732666, "learning_rate": 0.01, "loss": 2.057, "step": 16545 }, { "epoch": 1.6982758620689655, "grad_norm": 0.06893055140972137, "learning_rate": 0.01, "loss": 2.0435, "step": 16548 }, { "epoch": 1.6985837438423645, "grad_norm": 0.08434829860925674, "learning_rate": 0.01, "loss": 2.0319, "step": 16551 }, { "epoch": 1.6988916256157636, "grad_norm": 0.031773362308740616, "learning_rate": 0.01, "loss": 2.0585, "step": 16554 }, { "epoch": 1.6991995073891626, "grad_norm": 0.11598584800958633, "learning_rate": 0.01, "loss": 2.0423, "step": 16557 }, { "epoch": 1.6995073891625616, "grad_norm": 0.07008111476898193, "learning_rate": 0.01, "loss": 2.0787, "step": 16560 }, { "epoch": 1.6998152709359606, "grad_norm": 0.03940622881054878, "learning_rate": 0.01, "loss": 2.0525, "step": 16563 }, { "epoch": 1.7001231527093597, "grad_norm": 0.05206933617591858, "learning_rate": 0.01, "loss": 2.0671, "step": 16566 }, { "epoch": 1.7004310344827587, "grad_norm": 0.04568307474255562, "learning_rate": 0.01, "loss": 2.0413, "step": 16569 }, { "epoch": 1.7007389162561575, "grad_norm": 0.031628433614969254, "learning_rate": 0.01, "loss": 2.0323, "step": 16572 }, { "epoch": 1.7010467980295565, "grad_norm": 0.05636722221970558, "learning_rate": 0.01, "loss": 2.0403, "step": 16575 }, { "epoch": 1.7013546798029555, "grad_norm": 0.11134552955627441, "learning_rate": 0.01, "loss": 2.034, "step": 16578 }, { "epoch": 1.7016625615763545, "grad_norm": 0.06964823603630066, "learning_rate": 0.01, "loss": 2.0701, "step": 16581 }, { "epoch": 1.7019704433497536, "grad_norm": 0.041148003190755844, "learning_rate": 0.01, "loss": 2.0693, "step": 16584 }, { "epoch": 1.7022783251231526, "grad_norm": 0.03673578426241875, "learning_rate": 0.01, "loss": 2.0232, "step": 16587 }, { "epoch": 1.7025862068965516, "grad_norm": 0.03659043833613396, "learning_rate": 0.01, "loss": 2.0257, "step": 16590 }, { "epoch": 1.7028940886699506, "grad_norm": 0.03824566677212715, "learning_rate": 0.01, "loss": 2.0587, "step": 16593 }, { "epoch": 1.7032019704433496, "grad_norm": 0.07334180176258087, "learning_rate": 0.01, "loss": 2.0626, "step": 16596 }, { "epoch": 1.7035098522167487, "grad_norm": 0.055927857756614685, "learning_rate": 0.01, "loss": 2.0311, "step": 16599 }, { "epoch": 1.7038177339901477, "grad_norm": 0.07610691338777542, "learning_rate": 0.01, "loss": 2.0419, "step": 16602 }, { "epoch": 1.7041256157635467, "grad_norm": 0.06405298411846161, "learning_rate": 0.01, "loss": 2.0693, "step": 16605 }, { "epoch": 1.7044334975369457, "grad_norm": 0.06193486601114273, "learning_rate": 0.01, "loss": 2.0442, "step": 16608 }, { "epoch": 1.7047413793103448, "grad_norm": 0.12181366235017776, "learning_rate": 0.01, "loss": 2.0324, "step": 16611 }, { "epoch": 1.7050492610837438, "grad_norm": 0.049060508608818054, "learning_rate": 0.01, "loss": 2.044, "step": 16614 }, { "epoch": 1.7053571428571428, "grad_norm": 0.05021090805530548, "learning_rate": 0.01, "loss": 2.0501, "step": 16617 }, { "epoch": 1.7056650246305418, "grad_norm": 0.045171257108449936, "learning_rate": 0.01, "loss": 2.0418, "step": 16620 }, { "epoch": 1.7059729064039408, "grad_norm": 0.04944808408617973, "learning_rate": 0.01, "loss": 2.0576, "step": 16623 }, { "epoch": 1.7062807881773399, "grad_norm": 0.03556932508945465, "learning_rate": 0.01, "loss": 2.0405, "step": 16626 }, { "epoch": 1.7065886699507389, "grad_norm": 0.10005172342061996, "learning_rate": 0.01, "loss": 2.0422, "step": 16629 }, { "epoch": 1.706896551724138, "grad_norm": 0.04088572412729263, "learning_rate": 0.01, "loss": 2.0526, "step": 16632 }, { "epoch": 1.707204433497537, "grad_norm": 0.04937949404120445, "learning_rate": 0.01, "loss": 2.0656, "step": 16635 }, { "epoch": 1.707512315270936, "grad_norm": 0.07822302728891373, "learning_rate": 0.01, "loss": 2.0545, "step": 16638 }, { "epoch": 1.707820197044335, "grad_norm": 0.05767158418893814, "learning_rate": 0.01, "loss": 2.0515, "step": 16641 }, { "epoch": 1.708128078817734, "grad_norm": 0.08512212336063385, "learning_rate": 0.01, "loss": 2.0579, "step": 16644 }, { "epoch": 1.708435960591133, "grad_norm": 0.06758993119001389, "learning_rate": 0.01, "loss": 2.0307, "step": 16647 }, { "epoch": 1.708743842364532, "grad_norm": 0.08142005652189255, "learning_rate": 0.01, "loss": 2.0531, "step": 16650 }, { "epoch": 1.709051724137931, "grad_norm": 0.06357218325138092, "learning_rate": 0.01, "loss": 2.0475, "step": 16653 }, { "epoch": 1.70935960591133, "grad_norm": 0.10591546446084976, "learning_rate": 0.01, "loss": 2.0447, "step": 16656 }, { "epoch": 1.709667487684729, "grad_norm": 0.06571496278047562, "learning_rate": 0.01, "loss": 2.0176, "step": 16659 }, { "epoch": 1.7099753694581281, "grad_norm": 0.049450814723968506, "learning_rate": 0.01, "loss": 2.0522, "step": 16662 }, { "epoch": 1.7102832512315271, "grad_norm": 0.11850273609161377, "learning_rate": 0.01, "loss": 2.0575, "step": 16665 }, { "epoch": 1.7105911330049262, "grad_norm": 0.0952281728386879, "learning_rate": 0.01, "loss": 2.0442, "step": 16668 }, { "epoch": 1.7108990147783252, "grad_norm": 0.09431217610836029, "learning_rate": 0.01, "loss": 2.0403, "step": 16671 }, { "epoch": 1.7112068965517242, "grad_norm": 0.07080823183059692, "learning_rate": 0.01, "loss": 2.0111, "step": 16674 }, { "epoch": 1.7115147783251232, "grad_norm": 0.049033813178539276, "learning_rate": 0.01, "loss": 2.0489, "step": 16677 }, { "epoch": 1.7118226600985222, "grad_norm": 0.04356718435883522, "learning_rate": 0.01, "loss": 2.035, "step": 16680 }, { "epoch": 1.7121305418719213, "grad_norm": 0.03276592493057251, "learning_rate": 0.01, "loss": 2.0396, "step": 16683 }, { "epoch": 1.7124384236453203, "grad_norm": 0.04438839852809906, "learning_rate": 0.01, "loss": 2.0441, "step": 16686 }, { "epoch": 1.7127463054187193, "grad_norm": 0.07276454567909241, "learning_rate": 0.01, "loss": 2.0513, "step": 16689 }, { "epoch": 1.7130541871921183, "grad_norm": 0.11324001848697662, "learning_rate": 0.01, "loss": 2.0482, "step": 16692 }, { "epoch": 1.7133620689655173, "grad_norm": 0.14715081453323364, "learning_rate": 0.01, "loss": 2.048, "step": 16695 }, { "epoch": 1.7136699507389164, "grad_norm": 0.07661852240562439, "learning_rate": 0.01, "loss": 2.0396, "step": 16698 }, { "epoch": 1.7139778325123154, "grad_norm": 0.05308947339653969, "learning_rate": 0.01, "loss": 2.0601, "step": 16701 }, { "epoch": 1.7142857142857144, "grad_norm": 0.06816977262496948, "learning_rate": 0.01, "loss": 2.0456, "step": 16704 }, { "epoch": 1.7145935960591134, "grad_norm": 0.05123249441385269, "learning_rate": 0.01, "loss": 2.0532, "step": 16707 }, { "epoch": 1.7149014778325125, "grad_norm": 0.05118009075522423, "learning_rate": 0.01, "loss": 2.0659, "step": 16710 }, { "epoch": 1.7152093596059115, "grad_norm": 0.03276235982775688, "learning_rate": 0.01, "loss": 2.0718, "step": 16713 }, { "epoch": 1.7155172413793105, "grad_norm": 0.049824655055999756, "learning_rate": 0.01, "loss": 2.0388, "step": 16716 }, { "epoch": 1.7158251231527095, "grad_norm": 0.1416471302509308, "learning_rate": 0.01, "loss": 2.0526, "step": 16719 }, { "epoch": 1.7161330049261085, "grad_norm": 0.04109251871705055, "learning_rate": 0.01, "loss": 2.0329, "step": 16722 }, { "epoch": 1.7164408866995073, "grad_norm": 0.08853971213102341, "learning_rate": 0.01, "loss": 2.0506, "step": 16725 }, { "epoch": 1.7167487684729064, "grad_norm": 0.05180136114358902, "learning_rate": 0.01, "loss": 2.0608, "step": 16728 }, { "epoch": 1.7170566502463054, "grad_norm": 0.0667758584022522, "learning_rate": 0.01, "loss": 2.0347, "step": 16731 }, { "epoch": 1.7173645320197044, "grad_norm": 0.039203155785799026, "learning_rate": 0.01, "loss": 2.0331, "step": 16734 }, { "epoch": 1.7176724137931034, "grad_norm": 0.05210564285516739, "learning_rate": 0.01, "loss": 2.0666, "step": 16737 }, { "epoch": 1.7179802955665024, "grad_norm": 0.0668390691280365, "learning_rate": 0.01, "loss": 2.0365, "step": 16740 }, { "epoch": 1.7182881773399015, "grad_norm": 0.05041831359267235, "learning_rate": 0.01, "loss": 2.0261, "step": 16743 }, { "epoch": 1.7185960591133005, "grad_norm": 0.04496284946799278, "learning_rate": 0.01, "loss": 2.0182, "step": 16746 }, { "epoch": 1.7189039408866995, "grad_norm": 0.08660906553268433, "learning_rate": 0.01, "loss": 2.0434, "step": 16749 }, { "epoch": 1.7192118226600985, "grad_norm": 0.054843079298734665, "learning_rate": 0.01, "loss": 2.0522, "step": 16752 }, { "epoch": 1.7195197044334976, "grad_norm": 0.05377354100346565, "learning_rate": 0.01, "loss": 2.0263, "step": 16755 }, { "epoch": 1.7198275862068966, "grad_norm": 0.060547634959220886, "learning_rate": 0.01, "loss": 2.0165, "step": 16758 }, { "epoch": 1.7201354679802956, "grad_norm": 0.06253104656934738, "learning_rate": 0.01, "loss": 2.0479, "step": 16761 }, { "epoch": 1.7204433497536946, "grad_norm": 0.052225805819034576, "learning_rate": 0.01, "loss": 2.03, "step": 16764 }, { "epoch": 1.7207512315270936, "grad_norm": 0.04988449066877365, "learning_rate": 0.01, "loss": 2.0498, "step": 16767 }, { "epoch": 1.7210591133004927, "grad_norm": 0.049726665019989014, "learning_rate": 0.01, "loss": 2.0588, "step": 16770 }, { "epoch": 1.7213669950738915, "grad_norm": 0.05053321272134781, "learning_rate": 0.01, "loss": 2.0518, "step": 16773 }, { "epoch": 1.7216748768472905, "grad_norm": 0.060702208429574966, "learning_rate": 0.01, "loss": 2.0448, "step": 16776 }, { "epoch": 1.7219827586206895, "grad_norm": 0.06695824861526489, "learning_rate": 0.01, "loss": 2.0361, "step": 16779 }, { "epoch": 1.7222906403940885, "grad_norm": 0.07498425990343094, "learning_rate": 0.01, "loss": 2.0652, "step": 16782 }, { "epoch": 1.7225985221674875, "grad_norm": 0.04544251784682274, "learning_rate": 0.01, "loss": 2.0294, "step": 16785 }, { "epoch": 1.7229064039408866, "grad_norm": 0.09211461246013641, "learning_rate": 0.01, "loss": 2.0261, "step": 16788 }, { "epoch": 1.7232142857142856, "grad_norm": 0.19020068645477295, "learning_rate": 0.01, "loss": 2.0459, "step": 16791 }, { "epoch": 1.7235221674876846, "grad_norm": 0.08979224413633347, "learning_rate": 0.01, "loss": 2.052, "step": 16794 }, { "epoch": 1.7238300492610836, "grad_norm": 0.06724744290113449, "learning_rate": 0.01, "loss": 2.0463, "step": 16797 }, { "epoch": 1.7241379310344827, "grad_norm": 0.05009309947490692, "learning_rate": 0.01, "loss": 2.0338, "step": 16800 }, { "epoch": 1.7244458128078817, "grad_norm": 0.04953375831246376, "learning_rate": 0.01, "loss": 2.0554, "step": 16803 }, { "epoch": 1.7247536945812807, "grad_norm": 0.12439639121294022, "learning_rate": 0.01, "loss": 2.0363, "step": 16806 }, { "epoch": 1.7250615763546797, "grad_norm": 0.126046821475029, "learning_rate": 0.01, "loss": 2.0435, "step": 16809 }, { "epoch": 1.7253694581280787, "grad_norm": 0.06693071871995926, "learning_rate": 0.01, "loss": 2.058, "step": 16812 }, { "epoch": 1.7256773399014778, "grad_norm": 0.04883793368935585, "learning_rate": 0.01, "loss": 2.0768, "step": 16815 }, { "epoch": 1.7259852216748768, "grad_norm": 0.06895219534635544, "learning_rate": 0.01, "loss": 2.057, "step": 16818 }, { "epoch": 1.7262931034482758, "grad_norm": 0.06979874521493912, "learning_rate": 0.01, "loss": 2.0641, "step": 16821 }, { "epoch": 1.7266009852216748, "grad_norm": 0.06453370302915573, "learning_rate": 0.01, "loss": 2.025, "step": 16824 }, { "epoch": 1.7269088669950738, "grad_norm": 0.11405997723340988, "learning_rate": 0.01, "loss": 2.0469, "step": 16827 }, { "epoch": 1.7272167487684729, "grad_norm": 0.10612863302230835, "learning_rate": 0.01, "loss": 2.0825, "step": 16830 }, { "epoch": 1.7275246305418719, "grad_norm": 0.09805281460285187, "learning_rate": 0.01, "loss": 2.0256, "step": 16833 }, { "epoch": 1.727832512315271, "grad_norm": 0.08369138836860657, "learning_rate": 0.01, "loss": 2.0307, "step": 16836 }, { "epoch": 1.72814039408867, "grad_norm": 0.08893299847841263, "learning_rate": 0.01, "loss": 2.0362, "step": 16839 }, { "epoch": 1.728448275862069, "grad_norm": 0.06902442872524261, "learning_rate": 0.01, "loss": 2.0562, "step": 16842 }, { "epoch": 1.728756157635468, "grad_norm": 0.058703918009996414, "learning_rate": 0.01, "loss": 2.0406, "step": 16845 }, { "epoch": 1.729064039408867, "grad_norm": 0.03979343920946121, "learning_rate": 0.01, "loss": 2.0636, "step": 16848 }, { "epoch": 1.729371921182266, "grad_norm": 0.08049800246953964, "learning_rate": 0.01, "loss": 2.0483, "step": 16851 }, { "epoch": 1.729679802955665, "grad_norm": 0.07980278134346008, "learning_rate": 0.01, "loss": 2.0471, "step": 16854 }, { "epoch": 1.729987684729064, "grad_norm": 0.051650844514369965, "learning_rate": 0.01, "loss": 2.0322, "step": 16857 }, { "epoch": 1.730295566502463, "grad_norm": 0.06828897446393967, "learning_rate": 0.01, "loss": 2.0456, "step": 16860 }, { "epoch": 1.730603448275862, "grad_norm": 0.10987185686826706, "learning_rate": 0.01, "loss": 2.0334, "step": 16863 }, { "epoch": 1.7309113300492611, "grad_norm": 0.055677276104688644, "learning_rate": 0.01, "loss": 2.018, "step": 16866 }, { "epoch": 1.7312192118226601, "grad_norm": 0.056478437036275864, "learning_rate": 0.01, "loss": 2.0241, "step": 16869 }, { "epoch": 1.7315270935960592, "grad_norm": 0.1247091144323349, "learning_rate": 0.01, "loss": 2.0128, "step": 16872 }, { "epoch": 1.7318349753694582, "grad_norm": 0.06603918969631195, "learning_rate": 0.01, "loss": 2.0456, "step": 16875 }, { "epoch": 1.7321428571428572, "grad_norm": 0.04174170270562172, "learning_rate": 0.01, "loss": 2.0349, "step": 16878 }, { "epoch": 1.7324507389162562, "grad_norm": 0.03841250762343407, "learning_rate": 0.01, "loss": 2.0365, "step": 16881 }, { "epoch": 1.7327586206896552, "grad_norm": 0.03429241105914116, "learning_rate": 0.01, "loss": 2.038, "step": 16884 }, { "epoch": 1.7330665024630543, "grad_norm": 0.05175672471523285, "learning_rate": 0.01, "loss": 2.0583, "step": 16887 }, { "epoch": 1.7333743842364533, "grad_norm": 0.06318958103656769, "learning_rate": 0.01, "loss": 2.0398, "step": 16890 }, { "epoch": 1.7336822660098523, "grad_norm": 0.08888188004493713, "learning_rate": 0.01, "loss": 2.0447, "step": 16893 }, { "epoch": 1.7339901477832513, "grad_norm": 0.04479747265577316, "learning_rate": 0.01, "loss": 2.0671, "step": 16896 }, { "epoch": 1.7342980295566504, "grad_norm": 0.05286455899477005, "learning_rate": 0.01, "loss": 2.0342, "step": 16899 }, { "epoch": 1.7346059113300494, "grad_norm": 0.04719952121376991, "learning_rate": 0.01, "loss": 2.0251, "step": 16902 }, { "epoch": 1.7349137931034484, "grad_norm": 0.06204066798090935, "learning_rate": 0.01, "loss": 2.0412, "step": 16905 }, { "epoch": 1.7352216748768474, "grad_norm": 0.1099078357219696, "learning_rate": 0.01, "loss": 2.008, "step": 16908 }, { "epoch": 1.7355295566502464, "grad_norm": 0.08311746269464493, "learning_rate": 0.01, "loss": 2.0248, "step": 16911 }, { "epoch": 1.7358374384236455, "grad_norm": 0.09649046510457993, "learning_rate": 0.01, "loss": 2.0585, "step": 16914 }, { "epoch": 1.7361453201970445, "grad_norm": 0.06716254353523254, "learning_rate": 0.01, "loss": 2.0128, "step": 16917 }, { "epoch": 1.7364532019704435, "grad_norm": 0.05400918424129486, "learning_rate": 0.01, "loss": 2.035, "step": 16920 }, { "epoch": 1.7367610837438425, "grad_norm": 0.06857965141534805, "learning_rate": 0.01, "loss": 2.0394, "step": 16923 }, { "epoch": 1.7370689655172413, "grad_norm": 0.05842095986008644, "learning_rate": 0.01, "loss": 2.0418, "step": 16926 }, { "epoch": 1.7373768472906403, "grad_norm": 0.06279005855321884, "learning_rate": 0.01, "loss": 2.0528, "step": 16929 }, { "epoch": 1.7376847290640394, "grad_norm": 0.04209805652499199, "learning_rate": 0.01, "loss": 2.0374, "step": 16932 }, { "epoch": 1.7379926108374384, "grad_norm": 0.07557601481676102, "learning_rate": 0.01, "loss": 2.0351, "step": 16935 }, { "epoch": 1.7383004926108374, "grad_norm": 0.05998634174466133, "learning_rate": 0.01, "loss": 2.0283, "step": 16938 }, { "epoch": 1.7386083743842364, "grad_norm": 0.05477887764573097, "learning_rate": 0.01, "loss": 2.0659, "step": 16941 }, { "epoch": 1.7389162561576355, "grad_norm": 0.09489591419696808, "learning_rate": 0.01, "loss": 2.0313, "step": 16944 }, { "epoch": 1.7392241379310345, "grad_norm": 0.13861320912837982, "learning_rate": 0.01, "loss": 2.0778, "step": 16947 }, { "epoch": 1.7395320197044335, "grad_norm": 0.1302078664302826, "learning_rate": 0.01, "loss": 2.024, "step": 16950 }, { "epoch": 1.7398399014778325, "grad_norm": 0.08335380256175995, "learning_rate": 0.01, "loss": 2.0612, "step": 16953 }, { "epoch": 1.7401477832512315, "grad_norm": 0.06901963800191879, "learning_rate": 0.01, "loss": 2.0357, "step": 16956 }, { "epoch": 1.7404556650246306, "grad_norm": 0.06619597226381302, "learning_rate": 0.01, "loss": 2.0207, "step": 16959 }, { "epoch": 1.7407635467980296, "grad_norm": 0.045459166169166565, "learning_rate": 0.01, "loss": 2.071, "step": 16962 }, { "epoch": 1.7410714285714286, "grad_norm": 0.06302090734243393, "learning_rate": 0.01, "loss": 2.0311, "step": 16965 }, { "epoch": 1.7413793103448276, "grad_norm": 0.07184530049562454, "learning_rate": 0.01, "loss": 2.0309, "step": 16968 }, { "epoch": 1.7416871921182266, "grad_norm": 0.07319154590368271, "learning_rate": 0.01, "loss": 2.0316, "step": 16971 }, { "epoch": 1.7419950738916257, "grad_norm": 0.0667872503399849, "learning_rate": 0.01, "loss": 2.0406, "step": 16974 }, { "epoch": 1.7423029556650245, "grad_norm": 0.0988057404756546, "learning_rate": 0.01, "loss": 2.0466, "step": 16977 }, { "epoch": 1.7426108374384235, "grad_norm": 0.09142420440912247, "learning_rate": 0.01, "loss": 2.0503, "step": 16980 }, { "epoch": 1.7429187192118225, "grad_norm": 0.07581423968076706, "learning_rate": 0.01, "loss": 2.0004, "step": 16983 }, { "epoch": 1.7432266009852215, "grad_norm": 0.05793863534927368, "learning_rate": 0.01, "loss": 2.0278, "step": 16986 }, { "epoch": 1.7435344827586206, "grad_norm": 0.0717446580529213, "learning_rate": 0.01, "loss": 2.061, "step": 16989 }, { "epoch": 1.7438423645320196, "grad_norm": 0.03572774678468704, "learning_rate": 0.01, "loss": 2.0663, "step": 16992 }, { "epoch": 1.7441502463054186, "grad_norm": 0.09349701553583145, "learning_rate": 0.01, "loss": 2.0433, "step": 16995 }, { "epoch": 1.7444581280788176, "grad_norm": 0.08374779671430588, "learning_rate": 0.01, "loss": 2.0228, "step": 16998 }, { "epoch": 1.7447660098522166, "grad_norm": 0.045029062777757645, "learning_rate": 0.01, "loss": 2.0425, "step": 17001 }, { "epoch": 1.7450738916256157, "grad_norm": 0.06359710544347763, "learning_rate": 0.01, "loss": 2.073, "step": 17004 }, { "epoch": 1.7453817733990147, "grad_norm": 0.0948299989104271, "learning_rate": 0.01, "loss": 2.0326, "step": 17007 }, { "epoch": 1.7456896551724137, "grad_norm": 0.05561600998044014, "learning_rate": 0.01, "loss": 2.0702, "step": 17010 }, { "epoch": 1.7459975369458127, "grad_norm": 0.05672406032681465, "learning_rate": 0.01, "loss": 2.0591, "step": 17013 }, { "epoch": 1.7463054187192117, "grad_norm": 0.047100841999053955, "learning_rate": 0.01, "loss": 2.0597, "step": 17016 }, { "epoch": 1.7466133004926108, "grad_norm": 0.09477028995752335, "learning_rate": 0.01, "loss": 2.046, "step": 17019 }, { "epoch": 1.7469211822660098, "grad_norm": 0.07235399633646011, "learning_rate": 0.01, "loss": 2.0778, "step": 17022 }, { "epoch": 1.7472290640394088, "grad_norm": 0.08015649020671844, "learning_rate": 0.01, "loss": 2.0219, "step": 17025 }, { "epoch": 1.7475369458128078, "grad_norm": 0.07459922134876251, "learning_rate": 0.01, "loss": 2.0278, "step": 17028 }, { "epoch": 1.7478448275862069, "grad_norm": 0.10745642334222794, "learning_rate": 0.01, "loss": 2.0536, "step": 17031 }, { "epoch": 1.7481527093596059, "grad_norm": 0.049479395151138306, "learning_rate": 0.01, "loss": 2.0477, "step": 17034 }, { "epoch": 1.748460591133005, "grad_norm": 0.03935602307319641, "learning_rate": 0.01, "loss": 2.0495, "step": 17037 }, { "epoch": 1.748768472906404, "grad_norm": 0.05755804106593132, "learning_rate": 0.01, "loss": 2.0186, "step": 17040 }, { "epoch": 1.749076354679803, "grad_norm": 0.07461614906787872, "learning_rate": 0.01, "loss": 2.0577, "step": 17043 }, { "epoch": 1.749384236453202, "grad_norm": 0.07078621536493301, "learning_rate": 0.01, "loss": 2.0159, "step": 17046 }, { "epoch": 1.749692118226601, "grad_norm": 0.12035417556762695, "learning_rate": 0.01, "loss": 2.0549, "step": 17049 }, { "epoch": 1.75, "grad_norm": 0.054343827068805695, "learning_rate": 0.01, "loss": 2.0394, "step": 17052 }, { "epoch": 1.750307881773399, "grad_norm": 0.056529540568590164, "learning_rate": 0.01, "loss": 2.0604, "step": 17055 }, { "epoch": 1.750615763546798, "grad_norm": 0.09392616152763367, "learning_rate": 0.01, "loss": 2.0233, "step": 17058 }, { "epoch": 1.750923645320197, "grad_norm": 0.0874391570687294, "learning_rate": 0.01, "loss": 2.0733, "step": 17061 }, { "epoch": 1.751231527093596, "grad_norm": 0.03889552876353264, "learning_rate": 0.01, "loss": 2.0099, "step": 17064 }, { "epoch": 1.751539408866995, "grad_norm": 0.06299902498722076, "learning_rate": 0.01, "loss": 2.01, "step": 17067 }, { "epoch": 1.7518472906403941, "grad_norm": 0.05655315890908241, "learning_rate": 0.01, "loss": 2.0571, "step": 17070 }, { "epoch": 1.7521551724137931, "grad_norm": 0.04646646976470947, "learning_rate": 0.01, "loss": 2.0401, "step": 17073 }, { "epoch": 1.7524630541871922, "grad_norm": 0.04910219460725784, "learning_rate": 0.01, "loss": 2.0527, "step": 17076 }, { "epoch": 1.7527709359605912, "grad_norm": 0.03616022691130638, "learning_rate": 0.01, "loss": 2.0152, "step": 17079 }, { "epoch": 1.7530788177339902, "grad_norm": 0.05539899319410324, "learning_rate": 0.01, "loss": 2.029, "step": 17082 }, { "epoch": 1.7533866995073892, "grad_norm": 0.09409206360578537, "learning_rate": 0.01, "loss": 2.0621, "step": 17085 }, { "epoch": 1.7536945812807883, "grad_norm": 0.048882581293582916, "learning_rate": 0.01, "loss": 2.057, "step": 17088 }, { "epoch": 1.7540024630541873, "grad_norm": 0.09687826037406921, "learning_rate": 0.01, "loss": 2.0472, "step": 17091 }, { "epoch": 1.7543103448275863, "grad_norm": 0.06646592915058136, "learning_rate": 0.01, "loss": 2.0469, "step": 17094 }, { "epoch": 1.7546182266009853, "grad_norm": 0.07316572964191437, "learning_rate": 0.01, "loss": 2.0427, "step": 17097 }, { "epoch": 1.7549261083743843, "grad_norm": 0.058037593960762024, "learning_rate": 0.01, "loss": 2.0462, "step": 17100 }, { "epoch": 1.7552339901477834, "grad_norm": 0.05486461892724037, "learning_rate": 0.01, "loss": 2.0426, "step": 17103 }, { "epoch": 1.7555418719211824, "grad_norm": 0.0397610180079937, "learning_rate": 0.01, "loss": 2.0397, "step": 17106 }, { "epoch": 1.7558497536945814, "grad_norm": 0.11639061570167542, "learning_rate": 0.01, "loss": 2.0248, "step": 17109 }, { "epoch": 1.7561576354679804, "grad_norm": 0.04207361862063408, "learning_rate": 0.01, "loss": 2.064, "step": 17112 }, { "epoch": 1.7564655172413794, "grad_norm": 0.09336013346910477, "learning_rate": 0.01, "loss": 2.0399, "step": 17115 }, { "epoch": 1.7567733990147785, "grad_norm": 0.059693820774555206, "learning_rate": 0.01, "loss": 2.0347, "step": 17118 }, { "epoch": 1.7570812807881775, "grad_norm": 0.05269778147339821, "learning_rate": 0.01, "loss": 2.0222, "step": 17121 }, { "epoch": 1.7573891625615765, "grad_norm": 0.0548628568649292, "learning_rate": 0.01, "loss": 2.0269, "step": 17124 }, { "epoch": 1.7576970443349755, "grad_norm": 0.049783483147621155, "learning_rate": 0.01, "loss": 2.0469, "step": 17127 }, { "epoch": 1.7580049261083743, "grad_norm": 0.11240525543689728, "learning_rate": 0.01, "loss": 2.0405, "step": 17130 }, { "epoch": 1.7583128078817734, "grad_norm": 0.04133368283510208, "learning_rate": 0.01, "loss": 2.064, "step": 17133 }, { "epoch": 1.7586206896551724, "grad_norm": 0.042926132678985596, "learning_rate": 0.01, "loss": 2.0685, "step": 17136 }, { "epoch": 1.7589285714285714, "grad_norm": 0.053613241761922836, "learning_rate": 0.01, "loss": 2.0241, "step": 17139 }, { "epoch": 1.7592364532019704, "grad_norm": 0.03950737044215202, "learning_rate": 0.01, "loss": 2.0662, "step": 17142 }, { "epoch": 1.7595443349753694, "grad_norm": 0.045378413051366806, "learning_rate": 0.01, "loss": 2.0401, "step": 17145 }, { "epoch": 1.7598522167487685, "grad_norm": 0.036304816603660583, "learning_rate": 0.01, "loss": 2.0789, "step": 17148 }, { "epoch": 1.7601600985221675, "grad_norm": 0.03886290267109871, "learning_rate": 0.01, "loss": 2.0516, "step": 17151 }, { "epoch": 1.7604679802955665, "grad_norm": 0.0885484591126442, "learning_rate": 0.01, "loss": 2.0333, "step": 17154 }, { "epoch": 1.7607758620689655, "grad_norm": 0.06733599305152893, "learning_rate": 0.01, "loss": 2.039, "step": 17157 }, { "epoch": 1.7610837438423645, "grad_norm": 0.10319662094116211, "learning_rate": 0.01, "loss": 2.0629, "step": 17160 }, { "epoch": 1.7613916256157636, "grad_norm": 0.047492869198322296, "learning_rate": 0.01, "loss": 2.0726, "step": 17163 }, { "epoch": 1.7616995073891626, "grad_norm": 0.04345547780394554, "learning_rate": 0.01, "loss": 2.0259, "step": 17166 }, { "epoch": 1.7620073891625616, "grad_norm": 0.0452197827398777, "learning_rate": 0.01, "loss": 2.0393, "step": 17169 }, { "epoch": 1.7623152709359606, "grad_norm": 0.0703844428062439, "learning_rate": 0.01, "loss": 2.0458, "step": 17172 }, { "epoch": 1.7626231527093597, "grad_norm": 0.07864879071712494, "learning_rate": 0.01, "loss": 2.047, "step": 17175 }, { "epoch": 1.7629310344827587, "grad_norm": 0.1282995641231537, "learning_rate": 0.01, "loss": 2.0226, "step": 17178 }, { "epoch": 1.7632389162561575, "grad_norm": 0.0837298184633255, "learning_rate": 0.01, "loss": 2.0483, "step": 17181 }, { "epoch": 1.7635467980295565, "grad_norm": 0.05081562697887421, "learning_rate": 0.01, "loss": 2.0656, "step": 17184 }, { "epoch": 1.7638546798029555, "grad_norm": 0.07952243834733963, "learning_rate": 0.01, "loss": 2.0561, "step": 17187 }, { "epoch": 1.7641625615763545, "grad_norm": 0.06592147797346115, "learning_rate": 0.01, "loss": 2.0311, "step": 17190 }, { "epoch": 1.7644704433497536, "grad_norm": 0.04341195523738861, "learning_rate": 0.01, "loss": 2.0413, "step": 17193 }, { "epoch": 1.7647783251231526, "grad_norm": 0.04649266228079796, "learning_rate": 0.01, "loss": 2.0338, "step": 17196 }, { "epoch": 1.7650862068965516, "grad_norm": 0.04569242149591446, "learning_rate": 0.01, "loss": 2.0428, "step": 17199 }, { "epoch": 1.7653940886699506, "grad_norm": 0.040291350334882736, "learning_rate": 0.01, "loss": 2.0165, "step": 17202 }, { "epoch": 1.7657019704433496, "grad_norm": 0.05328141525387764, "learning_rate": 0.01, "loss": 2.0384, "step": 17205 }, { "epoch": 1.7660098522167487, "grad_norm": 0.04405885562300682, "learning_rate": 0.01, "loss": 2.0566, "step": 17208 }, { "epoch": 1.7663177339901477, "grad_norm": 0.06635614484548569, "learning_rate": 0.01, "loss": 2.0397, "step": 17211 }, { "epoch": 1.7666256157635467, "grad_norm": 0.09231774508953094, "learning_rate": 0.01, "loss": 2.0247, "step": 17214 }, { "epoch": 1.7669334975369457, "grad_norm": 0.056320998817682266, "learning_rate": 0.01, "loss": 2.066, "step": 17217 }, { "epoch": 1.7672413793103448, "grad_norm": 0.049784105271101, "learning_rate": 0.01, "loss": 2.0175, "step": 17220 }, { "epoch": 1.7675492610837438, "grad_norm": 0.03728071227669716, "learning_rate": 0.01, "loss": 2.0403, "step": 17223 }, { "epoch": 1.7678571428571428, "grad_norm": 0.06607525050640106, "learning_rate": 0.01, "loss": 2.0364, "step": 17226 }, { "epoch": 1.7681650246305418, "grad_norm": 0.07367686927318573, "learning_rate": 0.01, "loss": 2.0478, "step": 17229 }, { "epoch": 1.7684729064039408, "grad_norm": 0.039499782025814056, "learning_rate": 0.01, "loss": 2.0508, "step": 17232 }, { "epoch": 1.7687807881773399, "grad_norm": 0.04863186180591583, "learning_rate": 0.01, "loss": 2.0595, "step": 17235 }, { "epoch": 1.7690886699507389, "grad_norm": 0.03877348452806473, "learning_rate": 0.01, "loss": 2.0608, "step": 17238 }, { "epoch": 1.769396551724138, "grad_norm": 0.049965135753154755, "learning_rate": 0.01, "loss": 2.0521, "step": 17241 }, { "epoch": 1.769704433497537, "grad_norm": 0.0697547048330307, "learning_rate": 0.01, "loss": 2.0191, "step": 17244 }, { "epoch": 1.770012315270936, "grad_norm": 0.0562531016767025, "learning_rate": 0.01, "loss": 2.0581, "step": 17247 }, { "epoch": 1.770320197044335, "grad_norm": 0.12931805849075317, "learning_rate": 0.01, "loss": 2.072, "step": 17250 }, { "epoch": 1.770628078817734, "grad_norm": 0.06590058654546738, "learning_rate": 0.01, "loss": 2.0487, "step": 17253 }, { "epoch": 1.770935960591133, "grad_norm": 0.045246463268995285, "learning_rate": 0.01, "loss": 2.0424, "step": 17256 }, { "epoch": 1.771243842364532, "grad_norm": 0.03972258046269417, "learning_rate": 0.01, "loss": 2.043, "step": 17259 }, { "epoch": 1.771551724137931, "grad_norm": 0.030682874843478203, "learning_rate": 0.01, "loss": 2.0665, "step": 17262 }, { "epoch": 1.77185960591133, "grad_norm": 0.08989464491605759, "learning_rate": 0.01, "loss": 2.042, "step": 17265 }, { "epoch": 1.772167487684729, "grad_norm": 0.05595966801047325, "learning_rate": 0.01, "loss": 2.0399, "step": 17268 }, { "epoch": 1.7724753694581281, "grad_norm": 0.16923703253269196, "learning_rate": 0.01, "loss": 2.0161, "step": 17271 }, { "epoch": 1.7727832512315271, "grad_norm": 0.08722022920846939, "learning_rate": 0.01, "loss": 2.0379, "step": 17274 }, { "epoch": 1.7730911330049262, "grad_norm": 0.0741046667098999, "learning_rate": 0.01, "loss": 2.0512, "step": 17277 }, { "epoch": 1.7733990147783252, "grad_norm": 0.06061973422765732, "learning_rate": 0.01, "loss": 2.0318, "step": 17280 }, { "epoch": 1.7737068965517242, "grad_norm": 0.036843594163656235, "learning_rate": 0.01, "loss": 2.056, "step": 17283 }, { "epoch": 1.7740147783251232, "grad_norm": 0.03937767818570137, "learning_rate": 0.01, "loss": 2.019, "step": 17286 }, { "epoch": 1.7743226600985222, "grad_norm": 0.03801162540912628, "learning_rate": 0.01, "loss": 2.04, "step": 17289 }, { "epoch": 1.7746305418719213, "grad_norm": 0.045572392642498016, "learning_rate": 0.01, "loss": 2.0665, "step": 17292 }, { "epoch": 1.7749384236453203, "grad_norm": 0.06430240720510483, "learning_rate": 0.01, "loss": 2.0357, "step": 17295 }, { "epoch": 1.7752463054187193, "grad_norm": 0.09266401827335358, "learning_rate": 0.01, "loss": 2.0474, "step": 17298 }, { "epoch": 1.7755541871921183, "grad_norm": 0.09686179459095001, "learning_rate": 0.01, "loss": 2.0224, "step": 17301 }, { "epoch": 1.7758620689655173, "grad_norm": 0.04640132188796997, "learning_rate": 0.01, "loss": 2.0611, "step": 17304 }, { "epoch": 1.7761699507389164, "grad_norm": 0.03891894221305847, "learning_rate": 0.01, "loss": 2.0529, "step": 17307 }, { "epoch": 1.7764778325123154, "grad_norm": 0.06023077294230461, "learning_rate": 0.01, "loss": 2.0282, "step": 17310 }, { "epoch": 1.7767857142857144, "grad_norm": 0.12215135246515274, "learning_rate": 0.01, "loss": 2.0472, "step": 17313 }, { "epoch": 1.7770935960591134, "grad_norm": 0.04197768121957779, "learning_rate": 0.01, "loss": 2.038, "step": 17316 }, { "epoch": 1.7774014778325125, "grad_norm": 0.0429445244371891, "learning_rate": 0.01, "loss": 2.0291, "step": 17319 }, { "epoch": 1.7777093596059115, "grad_norm": 0.04674970358610153, "learning_rate": 0.01, "loss": 2.0493, "step": 17322 }, { "epoch": 1.7780172413793105, "grad_norm": 0.11712675541639328, "learning_rate": 0.01, "loss": 2.0421, "step": 17325 }, { "epoch": 1.7783251231527095, "grad_norm": 0.04812907800078392, "learning_rate": 0.01, "loss": 2.0395, "step": 17328 }, { "epoch": 1.7786330049261085, "grad_norm": 0.04147825017571449, "learning_rate": 0.01, "loss": 2.0057, "step": 17331 }, { "epoch": 1.7789408866995073, "grad_norm": 0.07262876629829407, "learning_rate": 0.01, "loss": 2.0383, "step": 17334 }, { "epoch": 1.7792487684729064, "grad_norm": 0.08528011292219162, "learning_rate": 0.01, "loss": 2.0151, "step": 17337 }, { "epoch": 1.7795566502463054, "grad_norm": 0.046615902334451675, "learning_rate": 0.01, "loss": 2.0368, "step": 17340 }, { "epoch": 1.7798645320197044, "grad_norm": 0.06018273904919624, "learning_rate": 0.01, "loss": 2.0411, "step": 17343 }, { "epoch": 1.7801724137931034, "grad_norm": 0.07272887974977493, "learning_rate": 0.01, "loss": 2.026, "step": 17346 }, { "epoch": 1.7804802955665024, "grad_norm": 0.07152794301509857, "learning_rate": 0.01, "loss": 2.0631, "step": 17349 }, { "epoch": 1.7807881773399015, "grad_norm": 0.07950329035520554, "learning_rate": 0.01, "loss": 2.0435, "step": 17352 }, { "epoch": 1.7810960591133005, "grad_norm": 0.040778059512376785, "learning_rate": 0.01, "loss": 2.0089, "step": 17355 }, { "epoch": 1.7814039408866995, "grad_norm": 0.06180460751056671, "learning_rate": 0.01, "loss": 2.0183, "step": 17358 }, { "epoch": 1.7817118226600985, "grad_norm": 0.06950334459543228, "learning_rate": 0.01, "loss": 2.0352, "step": 17361 }, { "epoch": 1.7820197044334976, "grad_norm": 0.037724483758211136, "learning_rate": 0.01, "loss": 2.0324, "step": 17364 }, { "epoch": 1.7823275862068966, "grad_norm": 0.05991238355636597, "learning_rate": 0.01, "loss": 2.053, "step": 17367 }, { "epoch": 1.7826354679802956, "grad_norm": 0.047278665006160736, "learning_rate": 0.01, "loss": 2.0427, "step": 17370 }, { "epoch": 1.7829433497536946, "grad_norm": 0.05376293137669563, "learning_rate": 0.01, "loss": 2.0315, "step": 17373 }, { "epoch": 1.7832512315270936, "grad_norm": 0.04049403965473175, "learning_rate": 0.01, "loss": 2.0483, "step": 17376 }, { "epoch": 1.7835591133004927, "grad_norm": 0.04954640567302704, "learning_rate": 0.01, "loss": 2.0393, "step": 17379 }, { "epoch": 1.7838669950738915, "grad_norm": 0.049089133739471436, "learning_rate": 0.01, "loss": 2.0633, "step": 17382 }, { "epoch": 1.7841748768472905, "grad_norm": 0.0531185045838356, "learning_rate": 0.01, "loss": 2.0474, "step": 17385 }, { "epoch": 1.7844827586206895, "grad_norm": 0.060973040759563446, "learning_rate": 0.01, "loss": 2.0219, "step": 17388 }, { "epoch": 1.7847906403940885, "grad_norm": 0.044274650514125824, "learning_rate": 0.01, "loss": 2.0403, "step": 17391 }, { "epoch": 1.7850985221674875, "grad_norm": 0.08154580742120743, "learning_rate": 0.01, "loss": 2.011, "step": 17394 }, { "epoch": 1.7854064039408866, "grad_norm": 0.05253531411290169, "learning_rate": 0.01, "loss": 2.0352, "step": 17397 }, { "epoch": 1.7857142857142856, "grad_norm": 0.056620582938194275, "learning_rate": 0.01, "loss": 2.04, "step": 17400 }, { "epoch": 1.7860221674876846, "grad_norm": 0.069371297955513, "learning_rate": 0.01, "loss": 2.0456, "step": 17403 }, { "epoch": 1.7863300492610836, "grad_norm": 0.04726189747452736, "learning_rate": 0.01, "loss": 2.018, "step": 17406 }, { "epoch": 1.7866379310344827, "grad_norm": 0.11150949448347092, "learning_rate": 0.01, "loss": 2.0503, "step": 17409 }, { "epoch": 1.7869458128078817, "grad_norm": 0.07482532411813736, "learning_rate": 0.01, "loss": 2.0361, "step": 17412 }, { "epoch": 1.7872536945812807, "grad_norm": 0.03803645819425583, "learning_rate": 0.01, "loss": 2.0555, "step": 17415 }, { "epoch": 1.7875615763546797, "grad_norm": 0.08635829389095306, "learning_rate": 0.01, "loss": 2.0551, "step": 17418 }, { "epoch": 1.7878694581280787, "grad_norm": 0.08558929711580276, "learning_rate": 0.01, "loss": 2.0611, "step": 17421 }, { "epoch": 1.7881773399014778, "grad_norm": 0.051051054149866104, "learning_rate": 0.01, "loss": 2.0375, "step": 17424 }, { "epoch": 1.7884852216748768, "grad_norm": 0.0584864616394043, "learning_rate": 0.01, "loss": 2.0131, "step": 17427 }, { "epoch": 1.7887931034482758, "grad_norm": 0.04015490040183067, "learning_rate": 0.01, "loss": 2.0559, "step": 17430 }, { "epoch": 1.7891009852216748, "grad_norm": 0.0499749630689621, "learning_rate": 0.01, "loss": 2.0611, "step": 17433 }, { "epoch": 1.7894088669950738, "grad_norm": 0.08796360343694687, "learning_rate": 0.01, "loss": 2.0538, "step": 17436 }, { "epoch": 1.7897167487684729, "grad_norm": 0.08200754970312119, "learning_rate": 0.01, "loss": 2.0459, "step": 17439 }, { "epoch": 1.7900246305418719, "grad_norm": 0.09300393611192703, "learning_rate": 0.01, "loss": 2.051, "step": 17442 }, { "epoch": 1.790332512315271, "grad_norm": 0.08223576098680496, "learning_rate": 0.01, "loss": 2.0194, "step": 17445 }, { "epoch": 1.79064039408867, "grad_norm": 0.05235210806131363, "learning_rate": 0.01, "loss": 2.0421, "step": 17448 }, { "epoch": 1.790948275862069, "grad_norm": 0.047677502036094666, "learning_rate": 0.01, "loss": 2.043, "step": 17451 }, { "epoch": 1.791256157635468, "grad_norm": 0.044341955333948135, "learning_rate": 0.01, "loss": 2.0381, "step": 17454 }, { "epoch": 1.791564039408867, "grad_norm": 0.09555595368146896, "learning_rate": 0.01, "loss": 2.0224, "step": 17457 }, { "epoch": 1.791871921182266, "grad_norm": 0.05652477219700813, "learning_rate": 0.01, "loss": 2.0318, "step": 17460 }, { "epoch": 1.792179802955665, "grad_norm": 0.0979117676615715, "learning_rate": 0.01, "loss": 2.0747, "step": 17463 }, { "epoch": 1.792487684729064, "grad_norm": 0.0674947127699852, "learning_rate": 0.01, "loss": 2.0723, "step": 17466 }, { "epoch": 1.792795566502463, "grad_norm": 0.05617907643318176, "learning_rate": 0.01, "loss": 2.0444, "step": 17469 }, { "epoch": 1.793103448275862, "grad_norm": 0.10979234427213669, "learning_rate": 0.01, "loss": 2.0638, "step": 17472 }, { "epoch": 1.7934113300492611, "grad_norm": 0.056006476283073425, "learning_rate": 0.01, "loss": 2.0396, "step": 17475 }, { "epoch": 1.7937192118226601, "grad_norm": 0.10030517727136612, "learning_rate": 0.01, "loss": 2.0379, "step": 17478 }, { "epoch": 1.7940270935960592, "grad_norm": 0.042350657284259796, "learning_rate": 0.01, "loss": 2.0319, "step": 17481 }, { "epoch": 1.7943349753694582, "grad_norm": 0.03725098446011543, "learning_rate": 0.01, "loss": 2.0537, "step": 17484 }, { "epoch": 1.7946428571428572, "grad_norm": 0.09215757250785828, "learning_rate": 0.01, "loss": 2.0247, "step": 17487 }, { "epoch": 1.7949507389162562, "grad_norm": 0.08012344688177109, "learning_rate": 0.01, "loss": 2.0428, "step": 17490 }, { "epoch": 1.7952586206896552, "grad_norm": 0.128404900431633, "learning_rate": 0.01, "loss": 2.038, "step": 17493 }, { "epoch": 1.7955665024630543, "grad_norm": 0.08718766272068024, "learning_rate": 0.01, "loss": 2.0557, "step": 17496 }, { "epoch": 1.7958743842364533, "grad_norm": 0.030426733195781708, "learning_rate": 0.01, "loss": 2.0192, "step": 17499 }, { "epoch": 1.7961822660098523, "grad_norm": 0.03950949385762215, "learning_rate": 0.01, "loss": 2.0228, "step": 17502 }, { "epoch": 1.7964901477832513, "grad_norm": 0.049466658383607864, "learning_rate": 0.01, "loss": 2.0514, "step": 17505 }, { "epoch": 1.7967980295566504, "grad_norm": 0.06188172101974487, "learning_rate": 0.01, "loss": 2.0512, "step": 17508 }, { "epoch": 1.7971059113300494, "grad_norm": 0.06420351564884186, "learning_rate": 0.01, "loss": 2.0365, "step": 17511 }, { "epoch": 1.7974137931034484, "grad_norm": 0.04329871013760567, "learning_rate": 0.01, "loss": 2.0511, "step": 17514 }, { "epoch": 1.7977216748768474, "grad_norm": 0.04420280084013939, "learning_rate": 0.01, "loss": 2.0481, "step": 17517 }, { "epoch": 1.7980295566502464, "grad_norm": 0.04043954238295555, "learning_rate": 0.01, "loss": 2.0184, "step": 17520 }, { "epoch": 1.7983374384236455, "grad_norm": 0.049305226653814316, "learning_rate": 0.01, "loss": 2.0353, "step": 17523 }, { "epoch": 1.7986453201970445, "grad_norm": 0.1928088515996933, "learning_rate": 0.01, "loss": 2.0869, "step": 17526 }, { "epoch": 1.7989532019704435, "grad_norm": 0.12283357232809067, "learning_rate": 0.01, "loss": 2.0378, "step": 17529 }, { "epoch": 1.7992610837438425, "grad_norm": 0.07897382229566574, "learning_rate": 0.01, "loss": 2.045, "step": 17532 }, { "epoch": 1.7995689655172413, "grad_norm": 0.0749836266040802, "learning_rate": 0.01, "loss": 2.0388, "step": 17535 }, { "epoch": 1.7998768472906403, "grad_norm": 0.06578727811574936, "learning_rate": 0.01, "loss": 2.0575, "step": 17538 }, { "epoch": 1.8001847290640394, "grad_norm": 0.06609571725130081, "learning_rate": 0.01, "loss": 2.0448, "step": 17541 }, { "epoch": 1.8004926108374384, "grad_norm": 0.047696053981781006, "learning_rate": 0.01, "loss": 2.0574, "step": 17544 }, { "epoch": 1.8008004926108374, "grad_norm": 0.05110754072666168, "learning_rate": 0.01, "loss": 2.0191, "step": 17547 }, { "epoch": 1.8011083743842364, "grad_norm": 0.03783520683646202, "learning_rate": 0.01, "loss": 2.0328, "step": 17550 }, { "epoch": 1.8014162561576355, "grad_norm": 0.03145405650138855, "learning_rate": 0.01, "loss": 2.0373, "step": 17553 }, { "epoch": 1.8017241379310345, "grad_norm": 0.09492892026901245, "learning_rate": 0.01, "loss": 2.0173, "step": 17556 }, { "epoch": 1.8020320197044335, "grad_norm": 0.06920488178730011, "learning_rate": 0.01, "loss": 2.0809, "step": 17559 }, { "epoch": 1.8023399014778325, "grad_norm": 0.0583655945956707, "learning_rate": 0.01, "loss": 2.0259, "step": 17562 }, { "epoch": 1.8026477832512315, "grad_norm": 0.08449242264032364, "learning_rate": 0.01, "loss": 2.0205, "step": 17565 }, { "epoch": 1.8029556650246306, "grad_norm": 0.12186135351657867, "learning_rate": 0.01, "loss": 2.0076, "step": 17568 }, { "epoch": 1.8032635467980296, "grad_norm": 0.09926268458366394, "learning_rate": 0.01, "loss": 2.0444, "step": 17571 }, { "epoch": 1.8035714285714286, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.0211, "step": 17574 }, { "epoch": 1.8038793103448276, "grad_norm": 0.050847604870796204, "learning_rate": 0.01, "loss": 2.0377, "step": 17577 }, { "epoch": 1.8041871921182266, "grad_norm": 0.053053803741931915, "learning_rate": 0.01, "loss": 2.0462, "step": 17580 }, { "epoch": 1.8044950738916257, "grad_norm": 0.047114890068769455, "learning_rate": 0.01, "loss": 2.0171, "step": 17583 }, { "epoch": 1.8048029556650245, "grad_norm": 0.05182573199272156, "learning_rate": 0.01, "loss": 2.0396, "step": 17586 }, { "epoch": 1.8051108374384235, "grad_norm": 0.12609605491161346, "learning_rate": 0.01, "loss": 2.053, "step": 17589 }, { "epoch": 1.8054187192118225, "grad_norm": 0.0496569462120533, "learning_rate": 0.01, "loss": 2.0418, "step": 17592 }, { "epoch": 1.8057266009852215, "grad_norm": 0.0490572527050972, "learning_rate": 0.01, "loss": 2.0199, "step": 17595 }, { "epoch": 1.8060344827586206, "grad_norm": 0.038300756365060806, "learning_rate": 0.01, "loss": 2.0337, "step": 17598 }, { "epoch": 1.8063423645320196, "grad_norm": 0.03666609153151512, "learning_rate": 0.01, "loss": 2.0392, "step": 17601 }, { "epoch": 1.8066502463054186, "grad_norm": 0.036330632865428925, "learning_rate": 0.01, "loss": 2.0319, "step": 17604 }, { "epoch": 1.8069581280788176, "grad_norm": 0.0605342797935009, "learning_rate": 0.01, "loss": 2.0356, "step": 17607 }, { "epoch": 1.8072660098522166, "grad_norm": 0.04346880316734314, "learning_rate": 0.01, "loss": 2.0188, "step": 17610 }, { "epoch": 1.8075738916256157, "grad_norm": 0.06400660425424576, "learning_rate": 0.01, "loss": 2.0342, "step": 17613 }, { "epoch": 1.8078817733990147, "grad_norm": 0.0812198668718338, "learning_rate": 0.01, "loss": 2.0622, "step": 17616 }, { "epoch": 1.8081896551724137, "grad_norm": 0.06756972521543503, "learning_rate": 0.01, "loss": 2.0396, "step": 17619 }, { "epoch": 1.8084975369458127, "grad_norm": 0.05277147516608238, "learning_rate": 0.01, "loss": 2.0149, "step": 17622 }, { "epoch": 1.8088054187192117, "grad_norm": 0.07904385775327682, "learning_rate": 0.01, "loss": 2.0393, "step": 17625 }, { "epoch": 1.8091133004926108, "grad_norm": 0.06955704092979431, "learning_rate": 0.01, "loss": 2.0433, "step": 17628 }, { "epoch": 1.8094211822660098, "grad_norm": 0.06605497002601624, "learning_rate": 0.01, "loss": 2.0439, "step": 17631 }, { "epoch": 1.8097290640394088, "grad_norm": 0.03861093521118164, "learning_rate": 0.01, "loss": 2.03, "step": 17634 }, { "epoch": 1.8100369458128078, "grad_norm": 0.04323074221611023, "learning_rate": 0.01, "loss": 2.0444, "step": 17637 }, { "epoch": 1.8103448275862069, "grad_norm": 0.03443233296275139, "learning_rate": 0.01, "loss": 2.0466, "step": 17640 }, { "epoch": 1.8106527093596059, "grad_norm": 0.04190131649374962, "learning_rate": 0.01, "loss": 2.0307, "step": 17643 }, { "epoch": 1.810960591133005, "grad_norm": 0.09095717966556549, "learning_rate": 0.01, "loss": 2.0529, "step": 17646 }, { "epoch": 1.811268472906404, "grad_norm": 0.05452005937695503, "learning_rate": 0.01, "loss": 2.0337, "step": 17649 }, { "epoch": 1.811576354679803, "grad_norm": 0.05032350867986679, "learning_rate": 0.01, "loss": 2.0398, "step": 17652 }, { "epoch": 1.811884236453202, "grad_norm": 0.05733015760779381, "learning_rate": 0.01, "loss": 2.0573, "step": 17655 }, { "epoch": 1.812192118226601, "grad_norm": 0.09373817592859268, "learning_rate": 0.01, "loss": 2.0278, "step": 17658 }, { "epoch": 1.8125, "grad_norm": 0.07385890185832977, "learning_rate": 0.01, "loss": 2.032, "step": 17661 }, { "epoch": 1.812807881773399, "grad_norm": 0.08643963187932968, "learning_rate": 0.01, "loss": 2.0351, "step": 17664 }, { "epoch": 1.813115763546798, "grad_norm": 0.09909530729055405, "learning_rate": 0.01, "loss": 2.0562, "step": 17667 }, { "epoch": 1.813423645320197, "grad_norm": 0.04600978642702103, "learning_rate": 0.01, "loss": 2.0219, "step": 17670 }, { "epoch": 1.813731527093596, "grad_norm": 0.033060222864151, "learning_rate": 0.01, "loss": 2.0479, "step": 17673 }, { "epoch": 1.814039408866995, "grad_norm": 0.03789517655968666, "learning_rate": 0.01, "loss": 2.0242, "step": 17676 }, { "epoch": 1.8143472906403941, "grad_norm": 0.0502844899892807, "learning_rate": 0.01, "loss": 2.0519, "step": 17679 }, { "epoch": 1.8146551724137931, "grad_norm": 0.0627695843577385, "learning_rate": 0.01, "loss": 2.0327, "step": 17682 }, { "epoch": 1.8149630541871922, "grad_norm": 0.15737055242061615, "learning_rate": 0.01, "loss": 2.0572, "step": 17685 }, { "epoch": 1.8152709359605912, "grad_norm": 0.09944868832826614, "learning_rate": 0.01, "loss": 2.0462, "step": 17688 }, { "epoch": 1.8155788177339902, "grad_norm": 0.12345952540636063, "learning_rate": 0.01, "loss": 2.0447, "step": 17691 }, { "epoch": 1.8158866995073892, "grad_norm": 0.06330909579992294, "learning_rate": 0.01, "loss": 2.0511, "step": 17694 }, { "epoch": 1.8161945812807883, "grad_norm": 0.0584748238325119, "learning_rate": 0.01, "loss": 2.0164, "step": 17697 }, { "epoch": 1.8165024630541873, "grad_norm": 0.07284627109766006, "learning_rate": 0.01, "loss": 2.0308, "step": 17700 }, { "epoch": 1.8168103448275863, "grad_norm": 0.07302995771169662, "learning_rate": 0.01, "loss": 2.0347, "step": 17703 }, { "epoch": 1.8171182266009853, "grad_norm": 0.06292667984962463, "learning_rate": 0.01, "loss": 2.026, "step": 17706 }, { "epoch": 1.8174261083743843, "grad_norm": 0.04821958392858505, "learning_rate": 0.01, "loss": 2.033, "step": 17709 }, { "epoch": 1.8177339901477834, "grad_norm": 0.03572079911828041, "learning_rate": 0.01, "loss": 2.047, "step": 17712 }, { "epoch": 1.8180418719211824, "grad_norm": 0.12643416225910187, "learning_rate": 0.01, "loss": 2.0621, "step": 17715 }, { "epoch": 1.8183497536945814, "grad_norm": 0.08803770691156387, "learning_rate": 0.01, "loss": 2.0422, "step": 17718 }, { "epoch": 1.8186576354679804, "grad_norm": 0.061583418399095535, "learning_rate": 0.01, "loss": 1.9895, "step": 17721 }, { "epoch": 1.8189655172413794, "grad_norm": 0.04947415366768837, "learning_rate": 0.01, "loss": 2.0249, "step": 17724 }, { "epoch": 1.8192733990147785, "grad_norm": 0.06042906641960144, "learning_rate": 0.01, "loss": 2.0563, "step": 17727 }, { "epoch": 1.8195812807881775, "grad_norm": 0.03236406669020653, "learning_rate": 0.01, "loss": 2.0482, "step": 17730 }, { "epoch": 1.8198891625615765, "grad_norm": 0.05975859984755516, "learning_rate": 0.01, "loss": 2.0353, "step": 17733 }, { "epoch": 1.8201970443349755, "grad_norm": 0.11028258502483368, "learning_rate": 0.01, "loss": 2.0654, "step": 17736 }, { "epoch": 1.8205049261083743, "grad_norm": 0.055842846632003784, "learning_rate": 0.01, "loss": 2.0589, "step": 17739 }, { "epoch": 1.8208128078817734, "grad_norm": 0.09189102053642273, "learning_rate": 0.01, "loss": 2.0444, "step": 17742 }, { "epoch": 1.8211206896551724, "grad_norm": 0.07795927673578262, "learning_rate": 0.01, "loss": 2.0628, "step": 17745 }, { "epoch": 1.8214285714285714, "grad_norm": 0.06452701985836029, "learning_rate": 0.01, "loss": 2.0415, "step": 17748 }, { "epoch": 1.8217364532019704, "grad_norm": 0.056360337883234024, "learning_rate": 0.01, "loss": 2.0159, "step": 17751 }, { "epoch": 1.8220443349753694, "grad_norm": 0.08861987292766571, "learning_rate": 0.01, "loss": 2.0325, "step": 17754 }, { "epoch": 1.8223522167487685, "grad_norm": 0.07276416569948196, "learning_rate": 0.01, "loss": 2.022, "step": 17757 }, { "epoch": 1.8226600985221675, "grad_norm": 0.07501320540904999, "learning_rate": 0.01, "loss": 2.0592, "step": 17760 }, { "epoch": 1.8229679802955665, "grad_norm": 0.08408310264348984, "learning_rate": 0.01, "loss": 2.0353, "step": 17763 }, { "epoch": 1.8232758620689655, "grad_norm": 0.039008378982543945, "learning_rate": 0.01, "loss": 2.0541, "step": 17766 }, { "epoch": 1.8235837438423645, "grad_norm": 0.05153367295861244, "learning_rate": 0.01, "loss": 2.0614, "step": 17769 }, { "epoch": 1.8238916256157636, "grad_norm": 0.05685068294405937, "learning_rate": 0.01, "loss": 2.0603, "step": 17772 }, { "epoch": 1.8241995073891626, "grad_norm": 0.10836745798587799, "learning_rate": 0.01, "loss": 2.0293, "step": 17775 }, { "epoch": 1.8245073891625616, "grad_norm": 0.13855011761188507, "learning_rate": 0.01, "loss": 2.044, "step": 17778 }, { "epoch": 1.8248152709359606, "grad_norm": 0.07912803441286087, "learning_rate": 0.01, "loss": 2.062, "step": 17781 }, { "epoch": 1.8251231527093597, "grad_norm": 0.065729521214962, "learning_rate": 0.01, "loss": 2.0416, "step": 17784 }, { "epoch": 1.8254310344827587, "grad_norm": 0.04546307399868965, "learning_rate": 0.01, "loss": 2.0291, "step": 17787 }, { "epoch": 1.8257389162561575, "grad_norm": 0.03415641188621521, "learning_rate": 0.01, "loss": 2.0391, "step": 17790 }, { "epoch": 1.8260467980295565, "grad_norm": 0.038325123488903046, "learning_rate": 0.01, "loss": 2.0249, "step": 17793 }, { "epoch": 1.8263546798029555, "grad_norm": 0.057417213916778564, "learning_rate": 0.01, "loss": 2.0465, "step": 17796 }, { "epoch": 1.8266625615763545, "grad_norm": 0.07312962412834167, "learning_rate": 0.01, "loss": 2.009, "step": 17799 }, { "epoch": 1.8269704433497536, "grad_norm": 0.06465096771717072, "learning_rate": 0.01, "loss": 2.0582, "step": 17802 }, { "epoch": 1.8272783251231526, "grad_norm": 0.049065001308918, "learning_rate": 0.01, "loss": 2.0466, "step": 17805 }, { "epoch": 1.8275862068965516, "grad_norm": 0.05004505068063736, "learning_rate": 0.01, "loss": 2.0368, "step": 17808 }, { "epoch": 1.8278940886699506, "grad_norm": 0.12177273631095886, "learning_rate": 0.01, "loss": 2.0299, "step": 17811 }, { "epoch": 1.8282019704433496, "grad_norm": 0.09006219357252121, "learning_rate": 0.01, "loss": 2.0442, "step": 17814 }, { "epoch": 1.8285098522167487, "grad_norm": 0.07000398635864258, "learning_rate": 0.01, "loss": 2.0346, "step": 17817 }, { "epoch": 1.8288177339901477, "grad_norm": 0.03561507910490036, "learning_rate": 0.01, "loss": 2.022, "step": 17820 }, { "epoch": 1.8291256157635467, "grad_norm": 0.050965216010808945, "learning_rate": 0.01, "loss": 2.0577, "step": 17823 }, { "epoch": 1.8294334975369457, "grad_norm": 0.04437123239040375, "learning_rate": 0.01, "loss": 2.0204, "step": 17826 }, { "epoch": 1.8297413793103448, "grad_norm": 0.046157170087099075, "learning_rate": 0.01, "loss": 2.0315, "step": 17829 }, { "epoch": 1.8300492610837438, "grad_norm": 0.0641985610127449, "learning_rate": 0.01, "loss": 2.0619, "step": 17832 }, { "epoch": 1.8303571428571428, "grad_norm": 0.10295763611793518, "learning_rate": 0.01, "loss": 2.0142, "step": 17835 }, { "epoch": 1.8306650246305418, "grad_norm": 0.08395816385746002, "learning_rate": 0.01, "loss": 2.0388, "step": 17838 }, { "epoch": 1.8309729064039408, "grad_norm": 0.07087874412536621, "learning_rate": 0.01, "loss": 2.0458, "step": 17841 }, { "epoch": 1.8312807881773399, "grad_norm": 0.04754515364766121, "learning_rate": 0.01, "loss": 2.0305, "step": 17844 }, { "epoch": 1.8315886699507389, "grad_norm": 0.042998362332582474, "learning_rate": 0.01, "loss": 2.0334, "step": 17847 }, { "epoch": 1.831896551724138, "grad_norm": 0.044786881655454636, "learning_rate": 0.01, "loss": 2.0545, "step": 17850 }, { "epoch": 1.832204433497537, "grad_norm": 0.05035366117954254, "learning_rate": 0.01, "loss": 2.0346, "step": 17853 }, { "epoch": 1.832512315270936, "grad_norm": 0.08760454505681992, "learning_rate": 0.01, "loss": 2.0407, "step": 17856 }, { "epoch": 1.832820197044335, "grad_norm": 0.07182349264621735, "learning_rate": 0.01, "loss": 2.0617, "step": 17859 }, { "epoch": 1.833128078817734, "grad_norm": 0.0653420239686966, "learning_rate": 0.01, "loss": 2.02, "step": 17862 }, { "epoch": 1.833435960591133, "grad_norm": 0.07664595544338226, "learning_rate": 0.01, "loss": 2.0453, "step": 17865 }, { "epoch": 1.833743842364532, "grad_norm": 0.052884750068187714, "learning_rate": 0.01, "loss": 2.0433, "step": 17868 }, { "epoch": 1.834051724137931, "grad_norm": 0.049432456493377686, "learning_rate": 0.01, "loss": 2.0392, "step": 17871 }, { "epoch": 1.83435960591133, "grad_norm": 0.10208621621131897, "learning_rate": 0.01, "loss": 2.0425, "step": 17874 }, { "epoch": 1.834667487684729, "grad_norm": 0.0663546770811081, "learning_rate": 0.01, "loss": 2.0276, "step": 17877 }, { "epoch": 1.8349753694581281, "grad_norm": 0.0952199399471283, "learning_rate": 0.01, "loss": 2.0273, "step": 17880 }, { "epoch": 1.8352832512315271, "grad_norm": 0.04969238117337227, "learning_rate": 0.01, "loss": 2.0227, "step": 17883 }, { "epoch": 1.8355911330049262, "grad_norm": 0.05101123824715614, "learning_rate": 0.01, "loss": 2.0642, "step": 17886 }, { "epoch": 1.8358990147783252, "grad_norm": 0.1026005819439888, "learning_rate": 0.01, "loss": 2.0118, "step": 17889 }, { "epoch": 1.8362068965517242, "grad_norm": 0.06481184810400009, "learning_rate": 0.01, "loss": 2.0457, "step": 17892 }, { "epoch": 1.8365147783251232, "grad_norm": 0.0684402734041214, "learning_rate": 0.01, "loss": 2.0364, "step": 17895 }, { "epoch": 1.8368226600985222, "grad_norm": 0.1051085963845253, "learning_rate": 0.01, "loss": 2.0178, "step": 17898 }, { "epoch": 1.8371305418719213, "grad_norm": 0.06582857668399811, "learning_rate": 0.01, "loss": 2.0409, "step": 17901 }, { "epoch": 1.8374384236453203, "grad_norm": 0.05665391683578491, "learning_rate": 0.01, "loss": 2.04, "step": 17904 }, { "epoch": 1.8377463054187193, "grad_norm": 0.06239892914891243, "learning_rate": 0.01, "loss": 2.0199, "step": 17907 }, { "epoch": 1.8380541871921183, "grad_norm": 0.08531507849693298, "learning_rate": 0.01, "loss": 2.0429, "step": 17910 }, { "epoch": 1.8383620689655173, "grad_norm": 0.07379250973463058, "learning_rate": 0.01, "loss": 2.0226, "step": 17913 }, { "epoch": 1.8386699507389164, "grad_norm": 0.052789974957704544, "learning_rate": 0.01, "loss": 2.0198, "step": 17916 }, { "epoch": 1.8389778325123154, "grad_norm": 0.09525316208600998, "learning_rate": 0.01, "loss": 2.0423, "step": 17919 }, { "epoch": 1.8392857142857144, "grad_norm": 0.05700648948550224, "learning_rate": 0.01, "loss": 2.0332, "step": 17922 }, { "epoch": 1.8395935960591134, "grad_norm": 0.061519671231508255, "learning_rate": 0.01, "loss": 2.038, "step": 17925 }, { "epoch": 1.8399014778325125, "grad_norm": 0.05594256520271301, "learning_rate": 0.01, "loss": 2.0247, "step": 17928 }, { "epoch": 1.8402093596059115, "grad_norm": 0.06823567301034927, "learning_rate": 0.01, "loss": 2.0319, "step": 17931 }, { "epoch": 1.8405172413793105, "grad_norm": 0.061398666352033615, "learning_rate": 0.01, "loss": 2.038, "step": 17934 }, { "epoch": 1.8408251231527095, "grad_norm": 0.10590513050556183, "learning_rate": 0.01, "loss": 2.0336, "step": 17937 }, { "epoch": 1.8411330049261085, "grad_norm": 0.0579022578895092, "learning_rate": 0.01, "loss": 2.0249, "step": 17940 }, { "epoch": 1.8414408866995073, "grad_norm": 0.07047640532255173, "learning_rate": 0.01, "loss": 2.0147, "step": 17943 }, { "epoch": 1.8417487684729064, "grad_norm": 0.07486578077077866, "learning_rate": 0.01, "loss": 2.0413, "step": 17946 }, { "epoch": 1.8420566502463054, "grad_norm": 0.057884715497493744, "learning_rate": 0.01, "loss": 2.0357, "step": 17949 }, { "epoch": 1.8423645320197044, "grad_norm": 0.10381656140089035, "learning_rate": 0.01, "loss": 2.0382, "step": 17952 }, { "epoch": 1.8426724137931034, "grad_norm": 0.041863467544317245, "learning_rate": 0.01, "loss": 2.0345, "step": 17955 }, { "epoch": 1.8429802955665024, "grad_norm": 0.10012530535459518, "learning_rate": 0.01, "loss": 2.0648, "step": 17958 }, { "epoch": 1.8432881773399015, "grad_norm": 0.05597177520394325, "learning_rate": 0.01, "loss": 2.0513, "step": 17961 }, { "epoch": 1.8435960591133005, "grad_norm": 0.05338521674275398, "learning_rate": 0.01, "loss": 2.0287, "step": 17964 }, { "epoch": 1.8439039408866995, "grad_norm": 0.049141060560941696, "learning_rate": 0.01, "loss": 2.0486, "step": 17967 }, { "epoch": 1.8442118226600985, "grad_norm": 0.0784049779176712, "learning_rate": 0.01, "loss": 2.0176, "step": 17970 }, { "epoch": 1.8445197044334976, "grad_norm": 0.038596317172050476, "learning_rate": 0.01, "loss": 2.0167, "step": 17973 }, { "epoch": 1.8448275862068966, "grad_norm": 0.08521022647619247, "learning_rate": 0.01, "loss": 2.0364, "step": 17976 }, { "epoch": 1.8451354679802956, "grad_norm": 0.05890432372689247, "learning_rate": 0.01, "loss": 2.062, "step": 17979 }, { "epoch": 1.8454433497536946, "grad_norm": 0.09090931713581085, "learning_rate": 0.01, "loss": 2.0514, "step": 17982 }, { "epoch": 1.8457512315270936, "grad_norm": 0.06019595265388489, "learning_rate": 0.01, "loss": 2.0463, "step": 17985 }, { "epoch": 1.8460591133004927, "grad_norm": 0.07712443917989731, "learning_rate": 0.01, "loss": 2.0466, "step": 17988 }, { "epoch": 1.8463669950738915, "grad_norm": 0.06155428662896156, "learning_rate": 0.01, "loss": 2.0224, "step": 17991 }, { "epoch": 1.8466748768472905, "grad_norm": 0.07221681624650955, "learning_rate": 0.01, "loss": 2.0128, "step": 17994 }, { "epoch": 1.8469827586206895, "grad_norm": 0.056776583194732666, "learning_rate": 0.01, "loss": 2.0156, "step": 17997 }, { "epoch": 1.8472906403940885, "grad_norm": 0.12099254876375198, "learning_rate": 0.01, "loss": 2.0522, "step": 18000 }, { "epoch": 1.8475985221674875, "grad_norm": 0.060344647616147995, "learning_rate": 0.01, "loss": 2.038, "step": 18003 }, { "epoch": 1.8479064039408866, "grad_norm": 0.042333200573921204, "learning_rate": 0.01, "loss": 2.0202, "step": 18006 }, { "epoch": 1.8482142857142856, "grad_norm": 0.046059176325798035, "learning_rate": 0.01, "loss": 2.0505, "step": 18009 }, { "epoch": 1.8485221674876846, "grad_norm": 0.03853166475892067, "learning_rate": 0.01, "loss": 2.0681, "step": 18012 }, { "epoch": 1.8488300492610836, "grad_norm": 0.05197960138320923, "learning_rate": 0.01, "loss": 2.0563, "step": 18015 }, { "epoch": 1.8491379310344827, "grad_norm": 0.03316551446914673, "learning_rate": 0.01, "loss": 2.0279, "step": 18018 }, { "epoch": 1.8494458128078817, "grad_norm": 0.05977516993880272, "learning_rate": 0.01, "loss": 2.0217, "step": 18021 }, { "epoch": 1.8497536945812807, "grad_norm": 0.12331486493349075, "learning_rate": 0.01, "loss": 2.0531, "step": 18024 }, { "epoch": 1.8500615763546797, "grad_norm": 0.08065730333328247, "learning_rate": 0.01, "loss": 2.0508, "step": 18027 }, { "epoch": 1.8503694581280787, "grad_norm": 0.07649014890193939, "learning_rate": 0.01, "loss": 2.0629, "step": 18030 }, { "epoch": 1.8506773399014778, "grad_norm": 0.08431357145309448, "learning_rate": 0.01, "loss": 2.0504, "step": 18033 }, { "epoch": 1.8509852216748768, "grad_norm": 0.041856877505779266, "learning_rate": 0.01, "loss": 2.0376, "step": 18036 }, { "epoch": 1.8512931034482758, "grad_norm": 0.03598650172352791, "learning_rate": 0.01, "loss": 2.0455, "step": 18039 }, { "epoch": 1.8516009852216748, "grad_norm": 0.0360511913895607, "learning_rate": 0.01, "loss": 2.044, "step": 18042 }, { "epoch": 1.8519088669950738, "grad_norm": 0.0409335158765316, "learning_rate": 0.01, "loss": 2.0307, "step": 18045 }, { "epoch": 1.8522167487684729, "grad_norm": 0.04646136611700058, "learning_rate": 0.01, "loss": 2.0306, "step": 18048 }, { "epoch": 1.8525246305418719, "grad_norm": 0.08265028148889542, "learning_rate": 0.01, "loss": 2.0325, "step": 18051 }, { "epoch": 1.852832512315271, "grad_norm": 0.08118387311697006, "learning_rate": 0.01, "loss": 2.0315, "step": 18054 }, { "epoch": 1.85314039408867, "grad_norm": 0.05400428548455238, "learning_rate": 0.01, "loss": 2.0186, "step": 18057 }, { "epoch": 1.853448275862069, "grad_norm": 0.04605553671717644, "learning_rate": 0.01, "loss": 2.0212, "step": 18060 }, { "epoch": 1.853756157635468, "grad_norm": 0.06259449571371078, "learning_rate": 0.01, "loss": 2.0443, "step": 18063 }, { "epoch": 1.854064039408867, "grad_norm": 0.04901091381907463, "learning_rate": 0.01, "loss": 2.023, "step": 18066 }, { "epoch": 1.854371921182266, "grad_norm": 0.13033097982406616, "learning_rate": 0.01, "loss": 2.0696, "step": 18069 }, { "epoch": 1.854679802955665, "grad_norm": 0.04114639014005661, "learning_rate": 0.01, "loss": 2.0314, "step": 18072 }, { "epoch": 1.854987684729064, "grad_norm": 0.05269275978207588, "learning_rate": 0.01, "loss": 2.0576, "step": 18075 }, { "epoch": 1.855295566502463, "grad_norm": 0.052419982850551605, "learning_rate": 0.01, "loss": 2.0309, "step": 18078 }, { "epoch": 1.855603448275862, "grad_norm": 0.11109264940023422, "learning_rate": 0.01, "loss": 2.0234, "step": 18081 }, { "epoch": 1.8559113300492611, "grad_norm": 0.09544682502746582, "learning_rate": 0.01, "loss": 2.0287, "step": 18084 }, { "epoch": 1.8562192118226601, "grad_norm": 0.08282047510147095, "learning_rate": 0.01, "loss": 2.0601, "step": 18087 }, { "epoch": 1.8565270935960592, "grad_norm": 0.04255926236510277, "learning_rate": 0.01, "loss": 2.0094, "step": 18090 }, { "epoch": 1.8568349753694582, "grad_norm": 0.04899003729224205, "learning_rate": 0.01, "loss": 2.0494, "step": 18093 }, { "epoch": 1.8571428571428572, "grad_norm": 0.05081721395254135, "learning_rate": 0.01, "loss": 2.0309, "step": 18096 }, { "epoch": 1.8574507389162562, "grad_norm": 0.06633096188306808, "learning_rate": 0.01, "loss": 2.0526, "step": 18099 }, { "epoch": 1.8577586206896552, "grad_norm": 0.06513489037752151, "learning_rate": 0.01, "loss": 2.0072, "step": 18102 }, { "epoch": 1.8580665024630543, "grad_norm": 0.09429512917995453, "learning_rate": 0.01, "loss": 2.0522, "step": 18105 }, { "epoch": 1.8583743842364533, "grad_norm": 0.06042760610580444, "learning_rate": 0.01, "loss": 2.0613, "step": 18108 }, { "epoch": 1.8586822660098523, "grad_norm": 0.04098260775208473, "learning_rate": 0.01, "loss": 2.0712, "step": 18111 }, { "epoch": 1.8589901477832513, "grad_norm": 0.04882989823818207, "learning_rate": 0.01, "loss": 2.0308, "step": 18114 }, { "epoch": 1.8592980295566504, "grad_norm": 0.06196373701095581, "learning_rate": 0.01, "loss": 2.0535, "step": 18117 }, { "epoch": 1.8596059113300494, "grad_norm": 0.10515942424535751, "learning_rate": 0.01, "loss": 2.0402, "step": 18120 }, { "epoch": 1.8599137931034484, "grad_norm": 0.08962828665971756, "learning_rate": 0.01, "loss": 2.048, "step": 18123 }, { "epoch": 1.8602216748768474, "grad_norm": 0.07672600448131561, "learning_rate": 0.01, "loss": 2.0502, "step": 18126 }, { "epoch": 1.8605295566502464, "grad_norm": 0.04769902676343918, "learning_rate": 0.01, "loss": 2.0444, "step": 18129 }, { "epoch": 1.8608374384236455, "grad_norm": 0.06558270752429962, "learning_rate": 0.01, "loss": 2.0431, "step": 18132 }, { "epoch": 1.8611453201970445, "grad_norm": 0.06909210234880447, "learning_rate": 0.01, "loss": 2.0429, "step": 18135 }, { "epoch": 1.8614532019704435, "grad_norm": 0.07571686059236526, "learning_rate": 0.01, "loss": 2.045, "step": 18138 }, { "epoch": 1.8617610837438425, "grad_norm": 0.11170367896556854, "learning_rate": 0.01, "loss": 2.05, "step": 18141 }, { "epoch": 1.8620689655172413, "grad_norm": 0.12978370487689972, "learning_rate": 0.01, "loss": 2.0621, "step": 18144 }, { "epoch": 1.8623768472906403, "grad_norm": 0.056673552840948105, "learning_rate": 0.01, "loss": 2.0264, "step": 18147 }, { "epoch": 1.8626847290640394, "grad_norm": 0.04110763967037201, "learning_rate": 0.01, "loss": 2.0478, "step": 18150 }, { "epoch": 1.8629926108374384, "grad_norm": 0.06502550840377808, "learning_rate": 0.01, "loss": 2.0404, "step": 18153 }, { "epoch": 1.8633004926108374, "grad_norm": 0.059242501854896545, "learning_rate": 0.01, "loss": 2.0545, "step": 18156 }, { "epoch": 1.8636083743842364, "grad_norm": 0.05173099413514137, "learning_rate": 0.01, "loss": 2.0327, "step": 18159 }, { "epoch": 1.8639162561576355, "grad_norm": 0.0403546541929245, "learning_rate": 0.01, "loss": 2.037, "step": 18162 }, { "epoch": 1.8642241379310345, "grad_norm": 0.10920348763465881, "learning_rate": 0.01, "loss": 2.04, "step": 18165 }, { "epoch": 1.8645320197044335, "grad_norm": 0.05528813973069191, "learning_rate": 0.01, "loss": 2.0207, "step": 18168 }, { "epoch": 1.8648399014778325, "grad_norm": 0.04583175480365753, "learning_rate": 0.01, "loss": 2.0409, "step": 18171 }, { "epoch": 1.8651477832512315, "grad_norm": 0.04503155127167702, "learning_rate": 0.01, "loss": 2.0412, "step": 18174 }, { "epoch": 1.8654556650246306, "grad_norm": 0.038750261068344116, "learning_rate": 0.01, "loss": 2.0412, "step": 18177 }, { "epoch": 1.8657635467980296, "grad_norm": 0.0650535374879837, "learning_rate": 0.01, "loss": 2.0515, "step": 18180 }, { "epoch": 1.8660714285714286, "grad_norm": 0.08589319884777069, "learning_rate": 0.01, "loss": 2.0456, "step": 18183 }, { "epoch": 1.8663793103448276, "grad_norm": 0.06245085969567299, "learning_rate": 0.01, "loss": 2.0257, "step": 18186 }, { "epoch": 1.8666871921182266, "grad_norm": 0.07419238984584808, "learning_rate": 0.01, "loss": 2.0501, "step": 18189 }, { "epoch": 1.8669950738916257, "grad_norm": 0.06336040049791336, "learning_rate": 0.01, "loss": 2.0822, "step": 18192 }, { "epoch": 1.8673029556650245, "grad_norm": 0.09494315087795258, "learning_rate": 0.01, "loss": 2.0523, "step": 18195 }, { "epoch": 1.8676108374384235, "grad_norm": 0.06543273478746414, "learning_rate": 0.01, "loss": 2.0289, "step": 18198 }, { "epoch": 1.8679187192118225, "grad_norm": 0.05602452531456947, "learning_rate": 0.01, "loss": 2.0626, "step": 18201 }, { "epoch": 1.8682266009852215, "grad_norm": 0.041735779494047165, "learning_rate": 0.01, "loss": 2.0137, "step": 18204 }, { "epoch": 1.8685344827586206, "grad_norm": 0.03998032957315445, "learning_rate": 0.01, "loss": 2.0341, "step": 18207 }, { "epoch": 1.8688423645320196, "grad_norm": 0.07010776549577713, "learning_rate": 0.01, "loss": 2.0525, "step": 18210 }, { "epoch": 1.8691502463054186, "grad_norm": 0.08516181260347366, "learning_rate": 0.01, "loss": 2.0065, "step": 18213 }, { "epoch": 1.8694581280788176, "grad_norm": 0.08233955502510071, "learning_rate": 0.01, "loss": 2.0391, "step": 18216 }, { "epoch": 1.8697660098522166, "grad_norm": 0.06725854426622391, "learning_rate": 0.01, "loss": 2.0224, "step": 18219 }, { "epoch": 1.8700738916256157, "grad_norm": 0.03345496207475662, "learning_rate": 0.01, "loss": 2.0327, "step": 18222 }, { "epoch": 1.8703817733990147, "grad_norm": 0.09758662432432175, "learning_rate": 0.01, "loss": 2.0258, "step": 18225 }, { "epoch": 1.8706896551724137, "grad_norm": 0.05376002565026283, "learning_rate": 0.01, "loss": 2.0592, "step": 18228 }, { "epoch": 1.8709975369458127, "grad_norm": 0.04620193690061569, "learning_rate": 0.01, "loss": 2.023, "step": 18231 }, { "epoch": 1.8713054187192117, "grad_norm": 0.0853218212723732, "learning_rate": 0.01, "loss": 2.0352, "step": 18234 }, { "epoch": 1.8716133004926108, "grad_norm": 0.15689584612846375, "learning_rate": 0.01, "loss": 2.0163, "step": 18237 }, { "epoch": 1.8719211822660098, "grad_norm": 0.05037194490432739, "learning_rate": 0.01, "loss": 2.0541, "step": 18240 }, { "epoch": 1.8722290640394088, "grad_norm": 0.031591251492500305, "learning_rate": 0.01, "loss": 2.0502, "step": 18243 }, { "epoch": 1.8725369458128078, "grad_norm": 0.05832947790622711, "learning_rate": 0.01, "loss": 2.0284, "step": 18246 }, { "epoch": 1.8728448275862069, "grad_norm": 0.059619709849357605, "learning_rate": 0.01, "loss": 2.053, "step": 18249 }, { "epoch": 1.8731527093596059, "grad_norm": 0.04232211783528328, "learning_rate": 0.01, "loss": 2.0456, "step": 18252 }, { "epoch": 1.873460591133005, "grad_norm": 0.03756287693977356, "learning_rate": 0.01, "loss": 2.0183, "step": 18255 }, { "epoch": 1.873768472906404, "grad_norm": 0.04177022725343704, "learning_rate": 0.01, "loss": 2.0213, "step": 18258 }, { "epoch": 1.874076354679803, "grad_norm": 0.054342493414878845, "learning_rate": 0.01, "loss": 2.0246, "step": 18261 }, { "epoch": 1.874384236453202, "grad_norm": 0.11872408539056778, "learning_rate": 0.01, "loss": 2.0259, "step": 18264 }, { "epoch": 1.874692118226601, "grad_norm": 0.05143645405769348, "learning_rate": 0.01, "loss": 2.0473, "step": 18267 }, { "epoch": 1.875, "grad_norm": 0.06726546585559845, "learning_rate": 0.01, "loss": 2.0413, "step": 18270 }, { "epoch": 1.875307881773399, "grad_norm": 0.10031245648860931, "learning_rate": 0.01, "loss": 2.0315, "step": 18273 }, { "epoch": 1.875615763546798, "grad_norm": 0.09145006537437439, "learning_rate": 0.01, "loss": 2.0359, "step": 18276 }, { "epoch": 1.875923645320197, "grad_norm": 0.0797610953450203, "learning_rate": 0.01, "loss": 2.0507, "step": 18279 }, { "epoch": 1.876231527093596, "grad_norm": 0.07170062512159348, "learning_rate": 0.01, "loss": 2.0369, "step": 18282 }, { "epoch": 1.876539408866995, "grad_norm": 0.043757934123277664, "learning_rate": 0.01, "loss": 2.0825, "step": 18285 }, { "epoch": 1.8768472906403941, "grad_norm": 0.059610821306705475, "learning_rate": 0.01, "loss": 2.0275, "step": 18288 }, { "epoch": 1.8771551724137931, "grad_norm": 0.06014898791909218, "learning_rate": 0.01, "loss": 2.0498, "step": 18291 }, { "epoch": 1.8774630541871922, "grad_norm": 0.0823112353682518, "learning_rate": 0.01, "loss": 1.9977, "step": 18294 }, { "epoch": 1.8777709359605912, "grad_norm": 0.11342828720808029, "learning_rate": 0.01, "loss": 2.0259, "step": 18297 }, { "epoch": 1.8780788177339902, "grad_norm": 0.1533091962337494, "learning_rate": 0.01, "loss": 2.0593, "step": 18300 }, { "epoch": 1.8783866995073892, "grad_norm": 0.09665971249341965, "learning_rate": 0.01, "loss": 2.0304, "step": 18303 }, { "epoch": 1.8786945812807883, "grad_norm": 0.043992988765239716, "learning_rate": 0.01, "loss": 2.0412, "step": 18306 }, { "epoch": 1.8790024630541873, "grad_norm": 0.03452041372656822, "learning_rate": 0.01, "loss": 2.009, "step": 18309 }, { "epoch": 1.8793103448275863, "grad_norm": 0.05596618726849556, "learning_rate": 0.01, "loss": 2.0498, "step": 18312 }, { "epoch": 1.8796182266009853, "grad_norm": 0.0542016327381134, "learning_rate": 0.01, "loss": 2.033, "step": 18315 }, { "epoch": 1.8799261083743843, "grad_norm": 0.049744654446840286, "learning_rate": 0.01, "loss": 2.0625, "step": 18318 }, { "epoch": 1.8802339901477834, "grad_norm": 0.04219472035765648, "learning_rate": 0.01, "loss": 2.0458, "step": 18321 }, { "epoch": 1.8805418719211824, "grad_norm": 0.11828272044658661, "learning_rate": 0.01, "loss": 2.047, "step": 18324 }, { "epoch": 1.8808497536945814, "grad_norm": 0.042180564254522324, "learning_rate": 0.01, "loss": 2.0477, "step": 18327 }, { "epoch": 1.8811576354679804, "grad_norm": 0.05486786365509033, "learning_rate": 0.01, "loss": 2.034, "step": 18330 }, { "epoch": 1.8814655172413794, "grad_norm": 0.09456659108400345, "learning_rate": 0.01, "loss": 2.059, "step": 18333 }, { "epoch": 1.8817733990147785, "grad_norm": 0.03962776064872742, "learning_rate": 0.01, "loss": 2.0569, "step": 18336 }, { "epoch": 1.8820812807881775, "grad_norm": 0.06588723510503769, "learning_rate": 0.01, "loss": 2.0131, "step": 18339 }, { "epoch": 1.8823891625615765, "grad_norm": 0.0490611270070076, "learning_rate": 0.01, "loss": 2.0558, "step": 18342 }, { "epoch": 1.8826970443349755, "grad_norm": 0.10546906292438507, "learning_rate": 0.01, "loss": 2.0354, "step": 18345 }, { "epoch": 1.8830049261083743, "grad_norm": 0.05751054733991623, "learning_rate": 0.01, "loss": 2.0321, "step": 18348 }, { "epoch": 1.8833128078817734, "grad_norm": 0.102676160633564, "learning_rate": 0.01, "loss": 2.0116, "step": 18351 }, { "epoch": 1.8836206896551724, "grad_norm": 0.0536825954914093, "learning_rate": 0.01, "loss": 2.0677, "step": 18354 }, { "epoch": 1.8839285714285714, "grad_norm": 0.039357978850603104, "learning_rate": 0.01, "loss": 2.0217, "step": 18357 }, { "epoch": 1.8842364532019704, "grad_norm": 0.03515158221125603, "learning_rate": 0.01, "loss": 2.0332, "step": 18360 }, { "epoch": 1.8845443349753694, "grad_norm": 0.04234091565012932, "learning_rate": 0.01, "loss": 2.0329, "step": 18363 }, { "epoch": 1.8848522167487685, "grad_norm": 0.06893119215965271, "learning_rate": 0.01, "loss": 2.0241, "step": 18366 }, { "epoch": 1.8851600985221675, "grad_norm": 0.08353175222873688, "learning_rate": 0.01, "loss": 2.0236, "step": 18369 }, { "epoch": 1.8854679802955665, "grad_norm": 0.0567467026412487, "learning_rate": 0.01, "loss": 2.0672, "step": 18372 }, { "epoch": 1.8857758620689655, "grad_norm": 0.05529101565480232, "learning_rate": 0.01, "loss": 2.0364, "step": 18375 }, { "epoch": 1.8860837438423645, "grad_norm": 0.09581262618303299, "learning_rate": 0.01, "loss": 2.0395, "step": 18378 }, { "epoch": 1.8863916256157636, "grad_norm": 0.052034202963113785, "learning_rate": 0.01, "loss": 2.0415, "step": 18381 }, { "epoch": 1.8866995073891626, "grad_norm": 0.10314558446407318, "learning_rate": 0.01, "loss": 2.0095, "step": 18384 }, { "epoch": 1.8870073891625616, "grad_norm": 0.04631441831588745, "learning_rate": 0.01, "loss": 2.0504, "step": 18387 }, { "epoch": 1.8873152709359606, "grad_norm": 0.08067111670970917, "learning_rate": 0.01, "loss": 2.0504, "step": 18390 }, { "epoch": 1.8876231527093597, "grad_norm": 0.07892802357673645, "learning_rate": 0.01, "loss": 2.0495, "step": 18393 }, { "epoch": 1.8879310344827587, "grad_norm": 0.06167163327336311, "learning_rate": 0.01, "loss": 2.0546, "step": 18396 }, { "epoch": 1.8882389162561575, "grad_norm": 0.06746269762516022, "learning_rate": 0.01, "loss": 2.0187, "step": 18399 }, { "epoch": 1.8885467980295565, "grad_norm": 0.06389199942350388, "learning_rate": 0.01, "loss": 2.0556, "step": 18402 }, { "epoch": 1.8888546798029555, "grad_norm": 0.05448369309306145, "learning_rate": 0.01, "loss": 2.0607, "step": 18405 }, { "epoch": 1.8891625615763545, "grad_norm": 0.05535599961876869, "learning_rate": 0.01, "loss": 2.0347, "step": 18408 }, { "epoch": 1.8894704433497536, "grad_norm": 0.0533415786921978, "learning_rate": 0.01, "loss": 2.0648, "step": 18411 }, { "epoch": 1.8897783251231526, "grad_norm": 0.06423043459653854, "learning_rate": 0.01, "loss": 2.0554, "step": 18414 }, { "epoch": 1.8900862068965516, "grad_norm": 0.04682399705052376, "learning_rate": 0.01, "loss": 2.0402, "step": 18417 }, { "epoch": 1.8903940886699506, "grad_norm": 0.09395507723093033, "learning_rate": 0.01, "loss": 2.0417, "step": 18420 }, { "epoch": 1.8907019704433496, "grad_norm": 0.07777848839759827, "learning_rate": 0.01, "loss": 2.068, "step": 18423 }, { "epoch": 1.8910098522167487, "grad_norm": 0.052006904035806656, "learning_rate": 0.01, "loss": 2.0232, "step": 18426 }, { "epoch": 1.8913177339901477, "grad_norm": 0.05325109511613846, "learning_rate": 0.01, "loss": 2.0292, "step": 18429 }, { "epoch": 1.8916256157635467, "grad_norm": 0.05496850982308388, "learning_rate": 0.01, "loss": 2.0531, "step": 18432 }, { "epoch": 1.8919334975369457, "grad_norm": 0.0395922027528286, "learning_rate": 0.01, "loss": 2.0504, "step": 18435 }, { "epoch": 1.8922413793103448, "grad_norm": 0.0664554312825203, "learning_rate": 0.01, "loss": 2.0555, "step": 18438 }, { "epoch": 1.8925492610837438, "grad_norm": 0.06475098431110382, "learning_rate": 0.01, "loss": 2.0361, "step": 18441 }, { "epoch": 1.8928571428571428, "grad_norm": 0.06291298568248749, "learning_rate": 0.01, "loss": 2.0083, "step": 18444 }, { "epoch": 1.8931650246305418, "grad_norm": 0.07891640067100525, "learning_rate": 0.01, "loss": 2.0338, "step": 18447 }, { "epoch": 1.8934729064039408, "grad_norm": 0.10065143555402756, "learning_rate": 0.01, "loss": 2.0372, "step": 18450 }, { "epoch": 1.8937807881773399, "grad_norm": 0.0962565690279007, "learning_rate": 0.01, "loss": 2.0369, "step": 18453 }, { "epoch": 1.8940886699507389, "grad_norm": 0.06286763399839401, "learning_rate": 0.01, "loss": 2.0416, "step": 18456 }, { "epoch": 1.894396551724138, "grad_norm": 0.05067887529730797, "learning_rate": 0.01, "loss": 2.0582, "step": 18459 }, { "epoch": 1.894704433497537, "grad_norm": 0.03956342115998268, "learning_rate": 0.01, "loss": 2.0633, "step": 18462 }, { "epoch": 1.895012315270936, "grad_norm": 0.06670361012220383, "learning_rate": 0.01, "loss": 2.0569, "step": 18465 }, { "epoch": 1.895320197044335, "grad_norm": 0.07848145067691803, "learning_rate": 0.01, "loss": 2.037, "step": 18468 }, { "epoch": 1.895628078817734, "grad_norm": 0.05415938422083855, "learning_rate": 0.01, "loss": 2.0699, "step": 18471 }, { "epoch": 1.895935960591133, "grad_norm": 0.07549092918634415, "learning_rate": 0.01, "loss": 2.0369, "step": 18474 }, { "epoch": 1.896243842364532, "grad_norm": 0.057871196419000626, "learning_rate": 0.01, "loss": 2.0434, "step": 18477 }, { "epoch": 1.896551724137931, "grad_norm": 0.059748612344264984, "learning_rate": 0.01, "loss": 2.0374, "step": 18480 }, { "epoch": 1.89685960591133, "grad_norm": 0.04501016065478325, "learning_rate": 0.01, "loss": 2.034, "step": 18483 }, { "epoch": 1.897167487684729, "grad_norm": 0.06361118704080582, "learning_rate": 0.01, "loss": 2.0398, "step": 18486 }, { "epoch": 1.8974753694581281, "grad_norm": 0.09649393707513809, "learning_rate": 0.01, "loss": 2.0311, "step": 18489 }, { "epoch": 1.8977832512315271, "grad_norm": 0.08288730680942535, "learning_rate": 0.01, "loss": 2.0585, "step": 18492 }, { "epoch": 1.8980911330049262, "grad_norm": 0.037788692861795425, "learning_rate": 0.01, "loss": 2.028, "step": 18495 }, { "epoch": 1.8983990147783252, "grad_norm": 0.05678097531199455, "learning_rate": 0.01, "loss": 2.029, "step": 18498 }, { "epoch": 1.8987068965517242, "grad_norm": 0.05753886699676514, "learning_rate": 0.01, "loss": 2.0523, "step": 18501 }, { "epoch": 1.8990147783251232, "grad_norm": 0.0542941652238369, "learning_rate": 0.01, "loss": 2.0334, "step": 18504 }, { "epoch": 1.8993226600985222, "grad_norm": 0.06856728345155716, "learning_rate": 0.01, "loss": 2.0179, "step": 18507 }, { "epoch": 1.8996305418719213, "grad_norm": 0.09270088374614716, "learning_rate": 0.01, "loss": 2.0258, "step": 18510 }, { "epoch": 1.8999384236453203, "grad_norm": 0.04473109543323517, "learning_rate": 0.01, "loss": 2.0036, "step": 18513 }, { "epoch": 1.9002463054187193, "grad_norm": 0.06040007993578911, "learning_rate": 0.01, "loss": 2.0344, "step": 18516 }, { "epoch": 1.9005541871921183, "grad_norm": 0.032143257558345795, "learning_rate": 0.01, "loss": 2.0478, "step": 18519 }, { "epoch": 1.9008620689655173, "grad_norm": 0.04205821454524994, "learning_rate": 0.01, "loss": 2.0562, "step": 18522 }, { "epoch": 1.9011699507389164, "grad_norm": 0.03920583799481392, "learning_rate": 0.01, "loss": 2.0561, "step": 18525 }, { "epoch": 1.9014778325123154, "grad_norm": 0.17323125898838043, "learning_rate": 0.01, "loss": 2.0243, "step": 18528 }, { "epoch": 1.9017857142857144, "grad_norm": 0.04595707729458809, "learning_rate": 0.01, "loss": 2.0515, "step": 18531 }, { "epoch": 1.9020935960591134, "grad_norm": 0.03803316131234169, "learning_rate": 0.01, "loss": 2.0268, "step": 18534 }, { "epoch": 1.9024014778325125, "grad_norm": 0.04623658210039139, "learning_rate": 0.01, "loss": 2.0423, "step": 18537 }, { "epoch": 1.9027093596059115, "grad_norm": 0.04388248175382614, "learning_rate": 0.01, "loss": 2.0207, "step": 18540 }, { "epoch": 1.9030172413793105, "grad_norm": 0.03582540154457092, "learning_rate": 0.01, "loss": 2.0307, "step": 18543 }, { "epoch": 1.9033251231527095, "grad_norm": 0.033453166484832764, "learning_rate": 0.01, "loss": 2.0514, "step": 18546 }, { "epoch": 1.9036330049261085, "grad_norm": 0.04929531365633011, "learning_rate": 0.01, "loss": 2.022, "step": 18549 }, { "epoch": 1.9039408866995073, "grad_norm": 0.0575069934129715, "learning_rate": 0.01, "loss": 2.0136, "step": 18552 }, { "epoch": 1.9042487684729064, "grad_norm": 0.044883664697408676, "learning_rate": 0.01, "loss": 2.0267, "step": 18555 }, { "epoch": 1.9045566502463054, "grad_norm": 0.06335309147834778, "learning_rate": 0.01, "loss": 2.013, "step": 18558 }, { "epoch": 1.9048645320197044, "grad_norm": 0.07315582036972046, "learning_rate": 0.01, "loss": 2.0331, "step": 18561 }, { "epoch": 1.9051724137931034, "grad_norm": 0.08378446102142334, "learning_rate": 0.01, "loss": 2.0154, "step": 18564 }, { "epoch": 1.9054802955665024, "grad_norm": 0.09492503106594086, "learning_rate": 0.01, "loss": 2.0239, "step": 18567 }, { "epoch": 1.9057881773399015, "grad_norm": 0.0497819185256958, "learning_rate": 0.01, "loss": 2.0551, "step": 18570 }, { "epoch": 1.9060960591133005, "grad_norm": 0.06625241041183472, "learning_rate": 0.01, "loss": 2.0477, "step": 18573 }, { "epoch": 1.9064039408866995, "grad_norm": 0.13533645868301392, "learning_rate": 0.01, "loss": 2.0288, "step": 18576 }, { "epoch": 1.9067118226600985, "grad_norm": 0.129546657204628, "learning_rate": 0.01, "loss": 1.9943, "step": 18579 }, { "epoch": 1.9070197044334976, "grad_norm": 0.0862266942858696, "learning_rate": 0.01, "loss": 2.0273, "step": 18582 }, { "epoch": 1.9073275862068966, "grad_norm": 0.04262632504105568, "learning_rate": 0.01, "loss": 2.0289, "step": 18585 }, { "epoch": 1.9076354679802956, "grad_norm": 0.06536297500133514, "learning_rate": 0.01, "loss": 2.0453, "step": 18588 }, { "epoch": 1.9079433497536946, "grad_norm": 0.04408801719546318, "learning_rate": 0.01, "loss": 2.045, "step": 18591 }, { "epoch": 1.9082512315270936, "grad_norm": 0.0382089763879776, "learning_rate": 0.01, "loss": 2.057, "step": 18594 }, { "epoch": 1.9085591133004927, "grad_norm": 0.05695042014122009, "learning_rate": 0.01, "loss": 2.0468, "step": 18597 }, { "epoch": 1.9088669950738915, "grad_norm": 0.06890982389450073, "learning_rate": 0.01, "loss": 2.0631, "step": 18600 }, { "epoch": 1.9091748768472905, "grad_norm": 0.06517864018678665, "learning_rate": 0.01, "loss": 2.0288, "step": 18603 }, { "epoch": 1.9094827586206895, "grad_norm": 0.03709007799625397, "learning_rate": 0.01, "loss": 2.0579, "step": 18606 }, { "epoch": 1.9097906403940885, "grad_norm": 0.040355831384658813, "learning_rate": 0.01, "loss": 2.023, "step": 18609 }, { "epoch": 1.9100985221674875, "grad_norm": 0.08973202854394913, "learning_rate": 0.01, "loss": 2.0495, "step": 18612 }, { "epoch": 1.9104064039408866, "grad_norm": 0.08074682205915451, "learning_rate": 0.01, "loss": 2.0352, "step": 18615 }, { "epoch": 1.9107142857142856, "grad_norm": 0.07134959101676941, "learning_rate": 0.01, "loss": 2.0234, "step": 18618 }, { "epoch": 1.9110221674876846, "grad_norm": 0.10389960557222366, "learning_rate": 0.01, "loss": 2.0456, "step": 18621 }, { "epoch": 1.9113300492610836, "grad_norm": 0.052665699273347855, "learning_rate": 0.01, "loss": 2.019, "step": 18624 }, { "epoch": 1.9116379310344827, "grad_norm": 0.06355523318052292, "learning_rate": 0.01, "loss": 2.0328, "step": 18627 }, { "epoch": 1.9119458128078817, "grad_norm": 0.06806465983390808, "learning_rate": 0.01, "loss": 2.0364, "step": 18630 }, { "epoch": 1.9122536945812807, "grad_norm": 0.08892465382814407, "learning_rate": 0.01, "loss": 2.0436, "step": 18633 }, { "epoch": 1.9125615763546797, "grad_norm": 0.09806855767965317, "learning_rate": 0.01, "loss": 2.0494, "step": 18636 }, { "epoch": 1.9128694581280787, "grad_norm": 0.036283593624830246, "learning_rate": 0.01, "loss": 2.03, "step": 18639 }, { "epoch": 1.9131773399014778, "grad_norm": 0.06654248386621475, "learning_rate": 0.01, "loss": 2.0177, "step": 18642 }, { "epoch": 1.9134852216748768, "grad_norm": 0.07729227095842361, "learning_rate": 0.01, "loss": 2.041, "step": 18645 }, { "epoch": 1.9137931034482758, "grad_norm": 0.05296695604920387, "learning_rate": 0.01, "loss": 2.0512, "step": 18648 }, { "epoch": 1.9141009852216748, "grad_norm": 0.05579183250665665, "learning_rate": 0.01, "loss": 2.0171, "step": 18651 }, { "epoch": 1.9144088669950738, "grad_norm": 0.04230615124106407, "learning_rate": 0.01, "loss": 2.0178, "step": 18654 }, { "epoch": 1.9147167487684729, "grad_norm": 0.0412709042429924, "learning_rate": 0.01, "loss": 2.0356, "step": 18657 }, { "epoch": 1.9150246305418719, "grad_norm": 0.056640543043613434, "learning_rate": 0.01, "loss": 2.0506, "step": 18660 }, { "epoch": 1.915332512315271, "grad_norm": 0.04353609308600426, "learning_rate": 0.01, "loss": 2.0703, "step": 18663 }, { "epoch": 1.91564039408867, "grad_norm": 0.04212663322687149, "learning_rate": 0.01, "loss": 2.0333, "step": 18666 }, { "epoch": 1.915948275862069, "grad_norm": 0.07639022916555405, "learning_rate": 0.01, "loss": 2.0267, "step": 18669 }, { "epoch": 1.916256157635468, "grad_norm": 0.09405479580163956, "learning_rate": 0.01, "loss": 2.0532, "step": 18672 }, { "epoch": 1.916564039408867, "grad_norm": 0.07501058280467987, "learning_rate": 0.01, "loss": 2.0281, "step": 18675 }, { "epoch": 1.916871921182266, "grad_norm": 0.06343735009431839, "learning_rate": 0.01, "loss": 2.0387, "step": 18678 }, { "epoch": 1.917179802955665, "grad_norm": 0.07794613391160965, "learning_rate": 0.01, "loss": 2.0189, "step": 18681 }, { "epoch": 1.917487684729064, "grad_norm": 0.1063399538397789, "learning_rate": 0.01, "loss": 2.0522, "step": 18684 }, { "epoch": 1.917795566502463, "grad_norm": 0.06528618931770325, "learning_rate": 0.01, "loss": 2.0411, "step": 18687 }, { "epoch": 1.918103448275862, "grad_norm": 0.063084177672863, "learning_rate": 0.01, "loss": 2.0405, "step": 18690 }, { "epoch": 1.9184113300492611, "grad_norm": 0.06663991510868073, "learning_rate": 0.01, "loss": 2.0547, "step": 18693 }, { "epoch": 1.9187192118226601, "grad_norm": 0.09827464818954468, "learning_rate": 0.01, "loss": 2.0285, "step": 18696 }, { "epoch": 1.9190270935960592, "grad_norm": 0.052305273711681366, "learning_rate": 0.01, "loss": 2.0205, "step": 18699 }, { "epoch": 1.9193349753694582, "grad_norm": 0.07126889377832413, "learning_rate": 0.01, "loss": 2.0331, "step": 18702 }, { "epoch": 1.9196428571428572, "grad_norm": 0.06262009590864182, "learning_rate": 0.01, "loss": 2.0217, "step": 18705 }, { "epoch": 1.9199507389162562, "grad_norm": 0.056034356355667114, "learning_rate": 0.01, "loss": 2.0136, "step": 18708 }, { "epoch": 1.9202586206896552, "grad_norm": 0.07673577219247818, "learning_rate": 0.01, "loss": 2.0617, "step": 18711 }, { "epoch": 1.9205665024630543, "grad_norm": 0.06006854772567749, "learning_rate": 0.01, "loss": 2.0493, "step": 18714 }, { "epoch": 1.9208743842364533, "grad_norm": 0.07149071991443634, "learning_rate": 0.01, "loss": 2.0339, "step": 18717 }, { "epoch": 1.9211822660098523, "grad_norm": 0.04935576766729355, "learning_rate": 0.01, "loss": 2.0228, "step": 18720 }, { "epoch": 1.9214901477832513, "grad_norm": 0.1052050068974495, "learning_rate": 0.01, "loss": 2.0384, "step": 18723 }, { "epoch": 1.9217980295566504, "grad_norm": 0.07589028030633926, "learning_rate": 0.01, "loss": 2.0349, "step": 18726 }, { "epoch": 1.9221059113300494, "grad_norm": 0.0862005278468132, "learning_rate": 0.01, "loss": 2.0357, "step": 18729 }, { "epoch": 1.9224137931034484, "grad_norm": 0.07210662961006165, "learning_rate": 0.01, "loss": 2.0344, "step": 18732 }, { "epoch": 1.9227216748768474, "grad_norm": 0.0924825370311737, "learning_rate": 0.01, "loss": 2.0069, "step": 18735 }, { "epoch": 1.9230295566502464, "grad_norm": 0.05819706991314888, "learning_rate": 0.01, "loss": 2.0266, "step": 18738 }, { "epoch": 1.9233374384236455, "grad_norm": 0.04784362018108368, "learning_rate": 0.01, "loss": 2.0633, "step": 18741 }, { "epoch": 1.9236453201970445, "grad_norm": 0.07216835021972656, "learning_rate": 0.01, "loss": 2.0107, "step": 18744 }, { "epoch": 1.9239532019704435, "grad_norm": 0.05539752170443535, "learning_rate": 0.01, "loss": 2.0222, "step": 18747 }, { "epoch": 1.9242610837438425, "grad_norm": 0.07037390768527985, "learning_rate": 0.01, "loss": 2.0331, "step": 18750 }, { "epoch": 1.9245689655172413, "grad_norm": 0.0941152572631836, "learning_rate": 0.01, "loss": 2.0432, "step": 18753 }, { "epoch": 1.9248768472906403, "grad_norm": 0.05315488949418068, "learning_rate": 0.01, "loss": 2.036, "step": 18756 }, { "epoch": 1.9251847290640394, "grad_norm": 0.04986554756760597, "learning_rate": 0.01, "loss": 2.0249, "step": 18759 }, { "epoch": 1.9254926108374384, "grad_norm": 0.0750490128993988, "learning_rate": 0.01, "loss": 2.0448, "step": 18762 }, { "epoch": 1.9258004926108374, "grad_norm": 0.13903938233852386, "learning_rate": 0.01, "loss": 2.0433, "step": 18765 }, { "epoch": 1.9261083743842364, "grad_norm": 0.08733932673931122, "learning_rate": 0.01, "loss": 2.0223, "step": 18768 }, { "epoch": 1.9264162561576355, "grad_norm": 0.04527903348207474, "learning_rate": 0.01, "loss": 2.0112, "step": 18771 }, { "epoch": 1.9267241379310345, "grad_norm": 0.036207813769578934, "learning_rate": 0.01, "loss": 2.0353, "step": 18774 }, { "epoch": 1.9270320197044335, "grad_norm": 0.04572034999728203, "learning_rate": 0.01, "loss": 2.0388, "step": 18777 }, { "epoch": 1.9273399014778325, "grad_norm": 0.03662864491343498, "learning_rate": 0.01, "loss": 2.0023, "step": 18780 }, { "epoch": 1.9276477832512315, "grad_norm": 0.12049257755279541, "learning_rate": 0.01, "loss": 2.027, "step": 18783 }, { "epoch": 1.9279556650246306, "grad_norm": 0.1005631759762764, "learning_rate": 0.01, "loss": 2.0537, "step": 18786 }, { "epoch": 1.9282635467980296, "grad_norm": 0.13943985104560852, "learning_rate": 0.01, "loss": 2.0129, "step": 18789 }, { "epoch": 1.9285714285714286, "grad_norm": 0.13312341272830963, "learning_rate": 0.01, "loss": 2.0196, "step": 18792 }, { "epoch": 1.9288793103448276, "grad_norm": 0.0517788864672184, "learning_rate": 0.01, "loss": 2.0291, "step": 18795 }, { "epoch": 1.9291871921182266, "grad_norm": 0.05525217577815056, "learning_rate": 0.01, "loss": 2.0707, "step": 18798 }, { "epoch": 1.9294950738916257, "grad_norm": 0.04876135662198067, "learning_rate": 0.01, "loss": 2.0441, "step": 18801 }, { "epoch": 1.9298029556650245, "grad_norm": 0.04944787919521332, "learning_rate": 0.01, "loss": 2.0479, "step": 18804 }, { "epoch": 1.9301108374384235, "grad_norm": 0.06437812745571136, "learning_rate": 0.01, "loss": 2.0316, "step": 18807 }, { "epoch": 1.9304187192118225, "grad_norm": 0.04027709737420082, "learning_rate": 0.01, "loss": 2.0443, "step": 18810 }, { "epoch": 1.9307266009852215, "grad_norm": 0.05178974196314812, "learning_rate": 0.01, "loss": 2.0127, "step": 18813 }, { "epoch": 1.9310344827586206, "grad_norm": 0.05347009375691414, "learning_rate": 0.01, "loss": 2.0426, "step": 18816 }, { "epoch": 1.9313423645320196, "grad_norm": 0.03055960312485695, "learning_rate": 0.01, "loss": 2.0369, "step": 18819 }, { "epoch": 1.9316502463054186, "grad_norm": 0.10861945152282715, "learning_rate": 0.01, "loss": 2.0398, "step": 18822 }, { "epoch": 1.9319581280788176, "grad_norm": 0.05932777374982834, "learning_rate": 0.01, "loss": 2.0272, "step": 18825 }, { "epoch": 1.9322660098522166, "grad_norm": 0.046545740216970444, "learning_rate": 0.01, "loss": 2.0171, "step": 18828 }, { "epoch": 1.9325738916256157, "grad_norm": 0.07582221925258636, "learning_rate": 0.01, "loss": 2.0239, "step": 18831 }, { "epoch": 1.9328817733990147, "grad_norm": 0.07975540310144424, "learning_rate": 0.01, "loss": 2.0073, "step": 18834 }, { "epoch": 1.9331896551724137, "grad_norm": 0.07365059852600098, "learning_rate": 0.01, "loss": 2.0188, "step": 18837 }, { "epoch": 1.9334975369458127, "grad_norm": 0.09160298854112625, "learning_rate": 0.01, "loss": 2.027, "step": 18840 }, { "epoch": 1.9338054187192117, "grad_norm": 0.07767198234796524, "learning_rate": 0.01, "loss": 2.0157, "step": 18843 }, { "epoch": 1.9341133004926108, "grad_norm": 0.07545919716358185, "learning_rate": 0.01, "loss": 2.0334, "step": 18846 }, { "epoch": 1.9344211822660098, "grad_norm": 0.06564575433731079, "learning_rate": 0.01, "loss": 2.0199, "step": 18849 }, { "epoch": 1.9347290640394088, "grad_norm": 0.04205799475312233, "learning_rate": 0.01, "loss": 2.0275, "step": 18852 }, { "epoch": 1.9350369458128078, "grad_norm": 0.07428024709224701, "learning_rate": 0.01, "loss": 2.0258, "step": 18855 }, { "epoch": 1.9353448275862069, "grad_norm": 0.08150817453861237, "learning_rate": 0.01, "loss": 2.0404, "step": 18858 }, { "epoch": 1.9356527093596059, "grad_norm": 0.07489453256130219, "learning_rate": 0.01, "loss": 2.0489, "step": 18861 }, { "epoch": 1.935960591133005, "grad_norm": 0.09983116388320923, "learning_rate": 0.01, "loss": 2.0101, "step": 18864 }, { "epoch": 1.936268472906404, "grad_norm": 0.09074544906616211, "learning_rate": 0.01, "loss": 2.0385, "step": 18867 }, { "epoch": 1.936576354679803, "grad_norm": 0.056056223809719086, "learning_rate": 0.01, "loss": 2.0531, "step": 18870 }, { "epoch": 1.936884236453202, "grad_norm": 0.04818575084209442, "learning_rate": 0.01, "loss": 2.029, "step": 18873 }, { "epoch": 1.937192118226601, "grad_norm": 0.04811173304915428, "learning_rate": 0.01, "loss": 2.0147, "step": 18876 }, { "epoch": 1.9375, "grad_norm": 0.05799747258424759, "learning_rate": 0.01, "loss": 2.0466, "step": 18879 }, { "epoch": 1.937807881773399, "grad_norm": 0.07357611507177353, "learning_rate": 0.01, "loss": 2.0404, "step": 18882 }, { "epoch": 1.938115763546798, "grad_norm": 0.10275068879127502, "learning_rate": 0.01, "loss": 2.0468, "step": 18885 }, { "epoch": 1.938423645320197, "grad_norm": 0.03685866296291351, "learning_rate": 0.01, "loss": 2.0165, "step": 18888 }, { "epoch": 1.938731527093596, "grad_norm": 0.03603344038128853, "learning_rate": 0.01, "loss": 2.0403, "step": 18891 }, { "epoch": 1.939039408866995, "grad_norm": 0.06932532042264938, "learning_rate": 0.01, "loss": 2.0359, "step": 18894 }, { "epoch": 1.9393472906403941, "grad_norm": 0.05983889847993851, "learning_rate": 0.01, "loss": 2.0299, "step": 18897 }, { "epoch": 1.9396551724137931, "grad_norm": 0.1199260875582695, "learning_rate": 0.01, "loss": 2.0467, "step": 18900 }, { "epoch": 1.9399630541871922, "grad_norm": 0.06222264841198921, "learning_rate": 0.01, "loss": 2.0387, "step": 18903 }, { "epoch": 1.9402709359605912, "grad_norm": 0.0764993354678154, "learning_rate": 0.01, "loss": 2.0014, "step": 18906 }, { "epoch": 1.9405788177339902, "grad_norm": 0.04790098965167999, "learning_rate": 0.01, "loss": 2.0399, "step": 18909 }, { "epoch": 1.9408866995073892, "grad_norm": 0.03822425380349159, "learning_rate": 0.01, "loss": 2.0325, "step": 18912 }, { "epoch": 1.9411945812807883, "grad_norm": 0.05336176976561546, "learning_rate": 0.01, "loss": 2.0307, "step": 18915 }, { "epoch": 1.9415024630541873, "grad_norm": 0.08732246607542038, "learning_rate": 0.01, "loss": 2.0373, "step": 18918 }, { "epoch": 1.9418103448275863, "grad_norm": 0.08886411786079407, "learning_rate": 0.01, "loss": 2.0682, "step": 18921 }, { "epoch": 1.9421182266009853, "grad_norm": 0.08069706708192825, "learning_rate": 0.01, "loss": 2.0382, "step": 18924 }, { "epoch": 1.9424261083743843, "grad_norm": 0.08464798331260681, "learning_rate": 0.01, "loss": 2.0207, "step": 18927 }, { "epoch": 1.9427339901477834, "grad_norm": 0.07051963359117508, "learning_rate": 0.01, "loss": 2.0375, "step": 18930 }, { "epoch": 1.9430418719211824, "grad_norm": 0.037250157445669174, "learning_rate": 0.01, "loss": 2.0146, "step": 18933 }, { "epoch": 1.9433497536945814, "grad_norm": 0.07512888312339783, "learning_rate": 0.01, "loss": 2.0279, "step": 18936 }, { "epoch": 1.9436576354679804, "grad_norm": 0.12079732865095139, "learning_rate": 0.01, "loss": 2.0457, "step": 18939 }, { "epoch": 1.9439655172413794, "grad_norm": 0.0511600561439991, "learning_rate": 0.01, "loss": 2.0558, "step": 18942 }, { "epoch": 1.9442733990147785, "grad_norm": 0.06442293524742126, "learning_rate": 0.01, "loss": 2.07, "step": 18945 }, { "epoch": 1.9445812807881775, "grad_norm": 0.04928497597575188, "learning_rate": 0.01, "loss": 2.0088, "step": 18948 }, { "epoch": 1.9448891625615765, "grad_norm": 0.07882185280323029, "learning_rate": 0.01, "loss": 2.0192, "step": 18951 }, { "epoch": 1.9451970443349755, "grad_norm": 0.03649712726473808, "learning_rate": 0.01, "loss": 2.0319, "step": 18954 }, { "epoch": 1.9455049261083743, "grad_norm": 0.10654021054506302, "learning_rate": 0.01, "loss": 2.038, "step": 18957 }, { "epoch": 1.9458128078817734, "grad_norm": 0.09655455499887466, "learning_rate": 0.01, "loss": 2.0284, "step": 18960 }, { "epoch": 1.9461206896551724, "grad_norm": 0.06114486977458, "learning_rate": 0.01, "loss": 2.0578, "step": 18963 }, { "epoch": 1.9464285714285714, "grad_norm": 0.04167640954256058, "learning_rate": 0.01, "loss": 2.0229, "step": 18966 }, { "epoch": 1.9467364532019704, "grad_norm": 0.054138265550136566, "learning_rate": 0.01, "loss": 2.0439, "step": 18969 }, { "epoch": 1.9470443349753694, "grad_norm": 0.04728518798947334, "learning_rate": 0.01, "loss": 2.0485, "step": 18972 }, { "epoch": 1.9473522167487685, "grad_norm": 0.09992729872465134, "learning_rate": 0.01, "loss": 2.0581, "step": 18975 }, { "epoch": 1.9476600985221675, "grad_norm": 0.039344482123851776, "learning_rate": 0.01, "loss": 2.0213, "step": 18978 }, { "epoch": 1.9479679802955665, "grad_norm": 0.10921066254377365, "learning_rate": 0.01, "loss": 2.0538, "step": 18981 }, { "epoch": 1.9482758620689655, "grad_norm": 0.03921816125512123, "learning_rate": 0.01, "loss": 2.011, "step": 18984 }, { "epoch": 1.9485837438423645, "grad_norm": 0.08293361961841583, "learning_rate": 0.01, "loss": 2.0186, "step": 18987 }, { "epoch": 1.9488916256157636, "grad_norm": 0.08310680091381073, "learning_rate": 0.01, "loss": 2.065, "step": 18990 }, { "epoch": 1.9491995073891626, "grad_norm": 0.05105976015329361, "learning_rate": 0.01, "loss": 2.0461, "step": 18993 }, { "epoch": 1.9495073891625616, "grad_norm": 0.0387946255505085, "learning_rate": 0.01, "loss": 2.0227, "step": 18996 }, { "epoch": 1.9498152709359606, "grad_norm": 0.039592646062374115, "learning_rate": 0.01, "loss": 2.0409, "step": 18999 }, { "epoch": 1.9501231527093597, "grad_norm": 0.042499393224716187, "learning_rate": 0.01, "loss": 2.0388, "step": 19002 }, { "epoch": 1.9504310344827587, "grad_norm": 0.1202671155333519, "learning_rate": 0.01, "loss": 2.0185, "step": 19005 }, { "epoch": 1.9507389162561575, "grad_norm": 0.05047677457332611, "learning_rate": 0.01, "loss": 2.0534, "step": 19008 }, { "epoch": 1.9510467980295565, "grad_norm": 0.13210178911685944, "learning_rate": 0.01, "loss": 2.0502, "step": 19011 }, { "epoch": 1.9513546798029555, "grad_norm": 0.07093524187803268, "learning_rate": 0.01, "loss": 2.0426, "step": 19014 }, { "epoch": 1.9516625615763545, "grad_norm": 0.05528571456670761, "learning_rate": 0.01, "loss": 2.0348, "step": 19017 }, { "epoch": 1.9519704433497536, "grad_norm": 0.08988847583532333, "learning_rate": 0.01, "loss": 2.0214, "step": 19020 }, { "epoch": 1.9522783251231526, "grad_norm": 0.05767255648970604, "learning_rate": 0.01, "loss": 2.0335, "step": 19023 }, { "epoch": 1.9525862068965516, "grad_norm": 0.07641880214214325, "learning_rate": 0.01, "loss": 2.0278, "step": 19026 }, { "epoch": 1.9528940886699506, "grad_norm": 0.08751394599676132, "learning_rate": 0.01, "loss": 2.0298, "step": 19029 }, { "epoch": 1.9532019704433496, "grad_norm": 0.06144971400499344, "learning_rate": 0.01, "loss": 2.0433, "step": 19032 }, { "epoch": 1.9535098522167487, "grad_norm": 0.04502955824136734, "learning_rate": 0.01, "loss": 2.0442, "step": 19035 }, { "epoch": 1.9538177339901477, "grad_norm": 0.05031814053654671, "learning_rate": 0.01, "loss": 2.0491, "step": 19038 }, { "epoch": 1.9541256157635467, "grad_norm": 0.08885148912668228, "learning_rate": 0.01, "loss": 2.043, "step": 19041 }, { "epoch": 1.9544334975369457, "grad_norm": 0.05610232427716255, "learning_rate": 0.01, "loss": 2.0515, "step": 19044 }, { "epoch": 1.9547413793103448, "grad_norm": 0.07169227302074432, "learning_rate": 0.01, "loss": 2.0248, "step": 19047 }, { "epoch": 1.9550492610837438, "grad_norm": 0.07136573642492294, "learning_rate": 0.01, "loss": 2.0344, "step": 19050 }, { "epoch": 1.9553571428571428, "grad_norm": 0.07761941850185394, "learning_rate": 0.01, "loss": 2.0358, "step": 19053 }, { "epoch": 1.9556650246305418, "grad_norm": 0.062269363552331924, "learning_rate": 0.01, "loss": 2.0169, "step": 19056 }, { "epoch": 1.9559729064039408, "grad_norm": 0.08797021210193634, "learning_rate": 0.01, "loss": 1.9928, "step": 19059 }, { "epoch": 1.9562807881773399, "grad_norm": 0.04401189833879471, "learning_rate": 0.01, "loss": 2.0615, "step": 19062 }, { "epoch": 1.9565886699507389, "grad_norm": 0.08460985869169235, "learning_rate": 0.01, "loss": 2.0126, "step": 19065 }, { "epoch": 1.956896551724138, "grad_norm": 0.07027498632669449, "learning_rate": 0.01, "loss": 2.0038, "step": 19068 }, { "epoch": 1.957204433497537, "grad_norm": 0.06747457385063171, "learning_rate": 0.01, "loss": 2.0089, "step": 19071 }, { "epoch": 1.957512315270936, "grad_norm": 0.10890615731477737, "learning_rate": 0.01, "loss": 2.0418, "step": 19074 }, { "epoch": 1.957820197044335, "grad_norm": 0.1049666553735733, "learning_rate": 0.01, "loss": 2.0683, "step": 19077 }, { "epoch": 1.958128078817734, "grad_norm": 0.04320647194981575, "learning_rate": 0.01, "loss": 2.0371, "step": 19080 }, { "epoch": 1.958435960591133, "grad_norm": 0.06038364768028259, "learning_rate": 0.01, "loss": 2.0417, "step": 19083 }, { "epoch": 1.958743842364532, "grad_norm": 0.04486788064241409, "learning_rate": 0.01, "loss": 2.0153, "step": 19086 }, { "epoch": 1.959051724137931, "grad_norm": 0.045702993869781494, "learning_rate": 0.01, "loss": 2.0447, "step": 19089 }, { "epoch": 1.95935960591133, "grad_norm": 0.10784240067005157, "learning_rate": 0.01, "loss": 2.0231, "step": 19092 }, { "epoch": 1.959667487684729, "grad_norm": 0.07740773260593414, "learning_rate": 0.01, "loss": 2.079, "step": 19095 }, { "epoch": 1.9599753694581281, "grad_norm": 0.0517832413315773, "learning_rate": 0.01, "loss": 2.061, "step": 19098 }, { "epoch": 1.9602832512315271, "grad_norm": 0.04660172387957573, "learning_rate": 0.01, "loss": 2.026, "step": 19101 }, { "epoch": 1.9605911330049262, "grad_norm": 0.084842748939991, "learning_rate": 0.01, "loss": 2.0418, "step": 19104 }, { "epoch": 1.9608990147783252, "grad_norm": 0.10866342484951019, "learning_rate": 0.01, "loss": 2.0361, "step": 19107 }, { "epoch": 1.9612068965517242, "grad_norm": 0.060094647109508514, "learning_rate": 0.01, "loss": 2.0207, "step": 19110 }, { "epoch": 1.9615147783251232, "grad_norm": 0.04082890599966049, "learning_rate": 0.01, "loss": 1.9837, "step": 19113 }, { "epoch": 1.9618226600985222, "grad_norm": 0.09193670004606247, "learning_rate": 0.01, "loss": 2.045, "step": 19116 }, { "epoch": 1.9621305418719213, "grad_norm": 0.049036990851163864, "learning_rate": 0.01, "loss": 2.0014, "step": 19119 }, { "epoch": 1.9624384236453203, "grad_norm": 0.06658133864402771, "learning_rate": 0.01, "loss": 2.0256, "step": 19122 }, { "epoch": 1.9627463054187193, "grad_norm": 0.07290081679821014, "learning_rate": 0.01, "loss": 2.022, "step": 19125 }, { "epoch": 1.9630541871921183, "grad_norm": 0.05635548382997513, "learning_rate": 0.01, "loss": 2.0439, "step": 19128 }, { "epoch": 1.9633620689655173, "grad_norm": 0.07143761217594147, "learning_rate": 0.01, "loss": 2.009, "step": 19131 }, { "epoch": 1.9636699507389164, "grad_norm": 0.15296097099781036, "learning_rate": 0.01, "loss": 2.0539, "step": 19134 }, { "epoch": 1.9639778325123154, "grad_norm": 0.1795274019241333, "learning_rate": 0.01, "loss": 2.0418, "step": 19137 }, { "epoch": 1.9642857142857144, "grad_norm": 0.04691818729043007, "learning_rate": 0.01, "loss": 2.0114, "step": 19140 }, { "epoch": 1.9645935960591134, "grad_norm": 0.05018999055027962, "learning_rate": 0.01, "loss": 2.0558, "step": 19143 }, { "epoch": 1.9649014778325125, "grad_norm": 0.0349762961268425, "learning_rate": 0.01, "loss": 2.0409, "step": 19146 }, { "epoch": 1.9652093596059115, "grad_norm": 0.04055612534284592, "learning_rate": 0.01, "loss": 2.033, "step": 19149 }, { "epoch": 1.9655172413793105, "grad_norm": 0.04818587005138397, "learning_rate": 0.01, "loss": 2.0522, "step": 19152 }, { "epoch": 1.9658251231527095, "grad_norm": 0.03579457104206085, "learning_rate": 0.01, "loss": 2.0295, "step": 19155 }, { "epoch": 1.9661330049261085, "grad_norm": 0.04382238909602165, "learning_rate": 0.01, "loss": 2.0011, "step": 19158 }, { "epoch": 1.9664408866995073, "grad_norm": 0.03784547746181488, "learning_rate": 0.01, "loss": 2.0332, "step": 19161 }, { "epoch": 1.9667487684729064, "grad_norm": 0.049413155764341354, "learning_rate": 0.01, "loss": 2.0276, "step": 19164 }, { "epoch": 1.9670566502463054, "grad_norm": 0.10560319572687149, "learning_rate": 0.01, "loss": 2.012, "step": 19167 }, { "epoch": 1.9673645320197044, "grad_norm": 0.07912679761648178, "learning_rate": 0.01, "loss": 2.0233, "step": 19170 }, { "epoch": 1.9676724137931034, "grad_norm": 0.051868222653865814, "learning_rate": 0.01, "loss": 2.0399, "step": 19173 }, { "epoch": 1.9679802955665024, "grad_norm": 0.09925144910812378, "learning_rate": 0.01, "loss": 2.0382, "step": 19176 }, { "epoch": 1.9682881773399015, "grad_norm": 0.09824500232934952, "learning_rate": 0.01, "loss": 2.022, "step": 19179 }, { "epoch": 1.9685960591133005, "grad_norm": 0.04710378497838974, "learning_rate": 0.01, "loss": 2.018, "step": 19182 }, { "epoch": 1.9689039408866995, "grad_norm": 0.09339728951454163, "learning_rate": 0.01, "loss": 2.0623, "step": 19185 }, { "epoch": 1.9692118226600985, "grad_norm": 0.04485667496919632, "learning_rate": 0.01, "loss": 2.0361, "step": 19188 }, { "epoch": 1.9695197044334976, "grad_norm": 0.06367155909538269, "learning_rate": 0.01, "loss": 2.0269, "step": 19191 }, { "epoch": 1.9698275862068966, "grad_norm": 0.06692302227020264, "learning_rate": 0.01, "loss": 2.0475, "step": 19194 }, { "epoch": 1.9701354679802956, "grad_norm": 0.06107610464096069, "learning_rate": 0.01, "loss": 2.046, "step": 19197 }, { "epoch": 1.9704433497536946, "grad_norm": 0.06362861394882202, "learning_rate": 0.01, "loss": 2.0515, "step": 19200 }, { "epoch": 1.9707512315270936, "grad_norm": 0.07524324208498001, "learning_rate": 0.01, "loss": 2.044, "step": 19203 }, { "epoch": 1.9710591133004927, "grad_norm": 0.09118182212114334, "learning_rate": 0.01, "loss": 2.0501, "step": 19206 }, { "epoch": 1.9713669950738915, "grad_norm": 0.0823112204670906, "learning_rate": 0.01, "loss": 2.0305, "step": 19209 }, { "epoch": 1.9716748768472905, "grad_norm": 0.061318982392549515, "learning_rate": 0.01, "loss": 2.0499, "step": 19212 }, { "epoch": 1.9719827586206895, "grad_norm": 0.09838750958442688, "learning_rate": 0.01, "loss": 2.0211, "step": 19215 }, { "epoch": 1.9722906403940885, "grad_norm": 0.061727046966552734, "learning_rate": 0.01, "loss": 2.0671, "step": 19218 }, { "epoch": 1.9725985221674875, "grad_norm": 0.044177260249853134, "learning_rate": 0.01, "loss": 2.0429, "step": 19221 }, { "epoch": 1.9729064039408866, "grad_norm": 0.031012659892439842, "learning_rate": 0.01, "loss": 2.0204, "step": 19224 }, { "epoch": 1.9732142857142856, "grad_norm": 0.0593150295317173, "learning_rate": 0.01, "loss": 2.0418, "step": 19227 }, { "epoch": 1.9735221674876846, "grad_norm": 0.09283222258090973, "learning_rate": 0.01, "loss": 2.0363, "step": 19230 }, { "epoch": 1.9738300492610836, "grad_norm": 0.07416541129350662, "learning_rate": 0.01, "loss": 2.0101, "step": 19233 }, { "epoch": 1.9741379310344827, "grad_norm": 0.08513590693473816, "learning_rate": 0.01, "loss": 2.0284, "step": 19236 }, { "epoch": 1.9744458128078817, "grad_norm": 0.08401728421449661, "learning_rate": 0.01, "loss": 2.0356, "step": 19239 }, { "epoch": 1.9747536945812807, "grad_norm": 0.08488047868013382, "learning_rate": 0.01, "loss": 2.0408, "step": 19242 }, { "epoch": 1.9750615763546797, "grad_norm": 0.11438726633787155, "learning_rate": 0.01, "loss": 2.0439, "step": 19245 }, { "epoch": 1.9753694581280787, "grad_norm": 0.0416182205080986, "learning_rate": 0.01, "loss": 2.0034, "step": 19248 }, { "epoch": 1.9756773399014778, "grad_norm": 0.046806883066892624, "learning_rate": 0.01, "loss": 2.0307, "step": 19251 }, { "epoch": 1.9759852216748768, "grad_norm": 0.04319307208061218, "learning_rate": 0.01, "loss": 2.0404, "step": 19254 }, { "epoch": 1.9762931034482758, "grad_norm": 0.11832991987466812, "learning_rate": 0.01, "loss": 2.0338, "step": 19257 }, { "epoch": 1.9766009852216748, "grad_norm": 0.04716213047504425, "learning_rate": 0.01, "loss": 2.005, "step": 19260 }, { "epoch": 1.9769088669950738, "grad_norm": 0.08626002073287964, "learning_rate": 0.01, "loss": 2.0178, "step": 19263 }, { "epoch": 1.9772167487684729, "grad_norm": 0.0981634259223938, "learning_rate": 0.01, "loss": 2.0502, "step": 19266 }, { "epoch": 1.9775246305418719, "grad_norm": 0.0657229796051979, "learning_rate": 0.01, "loss": 2.065, "step": 19269 }, { "epoch": 1.977832512315271, "grad_norm": 0.0652332603931427, "learning_rate": 0.01, "loss": 2.0395, "step": 19272 }, { "epoch": 1.97814039408867, "grad_norm": 0.06810397654771805, "learning_rate": 0.01, "loss": 2.0418, "step": 19275 }, { "epoch": 1.978448275862069, "grad_norm": 0.04740637540817261, "learning_rate": 0.01, "loss": 2.0456, "step": 19278 }, { "epoch": 1.978756157635468, "grad_norm": 0.039233241230249405, "learning_rate": 0.01, "loss": 2.0348, "step": 19281 }, { "epoch": 1.979064039408867, "grad_norm": 0.07533819228410721, "learning_rate": 0.01, "loss": 2.0411, "step": 19284 }, { "epoch": 1.979371921182266, "grad_norm": 0.0820235162973404, "learning_rate": 0.01, "loss": 2.0299, "step": 19287 }, { "epoch": 1.979679802955665, "grad_norm": 0.057419124990701675, "learning_rate": 0.01, "loss": 2.0692, "step": 19290 }, { "epoch": 1.979987684729064, "grad_norm": 0.10119790583848953, "learning_rate": 0.01, "loss": 2.0752, "step": 19293 }, { "epoch": 1.980295566502463, "grad_norm": 0.116152822971344, "learning_rate": 0.01, "loss": 2.0377, "step": 19296 }, { "epoch": 1.980603448275862, "grad_norm": 0.05364501103758812, "learning_rate": 0.01, "loss": 2.0118, "step": 19299 }, { "epoch": 1.9809113300492611, "grad_norm": 0.09089913219213486, "learning_rate": 0.01, "loss": 2.0445, "step": 19302 }, { "epoch": 1.9812192118226601, "grad_norm": 0.06570890545845032, "learning_rate": 0.01, "loss": 2.048, "step": 19305 }, { "epoch": 1.9815270935960592, "grad_norm": 0.10739763081073761, "learning_rate": 0.01, "loss": 2.0527, "step": 19308 }, { "epoch": 1.9818349753694582, "grad_norm": 0.0396854430437088, "learning_rate": 0.01, "loss": 2.0414, "step": 19311 }, { "epoch": 1.9821428571428572, "grad_norm": 0.11273244023323059, "learning_rate": 0.01, "loss": 2.0447, "step": 19314 }, { "epoch": 1.9824507389162562, "grad_norm": 0.10009465366601944, "learning_rate": 0.01, "loss": 2.0512, "step": 19317 }, { "epoch": 1.9827586206896552, "grad_norm": 0.053756825625896454, "learning_rate": 0.01, "loss": 2.0731, "step": 19320 }, { "epoch": 1.9830665024630543, "grad_norm": 0.06603456288576126, "learning_rate": 0.01, "loss": 2.0399, "step": 19323 }, { "epoch": 1.9833743842364533, "grad_norm": 0.038810715079307556, "learning_rate": 0.01, "loss": 2.0272, "step": 19326 }, { "epoch": 1.9836822660098523, "grad_norm": 0.04284658655524254, "learning_rate": 0.01, "loss": 2.035, "step": 19329 }, { "epoch": 1.9839901477832513, "grad_norm": 0.04441271349787712, "learning_rate": 0.01, "loss": 2.0448, "step": 19332 }, { "epoch": 1.9842980295566504, "grad_norm": 0.04501213878393173, "learning_rate": 0.01, "loss": 2.0517, "step": 19335 }, { "epoch": 1.9846059113300494, "grad_norm": 0.05109642818570137, "learning_rate": 0.01, "loss": 2.0168, "step": 19338 }, { "epoch": 1.9849137931034484, "grad_norm": 0.03543083369731903, "learning_rate": 0.01, "loss": 2.0396, "step": 19341 }, { "epoch": 1.9852216748768474, "grad_norm": 0.04665149003267288, "learning_rate": 0.01, "loss": 2.0285, "step": 19344 }, { "epoch": 1.9855295566502464, "grad_norm": 0.035318441689014435, "learning_rate": 0.01, "loss": 2.0321, "step": 19347 }, { "epoch": 1.9858374384236455, "grad_norm": 0.035862043499946594, "learning_rate": 0.01, "loss": 2.0461, "step": 19350 }, { "epoch": 1.9861453201970445, "grad_norm": 0.128739133477211, "learning_rate": 0.01, "loss": 2.0561, "step": 19353 }, { "epoch": 1.9864532019704435, "grad_norm": 0.08115250617265701, "learning_rate": 0.01, "loss": 2.0364, "step": 19356 }, { "epoch": 1.9867610837438425, "grad_norm": 0.04203096404671669, "learning_rate": 0.01, "loss": 2.0298, "step": 19359 }, { "epoch": 1.9870689655172413, "grad_norm": 0.03801970183849335, "learning_rate": 0.01, "loss": 2.034, "step": 19362 }, { "epoch": 1.9873768472906403, "grad_norm": 0.05322232097387314, "learning_rate": 0.01, "loss": 2.0519, "step": 19365 }, { "epoch": 1.9876847290640394, "grad_norm": 0.037100568413734436, "learning_rate": 0.01, "loss": 2.0087, "step": 19368 }, { "epoch": 1.9879926108374384, "grad_norm": 0.03714398667216301, "learning_rate": 0.01, "loss": 2.0187, "step": 19371 }, { "epoch": 1.9883004926108374, "grad_norm": 0.050371263176202774, "learning_rate": 0.01, "loss": 2.0318, "step": 19374 }, { "epoch": 1.9886083743842364, "grad_norm": 0.03875119984149933, "learning_rate": 0.01, "loss": 2.0224, "step": 19377 }, { "epoch": 1.9889162561576355, "grad_norm": 0.06838756054639816, "learning_rate": 0.01, "loss": 2.0458, "step": 19380 }, { "epoch": 1.9892241379310345, "grad_norm": 0.04749476909637451, "learning_rate": 0.01, "loss": 2.022, "step": 19383 }, { "epoch": 1.9895320197044335, "grad_norm": 0.041247084736824036, "learning_rate": 0.01, "loss": 2.0088, "step": 19386 }, { "epoch": 1.9898399014778325, "grad_norm": 0.08582460135221481, "learning_rate": 0.01, "loss": 2.0061, "step": 19389 }, { "epoch": 1.9901477832512315, "grad_norm": 0.042033273726701736, "learning_rate": 0.01, "loss": 2.0516, "step": 19392 }, { "epoch": 1.9904556650246306, "grad_norm": 0.08395756036043167, "learning_rate": 0.01, "loss": 2.0338, "step": 19395 }, { "epoch": 1.9907635467980296, "grad_norm": 0.07154903560876846, "learning_rate": 0.01, "loss": 2.0168, "step": 19398 }, { "epoch": 1.9910714285714286, "grad_norm": 0.06137581169605255, "learning_rate": 0.01, "loss": 2.0046, "step": 19401 }, { "epoch": 1.9913793103448276, "grad_norm": 0.1226835623383522, "learning_rate": 0.01, "loss": 2.0314, "step": 19404 }, { "epoch": 1.9916871921182266, "grad_norm": 0.06524399667978287, "learning_rate": 0.01, "loss": 2.0581, "step": 19407 }, { "epoch": 1.9919950738916257, "grad_norm": 0.060310300439596176, "learning_rate": 0.01, "loss": 2.0205, "step": 19410 }, { "epoch": 1.9923029556650245, "grad_norm": 0.10605314373970032, "learning_rate": 0.01, "loss": 2.0461, "step": 19413 }, { "epoch": 1.9926108374384235, "grad_norm": 0.07056690007448196, "learning_rate": 0.01, "loss": 2.0603, "step": 19416 }, { "epoch": 1.9929187192118225, "grad_norm": 0.04367789626121521, "learning_rate": 0.01, "loss": 2.0358, "step": 19419 }, { "epoch": 1.9932266009852215, "grad_norm": 0.07856806367635727, "learning_rate": 0.01, "loss": 2.061, "step": 19422 }, { "epoch": 1.9935344827586206, "grad_norm": 0.07237541675567627, "learning_rate": 0.01, "loss": 2.0218, "step": 19425 }, { "epoch": 1.9938423645320196, "grad_norm": 0.04983443021774292, "learning_rate": 0.01, "loss": 2.0204, "step": 19428 }, { "epoch": 1.9941502463054186, "grad_norm": 0.049045633524656296, "learning_rate": 0.01, "loss": 2.0089, "step": 19431 }, { "epoch": 1.9944581280788176, "grad_norm": 0.07521536946296692, "learning_rate": 0.01, "loss": 2.021, "step": 19434 }, { "epoch": 1.9947660098522166, "grad_norm": 0.03521602228283882, "learning_rate": 0.01, "loss": 2.0386, "step": 19437 }, { "epoch": 1.9950738916256157, "grad_norm": 0.06075441092252731, "learning_rate": 0.01, "loss": 2.045, "step": 19440 }, { "epoch": 1.9953817733990147, "grad_norm": 0.08800282329320908, "learning_rate": 0.01, "loss": 2.0511, "step": 19443 }, { "epoch": 1.9956896551724137, "grad_norm": 0.04632639139890671, "learning_rate": 0.01, "loss": 2.0434, "step": 19446 }, { "epoch": 1.9959975369458127, "grad_norm": 0.05275778844952583, "learning_rate": 0.01, "loss": 2.0036, "step": 19449 }, { "epoch": 1.9963054187192117, "grad_norm": 0.04615132138133049, "learning_rate": 0.01, "loss": 1.9958, "step": 19452 }, { "epoch": 1.9966133004926108, "grad_norm": 0.12586715817451477, "learning_rate": 0.01, "loss": 2.0311, "step": 19455 }, { "epoch": 1.9969211822660098, "grad_norm": 0.09406362473964691, "learning_rate": 0.01, "loss": 2.028, "step": 19458 }, { "epoch": 1.9972290640394088, "grad_norm": 0.032408129423856735, "learning_rate": 0.01, "loss": 2.0311, "step": 19461 }, { "epoch": 1.9975369458128078, "grad_norm": 0.08810164034366608, "learning_rate": 0.01, "loss": 2.0364, "step": 19464 }, { "epoch": 1.9978448275862069, "grad_norm": 0.11263968795537949, "learning_rate": 0.01, "loss": 2.0263, "step": 19467 }, { "epoch": 1.9981527093596059, "grad_norm": 0.06618282198905945, "learning_rate": 0.01, "loss": 2.0435, "step": 19470 }, { "epoch": 1.998460591133005, "grad_norm": 0.03649067133665085, "learning_rate": 0.01, "loss": 2.0131, "step": 19473 }, { "epoch": 1.998768472906404, "grad_norm": 0.03718538209795952, "learning_rate": 0.01, "loss": 2.0517, "step": 19476 }, { "epoch": 1.999076354679803, "grad_norm": 0.047908563166856766, "learning_rate": 0.01, "loss": 2.0162, "step": 19479 }, { "epoch": 1.999384236453202, "grad_norm": 0.04926212877035141, "learning_rate": 0.01, "loss": 2.0401, "step": 19482 }, { "epoch": 1.999692118226601, "grad_norm": 0.08558470755815506, "learning_rate": 0.01, "loss": 2.0611, "step": 19485 }, { "epoch": 2.0, "grad_norm": 0.07099032402038574, "learning_rate": 0.01, "loss": 2.0408, "step": 19488 }, { "epoch": 2.003391921060746, "grad_norm": 0.08354249596595764, "learning_rate": 0.01, "loss": 2.065, "step": 19491 }, { "epoch": 2.003700277520814, "grad_norm": 0.09245558828115463, "learning_rate": 0.01, "loss": 2.0571, "step": 19494 }, { "epoch": 2.004008633980882, "grad_norm": 0.09228463470935822, "learning_rate": 0.01, "loss": 2.0749, "step": 19497 }, { "epoch": 2.00431699044095, "grad_norm": 0.05558445304632187, "learning_rate": 0.01, "loss": 2.0754, "step": 19500 }, { "epoch": 2.0046253469010176, "grad_norm": 0.04827789589762688, "learning_rate": 0.01, "loss": 2.0431, "step": 19503 }, { "epoch": 2.0049337033610852, "grad_norm": 0.063465915620327, "learning_rate": 0.01, "loss": 2.067, "step": 19506 }, { "epoch": 2.0052420598211533, "grad_norm": 0.047648850828409195, "learning_rate": 0.01, "loss": 2.0614, "step": 19509 }, { "epoch": 2.005550416281221, "grad_norm": 0.038050852715969086, "learning_rate": 0.01, "loss": 2.0699, "step": 19512 }, { "epoch": 2.005858772741289, "grad_norm": 0.036580201238393784, "learning_rate": 0.01, "loss": 2.0832, "step": 19515 }, { "epoch": 2.0061671292013568, "grad_norm": 0.033919982612133026, "learning_rate": 0.01, "loss": 2.0492, "step": 19518 }, { "epoch": 2.0064754856614244, "grad_norm": 0.05007147789001465, "learning_rate": 0.01, "loss": 2.0662, "step": 19521 }, { "epoch": 2.0067838421214925, "grad_norm": 0.042562540620565414, "learning_rate": 0.01, "loss": 2.0508, "step": 19524 }, { "epoch": 2.00709219858156, "grad_norm": 0.0872044712305069, "learning_rate": 0.01, "loss": 2.0628, "step": 19527 }, { "epoch": 2.0074005550416283, "grad_norm": 0.06331472843885422, "learning_rate": 0.01, "loss": 2.0479, "step": 19530 }, { "epoch": 2.007708911501696, "grad_norm": 0.08928905427455902, "learning_rate": 0.01, "loss": 2.051, "step": 19533 }, { "epoch": 2.0080172679617636, "grad_norm": 0.0869508758187294, "learning_rate": 0.01, "loss": 2.0497, "step": 19536 }, { "epoch": 2.0083256244218317, "grad_norm": 0.04267793521285057, "learning_rate": 0.01, "loss": 2.0457, "step": 19539 }, { "epoch": 2.0086339808818994, "grad_norm": 0.046922095119953156, "learning_rate": 0.01, "loss": 2.0488, "step": 19542 }, { "epoch": 2.0089423373419675, "grad_norm": 0.07374055683612823, "learning_rate": 0.01, "loss": 2.0571, "step": 19545 }, { "epoch": 2.009250693802035, "grad_norm": 0.042078323662281036, "learning_rate": 0.01, "loss": 2.0501, "step": 19548 }, { "epoch": 2.009559050262103, "grad_norm": 0.052491504698991776, "learning_rate": 0.01, "loss": 2.0996, "step": 19551 }, { "epoch": 2.009867406722171, "grad_norm": 0.04900294169783592, "learning_rate": 0.01, "loss": 2.0476, "step": 19554 }, { "epoch": 2.0101757631822386, "grad_norm": 0.13067513704299927, "learning_rate": 0.01, "loss": 2.066, "step": 19557 }, { "epoch": 2.0104841196423067, "grad_norm": 0.09229371696710587, "learning_rate": 0.01, "loss": 2.0708, "step": 19560 }, { "epoch": 2.0107924761023743, "grad_norm": 0.05014317110180855, "learning_rate": 0.01, "loss": 2.0474, "step": 19563 }, { "epoch": 2.011100832562442, "grad_norm": 0.06385400146245956, "learning_rate": 0.01, "loss": 2.0427, "step": 19566 }, { "epoch": 2.01140918902251, "grad_norm": 0.04037034139037132, "learning_rate": 0.01, "loss": 2.0508, "step": 19569 }, { "epoch": 2.0117175454825778, "grad_norm": 0.02967817150056362, "learning_rate": 0.01, "loss": 2.0548, "step": 19572 }, { "epoch": 2.012025901942646, "grad_norm": 0.04519663751125336, "learning_rate": 0.01, "loss": 2.0497, "step": 19575 }, { "epoch": 2.0123342584027135, "grad_norm": 0.07825223356485367, "learning_rate": 0.01, "loss": 2.06, "step": 19578 }, { "epoch": 2.012642614862781, "grad_norm": 0.13088425993919373, "learning_rate": 0.01, "loss": 2.0546, "step": 19581 }, { "epoch": 2.0129509713228493, "grad_norm": 0.10113450884819031, "learning_rate": 0.01, "loss": 2.0628, "step": 19584 }, { "epoch": 2.013259327782917, "grad_norm": 0.06662772595882416, "learning_rate": 0.01, "loss": 2.0655, "step": 19587 }, { "epoch": 2.013567684242985, "grad_norm": 0.04824177175760269, "learning_rate": 0.01, "loss": 2.0853, "step": 19590 }, { "epoch": 2.0138760407030527, "grad_norm": 0.07255363464355469, "learning_rate": 0.01, "loss": 2.0614, "step": 19593 }, { "epoch": 2.0141843971631204, "grad_norm": 0.061763517558574677, "learning_rate": 0.01, "loss": 2.0761, "step": 19596 }, { "epoch": 2.0144927536231885, "grad_norm": 0.058266837149858475, "learning_rate": 0.01, "loss": 2.0495, "step": 19599 }, { "epoch": 2.014801110083256, "grad_norm": 0.05644237995147705, "learning_rate": 0.01, "loss": 2.0416, "step": 19602 }, { "epoch": 2.0151094665433242, "grad_norm": 0.102548748254776, "learning_rate": 0.01, "loss": 2.0581, "step": 19605 }, { "epoch": 2.015417823003392, "grad_norm": 0.06959159672260284, "learning_rate": 0.01, "loss": 2.0288, "step": 19608 }, { "epoch": 2.0157261794634596, "grad_norm": 0.09066049009561539, "learning_rate": 0.01, "loss": 2.045, "step": 19611 }, { "epoch": 2.0160345359235277, "grad_norm": 0.061073388904333115, "learning_rate": 0.01, "loss": 2.0523, "step": 19614 }, { "epoch": 2.0163428923835953, "grad_norm": 0.09000861644744873, "learning_rate": 0.01, "loss": 2.0501, "step": 19617 }, { "epoch": 2.0166512488436634, "grad_norm": 0.040078576654195786, "learning_rate": 0.01, "loss": 2.0409, "step": 19620 }, { "epoch": 2.016959605303731, "grad_norm": 0.045984748750925064, "learning_rate": 0.01, "loss": 2.0456, "step": 19623 }, { "epoch": 2.0172679617637987, "grad_norm": 0.045942965894937515, "learning_rate": 0.01, "loss": 2.0319, "step": 19626 }, { "epoch": 2.017576318223867, "grad_norm": 0.059133514761924744, "learning_rate": 0.01, "loss": 2.0504, "step": 19629 }, { "epoch": 2.0178846746839345, "grad_norm": 0.07386631518602371, "learning_rate": 0.01, "loss": 2.0367, "step": 19632 }, { "epoch": 2.0181930311440026, "grad_norm": 0.06906817853450775, "learning_rate": 0.01, "loss": 2.0491, "step": 19635 }, { "epoch": 2.0185013876040703, "grad_norm": 0.06259379535913467, "learning_rate": 0.01, "loss": 2.0812, "step": 19638 }, { "epoch": 2.018809744064138, "grad_norm": 0.07011371850967407, "learning_rate": 0.01, "loss": 2.0716, "step": 19641 }, { "epoch": 2.019118100524206, "grad_norm": 0.05763932690024376, "learning_rate": 0.01, "loss": 2.0618, "step": 19644 }, { "epoch": 2.0194264569842737, "grad_norm": 0.09810350090265274, "learning_rate": 0.01, "loss": 2.046, "step": 19647 }, { "epoch": 2.019734813444342, "grad_norm": 0.11434987187385559, "learning_rate": 0.01, "loss": 2.057, "step": 19650 }, { "epoch": 2.0200431699044095, "grad_norm": 0.10692505538463593, "learning_rate": 0.01, "loss": 2.0669, "step": 19653 }, { "epoch": 2.020351526364477, "grad_norm": 0.06918302178382874, "learning_rate": 0.01, "loss": 2.0636, "step": 19656 }, { "epoch": 2.0206598828245452, "grad_norm": 0.06661045551300049, "learning_rate": 0.01, "loss": 2.0366, "step": 19659 }, { "epoch": 2.020968239284613, "grad_norm": 0.03996479883790016, "learning_rate": 0.01, "loss": 2.0552, "step": 19662 }, { "epoch": 2.021276595744681, "grad_norm": 0.041359271854162216, "learning_rate": 0.01, "loss": 2.0542, "step": 19665 }, { "epoch": 2.0215849522047487, "grad_norm": 0.046693217009305954, "learning_rate": 0.01, "loss": 2.0448, "step": 19668 }, { "epoch": 2.0218933086648163, "grad_norm": 0.031555816531181335, "learning_rate": 0.01, "loss": 2.0146, "step": 19671 }, { "epoch": 2.0222016651248844, "grad_norm": 0.036573588848114014, "learning_rate": 0.01, "loss": 2.038, "step": 19674 }, { "epoch": 2.022510021584952, "grad_norm": 0.09064050763845444, "learning_rate": 0.01, "loss": 2.0609, "step": 19677 }, { "epoch": 2.02281837804502, "grad_norm": 0.11865704506635666, "learning_rate": 0.01, "loss": 2.0412, "step": 19680 }, { "epoch": 2.023126734505088, "grad_norm": 0.08720502257347107, "learning_rate": 0.01, "loss": 2.0403, "step": 19683 }, { "epoch": 2.0234350909651555, "grad_norm": 0.06953457742929459, "learning_rate": 0.01, "loss": 2.0769, "step": 19686 }, { "epoch": 2.0237434474252236, "grad_norm": 0.04386308416724205, "learning_rate": 0.01, "loss": 2.0595, "step": 19689 }, { "epoch": 2.0240518038852913, "grad_norm": 0.047490183264017105, "learning_rate": 0.01, "loss": 2.0478, "step": 19692 }, { "epoch": 2.0243601603453594, "grad_norm": 0.061406608670949936, "learning_rate": 0.01, "loss": 2.0582, "step": 19695 }, { "epoch": 2.024668516805427, "grad_norm": 0.0626315325498581, "learning_rate": 0.01, "loss": 2.0908, "step": 19698 }, { "epoch": 2.0249768732654947, "grad_norm": 0.048075366765260696, "learning_rate": 0.01, "loss": 2.0703, "step": 19701 }, { "epoch": 2.025285229725563, "grad_norm": 0.06243044510483742, "learning_rate": 0.01, "loss": 2.0452, "step": 19704 }, { "epoch": 2.0255935861856305, "grad_norm": 0.06498084217309952, "learning_rate": 0.01, "loss": 2.0812, "step": 19707 }, { "epoch": 2.0259019426456986, "grad_norm": 0.05091014504432678, "learning_rate": 0.01, "loss": 2.0451, "step": 19710 }, { "epoch": 2.026210299105766, "grad_norm": 0.04733705893158913, "learning_rate": 0.01, "loss": 2.059, "step": 19713 }, { "epoch": 2.026518655565834, "grad_norm": 0.10866537690162659, "learning_rate": 0.01, "loss": 2.0505, "step": 19716 }, { "epoch": 2.026827012025902, "grad_norm": 0.07504774630069733, "learning_rate": 0.01, "loss": 2.0318, "step": 19719 }, { "epoch": 2.0271353684859696, "grad_norm": 0.07938455790281296, "learning_rate": 0.01, "loss": 2.0643, "step": 19722 }, { "epoch": 2.0274437249460378, "grad_norm": 0.10090157389640808, "learning_rate": 0.01, "loss": 2.0381, "step": 19725 }, { "epoch": 2.0277520814061054, "grad_norm": 0.04133126139640808, "learning_rate": 0.01, "loss": 2.0126, "step": 19728 }, { "epoch": 2.0280604378661735, "grad_norm": 0.12022694200277328, "learning_rate": 0.01, "loss": 2.0463, "step": 19731 }, { "epoch": 2.028368794326241, "grad_norm": 0.05904841795563698, "learning_rate": 0.01, "loss": 2.0547, "step": 19734 }, { "epoch": 2.028677150786309, "grad_norm": 0.08344896882772446, "learning_rate": 0.01, "loss": 2.0721, "step": 19737 }, { "epoch": 2.028985507246377, "grad_norm": 0.045264534652233124, "learning_rate": 0.01, "loss": 2.0597, "step": 19740 }, { "epoch": 2.0292938637064446, "grad_norm": 0.05907116085290909, "learning_rate": 0.01, "loss": 2.0561, "step": 19743 }, { "epoch": 2.0296022201665127, "grad_norm": 0.04975851625204086, "learning_rate": 0.01, "loss": 2.0642, "step": 19746 }, { "epoch": 2.0299105766265804, "grad_norm": 0.08190937340259552, "learning_rate": 0.01, "loss": 2.0429, "step": 19749 }, { "epoch": 2.030218933086648, "grad_norm": 0.14594541490077972, "learning_rate": 0.01, "loss": 2.0438, "step": 19752 }, { "epoch": 2.030527289546716, "grad_norm": 0.11920581012964249, "learning_rate": 0.01, "loss": 2.0549, "step": 19755 }, { "epoch": 2.030835646006784, "grad_norm": 0.04334663227200508, "learning_rate": 0.01, "loss": 2.0835, "step": 19758 }, { "epoch": 2.031144002466852, "grad_norm": 0.05323721095919609, "learning_rate": 0.01, "loss": 2.0546, "step": 19761 }, { "epoch": 2.0314523589269196, "grad_norm": 0.09565315395593643, "learning_rate": 0.01, "loss": 2.0622, "step": 19764 }, { "epoch": 2.031760715386987, "grad_norm": 0.14498768746852875, "learning_rate": 0.01, "loss": 2.0336, "step": 19767 }, { "epoch": 2.0320690718470553, "grad_norm": 0.16146855056285858, "learning_rate": 0.01, "loss": 2.0444, "step": 19770 }, { "epoch": 2.032377428307123, "grad_norm": 0.09023015946149826, "learning_rate": 0.01, "loss": 2.0341, "step": 19773 }, { "epoch": 2.032685784767191, "grad_norm": 0.05290327966213226, "learning_rate": 0.01, "loss": 2.0442, "step": 19776 }, { "epoch": 2.0329941412272587, "grad_norm": 0.06551158428192139, "learning_rate": 0.01, "loss": 2.062, "step": 19779 }, { "epoch": 2.0333024976873264, "grad_norm": 0.09268030524253845, "learning_rate": 0.01, "loss": 2.0668, "step": 19782 }, { "epoch": 2.0336108541473945, "grad_norm": 0.05402594432234764, "learning_rate": 0.01, "loss": 2.0739, "step": 19785 }, { "epoch": 2.033919210607462, "grad_norm": 0.052478570491075516, "learning_rate": 0.01, "loss": 2.0709, "step": 19788 }, { "epoch": 2.0342275670675303, "grad_norm": 0.03243448957800865, "learning_rate": 0.01, "loss": 2.049, "step": 19791 }, { "epoch": 2.034535923527598, "grad_norm": 0.08627558499574661, "learning_rate": 0.01, "loss": 2.058, "step": 19794 }, { "epoch": 2.0348442799876656, "grad_norm": 0.04757314547896385, "learning_rate": 0.01, "loss": 2.0637, "step": 19797 }, { "epoch": 2.0351526364477337, "grad_norm": 0.11217369884252548, "learning_rate": 0.01, "loss": 2.0581, "step": 19800 }, { "epoch": 2.0354609929078014, "grad_norm": 0.07525690644979477, "learning_rate": 0.01, "loss": 2.0782, "step": 19803 }, { "epoch": 2.0357693493678695, "grad_norm": 0.0945955365896225, "learning_rate": 0.01, "loss": 2.0594, "step": 19806 }, { "epoch": 2.036077705827937, "grad_norm": 0.07789472490549088, "learning_rate": 0.01, "loss": 2.0444, "step": 19809 }, { "epoch": 2.036386062288005, "grad_norm": 0.06672658026218414, "learning_rate": 0.01, "loss": 2.0392, "step": 19812 }, { "epoch": 2.036694418748073, "grad_norm": 0.06361529976129532, "learning_rate": 0.01, "loss": 2.0504, "step": 19815 }, { "epoch": 2.0370027752081405, "grad_norm": 0.03530391305685043, "learning_rate": 0.01, "loss": 2.0453, "step": 19818 }, { "epoch": 2.0373111316682087, "grad_norm": 0.08201812207698822, "learning_rate": 0.01, "loss": 2.023, "step": 19821 }, { "epoch": 2.0376194881282763, "grad_norm": 0.09198293834924698, "learning_rate": 0.01, "loss": 2.0422, "step": 19824 }, { "epoch": 2.037927844588344, "grad_norm": 0.058875374495983124, "learning_rate": 0.01, "loss": 2.0484, "step": 19827 }, { "epoch": 2.038236201048412, "grad_norm": 0.04453382268548012, "learning_rate": 0.01, "loss": 2.0322, "step": 19830 }, { "epoch": 2.0385445575084797, "grad_norm": 0.03713817149400711, "learning_rate": 0.01, "loss": 2.0151, "step": 19833 }, { "epoch": 2.038852913968548, "grad_norm": 0.056827936321496964, "learning_rate": 0.01, "loss": 2.0532, "step": 19836 }, { "epoch": 2.0391612704286155, "grad_norm": 0.08166830986738205, "learning_rate": 0.01, "loss": 2.0602, "step": 19839 }, { "epoch": 2.039469626888683, "grad_norm": 0.06837287545204163, "learning_rate": 0.01, "loss": 2.0567, "step": 19842 }, { "epoch": 2.0397779833487513, "grad_norm": 0.08867949992418289, "learning_rate": 0.01, "loss": 2.0559, "step": 19845 }, { "epoch": 2.040086339808819, "grad_norm": 0.07119370251893997, "learning_rate": 0.01, "loss": 2.0357, "step": 19848 }, { "epoch": 2.040394696268887, "grad_norm": 0.07701986283063889, "learning_rate": 0.01, "loss": 2.0695, "step": 19851 }, { "epoch": 2.0407030527289547, "grad_norm": 0.04700729623436928, "learning_rate": 0.01, "loss": 2.0266, "step": 19854 }, { "epoch": 2.0410114091890224, "grad_norm": 0.05898338556289673, "learning_rate": 0.01, "loss": 2.0519, "step": 19857 }, { "epoch": 2.0413197656490905, "grad_norm": 0.11953815072774887, "learning_rate": 0.01, "loss": 2.0487, "step": 19860 }, { "epoch": 2.041628122109158, "grad_norm": 0.09704854339361191, "learning_rate": 0.01, "loss": 2.0511, "step": 19863 }, { "epoch": 2.041936478569226, "grad_norm": 0.1362537145614624, "learning_rate": 0.01, "loss": 2.0354, "step": 19866 }, { "epoch": 2.042244835029294, "grad_norm": 0.09366025030612946, "learning_rate": 0.01, "loss": 2.0628, "step": 19869 }, { "epoch": 2.0425531914893615, "grad_norm": 0.05397522822022438, "learning_rate": 0.01, "loss": 2.0646, "step": 19872 }, { "epoch": 2.0428615479494296, "grad_norm": 0.07723390311002731, "learning_rate": 0.01, "loss": 2.0523, "step": 19875 }, { "epoch": 2.0431699044094973, "grad_norm": 0.08418615907430649, "learning_rate": 0.01, "loss": 2.0878, "step": 19878 }, { "epoch": 2.0434782608695654, "grad_norm": 0.06149798631668091, "learning_rate": 0.01, "loss": 2.0547, "step": 19881 }, { "epoch": 2.043786617329633, "grad_norm": 0.0474097803235054, "learning_rate": 0.01, "loss": 2.0539, "step": 19884 }, { "epoch": 2.0440949737897007, "grad_norm": 0.04854200407862663, "learning_rate": 0.01, "loss": 2.0637, "step": 19887 }, { "epoch": 2.044403330249769, "grad_norm": 0.04509511590003967, "learning_rate": 0.01, "loss": 2.0679, "step": 19890 }, { "epoch": 2.0447116867098365, "grad_norm": 0.05422825738787651, "learning_rate": 0.01, "loss": 2.0449, "step": 19893 }, { "epoch": 2.0450200431699046, "grad_norm": 0.06556607037782669, "learning_rate": 0.01, "loss": 2.0429, "step": 19896 }, { "epoch": 2.0453283996299723, "grad_norm": 0.03906751424074173, "learning_rate": 0.01, "loss": 2.0694, "step": 19899 }, { "epoch": 2.04563675609004, "grad_norm": 0.05207069590687752, "learning_rate": 0.01, "loss": 2.0363, "step": 19902 }, { "epoch": 2.045945112550108, "grad_norm": 0.04187217727303505, "learning_rate": 0.01, "loss": 2.0413, "step": 19905 }, { "epoch": 2.0462534690101757, "grad_norm": 0.04163263365626335, "learning_rate": 0.01, "loss": 2.0535, "step": 19908 }, { "epoch": 2.046561825470244, "grad_norm": 0.037544943392276764, "learning_rate": 0.01, "loss": 2.033, "step": 19911 }, { "epoch": 2.0468701819303114, "grad_norm": 0.03623516857624054, "learning_rate": 0.01, "loss": 2.0758, "step": 19914 }, { "epoch": 2.047178538390379, "grad_norm": 0.08026546239852905, "learning_rate": 0.01, "loss": 2.0654, "step": 19917 }, { "epoch": 2.047486894850447, "grad_norm": 0.05316372588276863, "learning_rate": 0.01, "loss": 2.0751, "step": 19920 }, { "epoch": 2.047795251310515, "grad_norm": 0.062127552926540375, "learning_rate": 0.01, "loss": 2.0458, "step": 19923 }, { "epoch": 2.048103607770583, "grad_norm": 0.049675267189741135, "learning_rate": 0.01, "loss": 2.0741, "step": 19926 }, { "epoch": 2.0484119642306506, "grad_norm": 0.0425347164273262, "learning_rate": 0.01, "loss": 2.0573, "step": 19929 }, { "epoch": 2.0487203206907183, "grad_norm": 0.03532329574227333, "learning_rate": 0.01, "loss": 2.0367, "step": 19932 }, { "epoch": 2.0490286771507864, "grad_norm": 0.05779660493135452, "learning_rate": 0.01, "loss": 2.0801, "step": 19935 }, { "epoch": 2.049337033610854, "grad_norm": 0.07841507345438004, "learning_rate": 0.01, "loss": 2.0603, "step": 19938 }, { "epoch": 2.049645390070922, "grad_norm": 0.0883709266781807, "learning_rate": 0.01, "loss": 2.0594, "step": 19941 }, { "epoch": 2.04995374653099, "grad_norm": 0.09949532151222229, "learning_rate": 0.01, "loss": 2.0422, "step": 19944 }, { "epoch": 2.0502621029910575, "grad_norm": 0.04350358247756958, "learning_rate": 0.01, "loss": 2.0439, "step": 19947 }, { "epoch": 2.0505704594511256, "grad_norm": 0.042655814439058304, "learning_rate": 0.01, "loss": 2.0821, "step": 19950 }, { "epoch": 2.0508788159111933, "grad_norm": 0.060070816427469254, "learning_rate": 0.01, "loss": 2.0495, "step": 19953 }, { "epoch": 2.0511871723712614, "grad_norm": 0.06479921191930771, "learning_rate": 0.01, "loss": 2.0783, "step": 19956 }, { "epoch": 2.051495528831329, "grad_norm": 0.0982329398393631, "learning_rate": 0.01, "loss": 2.0756, "step": 19959 }, { "epoch": 2.0518038852913967, "grad_norm": 0.10483184456825256, "learning_rate": 0.01, "loss": 2.035, "step": 19962 }, { "epoch": 2.052112241751465, "grad_norm": 0.06383049488067627, "learning_rate": 0.01, "loss": 2.0252, "step": 19965 }, { "epoch": 2.0524205982115324, "grad_norm": 0.13797828555107117, "learning_rate": 0.01, "loss": 2.0604, "step": 19968 }, { "epoch": 2.0527289546716005, "grad_norm": 0.037840090692043304, "learning_rate": 0.01, "loss": 2.0604, "step": 19971 }, { "epoch": 2.053037311131668, "grad_norm": 0.043872520327568054, "learning_rate": 0.01, "loss": 2.0609, "step": 19974 }, { "epoch": 2.053345667591736, "grad_norm": 0.03223152458667755, "learning_rate": 0.01, "loss": 2.0198, "step": 19977 }, { "epoch": 2.053654024051804, "grad_norm": 0.05935351178050041, "learning_rate": 0.01, "loss": 2.066, "step": 19980 }, { "epoch": 2.0539623805118716, "grad_norm": 0.054079607129096985, "learning_rate": 0.01, "loss": 2.0665, "step": 19983 }, { "epoch": 2.0542707369719397, "grad_norm": 0.04307890310883522, "learning_rate": 0.01, "loss": 2.0145, "step": 19986 }, { "epoch": 2.0545790934320074, "grad_norm": 0.06624720245599747, "learning_rate": 0.01, "loss": 2.052, "step": 19989 }, { "epoch": 2.054887449892075, "grad_norm": 0.08096028864383698, "learning_rate": 0.01, "loss": 2.0947, "step": 19992 }, { "epoch": 2.055195806352143, "grad_norm": 0.0872364342212677, "learning_rate": 0.01, "loss": 2.0675, "step": 19995 }, { "epoch": 2.055504162812211, "grad_norm": 0.04538879171013832, "learning_rate": 0.01, "loss": 2.031, "step": 19998 }, { "epoch": 2.055812519272279, "grad_norm": 0.11873256415128708, "learning_rate": 0.01, "loss": 2.0682, "step": 20001 }, { "epoch": 2.0561208757323466, "grad_norm": 0.05929452180862427, "learning_rate": 0.01, "loss": 2.0602, "step": 20004 }, { "epoch": 2.0564292321924142, "grad_norm": 0.05131294205784798, "learning_rate": 0.01, "loss": 2.0569, "step": 20007 }, { "epoch": 2.0567375886524824, "grad_norm": 0.05690256133675575, "learning_rate": 0.01, "loss": 2.0779, "step": 20010 }, { "epoch": 2.05704594511255, "grad_norm": 0.04414551705121994, "learning_rate": 0.01, "loss": 2.0168, "step": 20013 }, { "epoch": 2.057354301572618, "grad_norm": 0.04017036780714989, "learning_rate": 0.01, "loss": 2.0349, "step": 20016 }, { "epoch": 2.0576626580326858, "grad_norm": 0.06785457581281662, "learning_rate": 0.01, "loss": 2.058, "step": 20019 }, { "epoch": 2.0579710144927534, "grad_norm": 0.06258828938007355, "learning_rate": 0.01, "loss": 2.0484, "step": 20022 }, { "epoch": 2.0582793709528215, "grad_norm": 0.11196446418762207, "learning_rate": 0.01, "loss": 2.0624, "step": 20025 }, { "epoch": 2.058587727412889, "grad_norm": 0.08678428828716278, "learning_rate": 0.01, "loss": 2.0667, "step": 20028 }, { "epoch": 2.0588960838729573, "grad_norm": 0.13598018884658813, "learning_rate": 0.01, "loss": 2.0239, "step": 20031 }, { "epoch": 2.059204440333025, "grad_norm": 0.06666143238544464, "learning_rate": 0.01, "loss": 2.037, "step": 20034 }, { "epoch": 2.0595127967930926, "grad_norm": 0.05994727462530136, "learning_rate": 0.01, "loss": 2.0674, "step": 20037 }, { "epoch": 2.0598211532531607, "grad_norm": 0.03867008537054062, "learning_rate": 0.01, "loss": 2.056, "step": 20040 }, { "epoch": 2.0601295097132284, "grad_norm": 0.13077500462532043, "learning_rate": 0.01, "loss": 2.0345, "step": 20043 }, { "epoch": 2.0604378661732965, "grad_norm": 0.057164691388607025, "learning_rate": 0.01, "loss": 2.0436, "step": 20046 }, { "epoch": 2.060746222633364, "grad_norm": 0.07206998765468597, "learning_rate": 0.01, "loss": 2.0472, "step": 20049 }, { "epoch": 2.061054579093432, "grad_norm": 0.08844766765832901, "learning_rate": 0.01, "loss": 2.0535, "step": 20052 }, { "epoch": 2.0613629355535, "grad_norm": 0.07533573359251022, "learning_rate": 0.01, "loss": 2.0698, "step": 20055 }, { "epoch": 2.0616712920135676, "grad_norm": 0.03260966017842293, "learning_rate": 0.01, "loss": 2.0494, "step": 20058 }, { "epoch": 2.0619796484736357, "grad_norm": 0.03491971641778946, "learning_rate": 0.01, "loss": 2.0612, "step": 20061 }, { "epoch": 2.0622880049337033, "grad_norm": 0.04022398218512535, "learning_rate": 0.01, "loss": 2.0343, "step": 20064 }, { "epoch": 2.062596361393771, "grad_norm": 0.06325655430555344, "learning_rate": 0.01, "loss": 2.0409, "step": 20067 }, { "epoch": 2.062904717853839, "grad_norm": 0.06704667955636978, "learning_rate": 0.01, "loss": 2.0641, "step": 20070 }, { "epoch": 2.0632130743139068, "grad_norm": 0.06883389502763748, "learning_rate": 0.01, "loss": 2.0462, "step": 20073 }, { "epoch": 2.063521430773975, "grad_norm": 0.05242495611310005, "learning_rate": 0.01, "loss": 2.0701, "step": 20076 }, { "epoch": 2.0638297872340425, "grad_norm": 0.06587128341197968, "learning_rate": 0.01, "loss": 2.0443, "step": 20079 }, { "epoch": 2.06413814369411, "grad_norm": 0.03571178764104843, "learning_rate": 0.01, "loss": 2.0519, "step": 20082 }, { "epoch": 2.0644465001541783, "grad_norm": 0.061605412513017654, "learning_rate": 0.01, "loss": 2.0357, "step": 20085 }, { "epoch": 2.064754856614246, "grad_norm": 0.05552279204130173, "learning_rate": 0.01, "loss": 2.0258, "step": 20088 }, { "epoch": 2.065063213074314, "grad_norm": 0.047950152307748795, "learning_rate": 0.01, "loss": 2.0354, "step": 20091 }, { "epoch": 2.0653715695343817, "grad_norm": 0.09466604888439178, "learning_rate": 0.01, "loss": 2.0523, "step": 20094 }, { "epoch": 2.0656799259944494, "grad_norm": 0.04828859865665436, "learning_rate": 0.01, "loss": 2.0466, "step": 20097 }, { "epoch": 2.0659882824545175, "grad_norm": 0.03933820128440857, "learning_rate": 0.01, "loss": 2.0458, "step": 20100 }, { "epoch": 2.066296638914585, "grad_norm": 0.05044875666499138, "learning_rate": 0.01, "loss": 2.0396, "step": 20103 }, { "epoch": 2.0666049953746533, "grad_norm": 0.04152398556470871, "learning_rate": 0.01, "loss": 2.0269, "step": 20106 }, { "epoch": 2.066913351834721, "grad_norm": 0.10098916292190552, "learning_rate": 0.01, "loss": 2.0726, "step": 20109 }, { "epoch": 2.0672217082947886, "grad_norm": 0.06381060183048248, "learning_rate": 0.01, "loss": 2.0276, "step": 20112 }, { "epoch": 2.0675300647548567, "grad_norm": 0.13991308212280273, "learning_rate": 0.01, "loss": 2.0508, "step": 20115 }, { "epoch": 2.0678384212149243, "grad_norm": 0.061171598732471466, "learning_rate": 0.01, "loss": 2.0232, "step": 20118 }, { "epoch": 2.0681467776749924, "grad_norm": 0.04276692867279053, "learning_rate": 0.01, "loss": 2.0667, "step": 20121 }, { "epoch": 2.06845513413506, "grad_norm": 0.03582247719168663, "learning_rate": 0.01, "loss": 2.0511, "step": 20124 }, { "epoch": 2.0687634905951278, "grad_norm": 0.037077244371175766, "learning_rate": 0.01, "loss": 2.0275, "step": 20127 }, { "epoch": 2.069071847055196, "grad_norm": 0.11291185766458511, "learning_rate": 0.01, "loss": 2.0207, "step": 20130 }, { "epoch": 2.0693802035152635, "grad_norm": 0.06811921298503876, "learning_rate": 0.01, "loss": 2.0437, "step": 20133 }, { "epoch": 2.0696885599753316, "grad_norm": 0.049292147159576416, "learning_rate": 0.01, "loss": 2.0327, "step": 20136 }, { "epoch": 2.0699969164353993, "grad_norm": 0.08937390893697739, "learning_rate": 0.01, "loss": 2.0548, "step": 20139 }, { "epoch": 2.070305272895467, "grad_norm": 0.04353107511997223, "learning_rate": 0.01, "loss": 2.0522, "step": 20142 }, { "epoch": 2.070613629355535, "grad_norm": 0.03737090900540352, "learning_rate": 0.01, "loss": 2.0525, "step": 20145 }, { "epoch": 2.0709219858156027, "grad_norm": 0.038217127323150635, "learning_rate": 0.01, "loss": 2.0285, "step": 20148 }, { "epoch": 2.071230342275671, "grad_norm": 0.07162989675998688, "learning_rate": 0.01, "loss": 2.0556, "step": 20151 }, { "epoch": 2.0715386987357385, "grad_norm": 0.06507647782564163, "learning_rate": 0.01, "loss": 2.0348, "step": 20154 }, { "epoch": 2.071847055195806, "grad_norm": 0.07880635559558868, "learning_rate": 0.01, "loss": 2.0599, "step": 20157 }, { "epoch": 2.0721554116558742, "grad_norm": 0.11247913539409637, "learning_rate": 0.01, "loss": 2.042, "step": 20160 }, { "epoch": 2.072463768115942, "grad_norm": 0.04084709286689758, "learning_rate": 0.01, "loss": 2.0369, "step": 20163 }, { "epoch": 2.07277212457601, "grad_norm": 0.06349261850118637, "learning_rate": 0.01, "loss": 2.0656, "step": 20166 }, { "epoch": 2.0730804810360777, "grad_norm": 0.03916813060641289, "learning_rate": 0.01, "loss": 2.0142, "step": 20169 }, { "epoch": 2.0733888374961453, "grad_norm": 0.041867464780807495, "learning_rate": 0.01, "loss": 2.0613, "step": 20172 }, { "epoch": 2.0736971939562134, "grad_norm": 0.09670063108205795, "learning_rate": 0.01, "loss": 2.0254, "step": 20175 }, { "epoch": 2.074005550416281, "grad_norm": 0.05259916931390762, "learning_rate": 0.01, "loss": 2.059, "step": 20178 }, { "epoch": 2.074313906876349, "grad_norm": 0.0970730185508728, "learning_rate": 0.01, "loss": 2.0557, "step": 20181 }, { "epoch": 2.074622263336417, "grad_norm": 0.1317344307899475, "learning_rate": 0.01, "loss": 2.0225, "step": 20184 }, { "epoch": 2.0749306197964845, "grad_norm": 0.0787033885717392, "learning_rate": 0.01, "loss": 2.072, "step": 20187 }, { "epoch": 2.0752389762565526, "grad_norm": 0.04037567600607872, "learning_rate": 0.01, "loss": 2.0422, "step": 20190 }, { "epoch": 2.0755473327166203, "grad_norm": 0.03588324785232544, "learning_rate": 0.01, "loss": 2.0396, "step": 20193 }, { "epoch": 2.0758556891766884, "grad_norm": 0.05277855321764946, "learning_rate": 0.01, "loss": 2.0702, "step": 20196 }, { "epoch": 2.076164045636756, "grad_norm": 0.050833381712436676, "learning_rate": 0.01, "loss": 2.0748, "step": 20199 }, { "epoch": 2.076472402096824, "grad_norm": 0.089606374502182, "learning_rate": 0.01, "loss": 2.0482, "step": 20202 }, { "epoch": 2.076780758556892, "grad_norm": 0.05270789936184883, "learning_rate": 0.01, "loss": 2.0555, "step": 20205 }, { "epoch": 2.0770891150169595, "grad_norm": 0.06895376741886139, "learning_rate": 0.01, "loss": 2.0509, "step": 20208 }, { "epoch": 2.0773974714770276, "grad_norm": 0.11967889964580536, "learning_rate": 0.01, "loss": 2.0512, "step": 20211 }, { "epoch": 2.0777058279370952, "grad_norm": 0.11328759789466858, "learning_rate": 0.01, "loss": 2.002, "step": 20214 }, { "epoch": 2.078014184397163, "grad_norm": 0.045189183205366135, "learning_rate": 0.01, "loss": 2.0211, "step": 20217 }, { "epoch": 2.078322540857231, "grad_norm": 0.05716565251350403, "learning_rate": 0.01, "loss": 2.0413, "step": 20220 }, { "epoch": 2.0786308973172987, "grad_norm": 0.07340056449174881, "learning_rate": 0.01, "loss": 2.0457, "step": 20223 }, { "epoch": 2.0789392537773668, "grad_norm": 0.05395069718360901, "learning_rate": 0.01, "loss": 2.0582, "step": 20226 }, { "epoch": 2.0792476102374344, "grad_norm": 0.03723681718111038, "learning_rate": 0.01, "loss": 2.0581, "step": 20229 }, { "epoch": 2.0795559666975025, "grad_norm": 0.10626024752855301, "learning_rate": 0.01, "loss": 2.0351, "step": 20232 }, { "epoch": 2.07986432315757, "grad_norm": 0.09987606853246689, "learning_rate": 0.01, "loss": 2.067, "step": 20235 }, { "epoch": 2.080172679617638, "grad_norm": 0.07282060384750366, "learning_rate": 0.01, "loss": 2.0511, "step": 20238 }, { "epoch": 2.080481036077706, "grad_norm": 0.04192940518260002, "learning_rate": 0.01, "loss": 2.0519, "step": 20241 }, { "epoch": 2.0807893925377736, "grad_norm": 0.06585846096277237, "learning_rate": 0.01, "loss": 2.0282, "step": 20244 }, { "epoch": 2.0810977489978413, "grad_norm": 0.04427814856171608, "learning_rate": 0.01, "loss": 2.017, "step": 20247 }, { "epoch": 2.0814061054579094, "grad_norm": 0.05114896968007088, "learning_rate": 0.01, "loss": 2.059, "step": 20250 }, { "epoch": 2.081714461917977, "grad_norm": 0.0445995107293129, "learning_rate": 0.01, "loss": 2.0544, "step": 20253 }, { "epoch": 2.082022818378045, "grad_norm": 0.04904405400156975, "learning_rate": 0.01, "loss": 2.0428, "step": 20256 }, { "epoch": 2.082331174838113, "grad_norm": 0.03620357811450958, "learning_rate": 0.01, "loss": 2.0493, "step": 20259 }, { "epoch": 2.082639531298181, "grad_norm": 0.10994633287191391, "learning_rate": 0.01, "loss": 2.0719, "step": 20262 }, { "epoch": 2.0829478877582486, "grad_norm": 0.05244474112987518, "learning_rate": 0.01, "loss": 2.0559, "step": 20265 }, { "epoch": 2.0832562442183162, "grad_norm": 0.05937792733311653, "learning_rate": 0.01, "loss": 2.0593, "step": 20268 }, { "epoch": 2.0835646006783843, "grad_norm": 0.08669353276491165, "learning_rate": 0.01, "loss": 2.0539, "step": 20271 }, { "epoch": 2.083872957138452, "grad_norm": 0.054145876318216324, "learning_rate": 0.01, "loss": 2.0281, "step": 20274 }, { "epoch": 2.08418131359852, "grad_norm": 0.040682870894670486, "learning_rate": 0.01, "loss": 2.0657, "step": 20277 }, { "epoch": 2.0844896700585878, "grad_norm": 0.04110307991504669, "learning_rate": 0.01, "loss": 2.053, "step": 20280 }, { "epoch": 2.0847980265186554, "grad_norm": 0.13420680165290833, "learning_rate": 0.01, "loss": 2.0568, "step": 20283 }, { "epoch": 2.0851063829787235, "grad_norm": 0.049191731959581375, "learning_rate": 0.01, "loss": 2.0498, "step": 20286 }, { "epoch": 2.085414739438791, "grad_norm": 0.04682133346796036, "learning_rate": 0.01, "loss": 2.0532, "step": 20289 }, { "epoch": 2.0857230958988593, "grad_norm": 0.043646588921546936, "learning_rate": 0.01, "loss": 2.0582, "step": 20292 }, { "epoch": 2.086031452358927, "grad_norm": 0.05107354745268822, "learning_rate": 0.01, "loss": 2.0574, "step": 20295 }, { "epoch": 2.0863398088189946, "grad_norm": 0.06274458020925522, "learning_rate": 0.01, "loss": 2.0621, "step": 20298 }, { "epoch": 2.0866481652790627, "grad_norm": 0.11294244229793549, "learning_rate": 0.01, "loss": 2.0685, "step": 20301 }, { "epoch": 2.0869565217391304, "grad_norm": 0.04948057234287262, "learning_rate": 0.01, "loss": 2.0469, "step": 20304 }, { "epoch": 2.0872648781991985, "grad_norm": 0.04451402649283409, "learning_rate": 0.01, "loss": 2.0675, "step": 20307 }, { "epoch": 2.087573234659266, "grad_norm": 0.04940638318657875, "learning_rate": 0.01, "loss": 2.0584, "step": 20310 }, { "epoch": 2.087881591119334, "grad_norm": 0.06530692428350449, "learning_rate": 0.01, "loss": 2.0541, "step": 20313 }, { "epoch": 2.088189947579402, "grad_norm": 0.13395404815673828, "learning_rate": 0.01, "loss": 2.028, "step": 20316 }, { "epoch": 2.0884983040394696, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.045, "step": 20319 }, { "epoch": 2.0888066604995377, "grad_norm": 0.06510595977306366, "learning_rate": 0.01, "loss": 2.0291, "step": 20322 }, { "epoch": 2.0891150169596053, "grad_norm": 0.055552888661623, "learning_rate": 0.01, "loss": 2.0612, "step": 20325 }, { "epoch": 2.089423373419673, "grad_norm": 0.04411375895142555, "learning_rate": 0.01, "loss": 2.0368, "step": 20328 }, { "epoch": 2.089731729879741, "grad_norm": 0.05151544511318207, "learning_rate": 0.01, "loss": 2.0539, "step": 20331 }, { "epoch": 2.0900400863398088, "grad_norm": 0.09836700558662415, "learning_rate": 0.01, "loss": 2.0353, "step": 20334 }, { "epoch": 2.090348442799877, "grad_norm": 0.06430090218782425, "learning_rate": 0.01, "loss": 2.043, "step": 20337 }, { "epoch": 2.0906567992599445, "grad_norm": 0.09683403372764587, "learning_rate": 0.01, "loss": 2.0459, "step": 20340 }, { "epoch": 2.090965155720012, "grad_norm": 0.08345566689968109, "learning_rate": 0.01, "loss": 2.0543, "step": 20343 }, { "epoch": 2.0912735121800803, "grad_norm": 0.045199088752269745, "learning_rate": 0.01, "loss": 2.0509, "step": 20346 }, { "epoch": 2.091581868640148, "grad_norm": 0.0399625338613987, "learning_rate": 0.01, "loss": 2.0256, "step": 20349 }, { "epoch": 2.091890225100216, "grad_norm": 0.03815968707203865, "learning_rate": 0.01, "loss": 2.0196, "step": 20352 }, { "epoch": 2.0921985815602837, "grad_norm": 0.054826896637678146, "learning_rate": 0.01, "loss": 2.035, "step": 20355 }, { "epoch": 2.0925069380203514, "grad_norm": 0.05717878043651581, "learning_rate": 0.01, "loss": 2.0613, "step": 20358 }, { "epoch": 2.0928152944804195, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.037, "step": 20361 }, { "epoch": 2.093123650940487, "grad_norm": 0.0983637347817421, "learning_rate": 0.01, "loss": 2.0652, "step": 20364 }, { "epoch": 2.0934320074005552, "grad_norm": 0.05338272079825401, "learning_rate": 0.01, "loss": 2.0448, "step": 20367 }, { "epoch": 2.093740363860623, "grad_norm": 0.047821927815675735, "learning_rate": 0.01, "loss": 2.0299, "step": 20370 }, { "epoch": 2.0940487203206906, "grad_norm": 0.10657365620136261, "learning_rate": 0.01, "loss": 2.0533, "step": 20373 }, { "epoch": 2.0943570767807587, "grad_norm": 0.08650174736976624, "learning_rate": 0.01, "loss": 2.0486, "step": 20376 }, { "epoch": 2.0946654332408263, "grad_norm": 0.06975332647562027, "learning_rate": 0.01, "loss": 2.0365, "step": 20379 }, { "epoch": 2.0949737897008944, "grad_norm": 0.08443387597799301, "learning_rate": 0.01, "loss": 2.0529, "step": 20382 }, { "epoch": 2.095282146160962, "grad_norm": 0.04376668483018875, "learning_rate": 0.01, "loss": 2.0238, "step": 20385 }, { "epoch": 2.0955905026210297, "grad_norm": 0.1501801759004593, "learning_rate": 0.01, "loss": 2.0461, "step": 20388 }, { "epoch": 2.095898859081098, "grad_norm": 0.08488426357507706, "learning_rate": 0.01, "loss": 2.0391, "step": 20391 }, { "epoch": 2.0962072155411655, "grad_norm": 0.05569930747151375, "learning_rate": 0.01, "loss": 2.0375, "step": 20394 }, { "epoch": 2.0965155720012336, "grad_norm": 0.059826262295246124, "learning_rate": 0.01, "loss": 2.0303, "step": 20397 }, { "epoch": 2.0968239284613013, "grad_norm": 0.0911981388926506, "learning_rate": 0.01, "loss": 2.0568, "step": 20400 }, { "epoch": 2.097132284921369, "grad_norm": 0.03921716660261154, "learning_rate": 0.01, "loss": 2.0296, "step": 20403 }, { "epoch": 2.097440641381437, "grad_norm": 0.07355164736509323, "learning_rate": 0.01, "loss": 2.0397, "step": 20406 }, { "epoch": 2.0977489978415047, "grad_norm": 0.031198322772979736, "learning_rate": 0.01, "loss": 2.0755, "step": 20409 }, { "epoch": 2.098057354301573, "grad_norm": 0.07405471056699753, "learning_rate": 0.01, "loss": 2.0174, "step": 20412 }, { "epoch": 2.0983657107616405, "grad_norm": 0.08160628378391266, "learning_rate": 0.01, "loss": 2.0509, "step": 20415 }, { "epoch": 2.098674067221708, "grad_norm": 0.11125149577856064, "learning_rate": 0.01, "loss": 2.0612, "step": 20418 }, { "epoch": 2.0989824236817762, "grad_norm": 0.04484894871711731, "learning_rate": 0.01, "loss": 2.061, "step": 20421 }, { "epoch": 2.099290780141844, "grad_norm": 0.07138056308031082, "learning_rate": 0.01, "loss": 2.0469, "step": 20424 }, { "epoch": 2.099599136601912, "grad_norm": 0.0794389471411705, "learning_rate": 0.01, "loss": 2.0423, "step": 20427 }, { "epoch": 2.0999074930619797, "grad_norm": 0.05673963576555252, "learning_rate": 0.01, "loss": 2.0708, "step": 20430 }, { "epoch": 2.1002158495220473, "grad_norm": 0.054527074098587036, "learning_rate": 0.01, "loss": 2.0304, "step": 20433 }, { "epoch": 2.1005242059821154, "grad_norm": 0.04964460805058479, "learning_rate": 0.01, "loss": 2.0302, "step": 20436 }, { "epoch": 2.100832562442183, "grad_norm": 0.07147829979658127, "learning_rate": 0.01, "loss": 2.0365, "step": 20439 }, { "epoch": 2.101140918902251, "grad_norm": 0.07474019378423691, "learning_rate": 0.01, "loss": 2.0559, "step": 20442 }, { "epoch": 2.101449275362319, "grad_norm": 0.06944689154624939, "learning_rate": 0.01, "loss": 2.0477, "step": 20445 }, { "epoch": 2.1017576318223865, "grad_norm": 0.07336383312940598, "learning_rate": 0.01, "loss": 2.0386, "step": 20448 }, { "epoch": 2.1020659882824546, "grad_norm": 0.08889491856098175, "learning_rate": 0.01, "loss": 2.0822, "step": 20451 }, { "epoch": 2.1023743447425223, "grad_norm": 0.07154878228902817, "learning_rate": 0.01, "loss": 2.0672, "step": 20454 }, { "epoch": 2.1026827012025904, "grad_norm": 0.0471792072057724, "learning_rate": 0.01, "loss": 2.026, "step": 20457 }, { "epoch": 2.102991057662658, "grad_norm": 0.10836900025606155, "learning_rate": 0.01, "loss": 2.037, "step": 20460 }, { "epoch": 2.1032994141227257, "grad_norm": 0.05292079225182533, "learning_rate": 0.01, "loss": 2.0145, "step": 20463 }, { "epoch": 2.103607770582794, "grad_norm": 0.13195812702178955, "learning_rate": 0.01, "loss": 2.0426, "step": 20466 }, { "epoch": 2.1039161270428615, "grad_norm": 0.1316298395395279, "learning_rate": 0.01, "loss": 2.0179, "step": 20469 }, { "epoch": 2.1042244835029296, "grad_norm": 0.04061713069677353, "learning_rate": 0.01, "loss": 2.0304, "step": 20472 }, { "epoch": 2.1045328399629972, "grad_norm": 0.09250857681035995, "learning_rate": 0.01, "loss": 2.0596, "step": 20475 }, { "epoch": 2.104841196423065, "grad_norm": 0.058364611119031906, "learning_rate": 0.01, "loss": 2.0442, "step": 20478 }, { "epoch": 2.105149552883133, "grad_norm": 0.046974651515483856, "learning_rate": 0.01, "loss": 2.0439, "step": 20481 }, { "epoch": 2.1054579093432007, "grad_norm": 0.04835136979818344, "learning_rate": 0.01, "loss": 2.044, "step": 20484 }, { "epoch": 2.1057662658032688, "grad_norm": 0.04643654450774193, "learning_rate": 0.01, "loss": 2.0276, "step": 20487 }, { "epoch": 2.1060746222633364, "grad_norm": 0.10667752474546432, "learning_rate": 0.01, "loss": 2.0356, "step": 20490 }, { "epoch": 2.106382978723404, "grad_norm": 0.07521391659975052, "learning_rate": 0.01, "loss": 2.0552, "step": 20493 }, { "epoch": 2.106691335183472, "grad_norm": 0.10269229114055634, "learning_rate": 0.01, "loss": 2.0452, "step": 20496 }, { "epoch": 2.10699969164354, "grad_norm": 0.040783487260341644, "learning_rate": 0.01, "loss": 2.0567, "step": 20499 }, { "epoch": 2.107308048103608, "grad_norm": 0.05012373626232147, "learning_rate": 0.01, "loss": 2.0359, "step": 20502 }, { "epoch": 2.1076164045636756, "grad_norm": 0.042675044387578964, "learning_rate": 0.01, "loss": 2.0367, "step": 20505 }, { "epoch": 2.1079247610237433, "grad_norm": 0.06164225935935974, "learning_rate": 0.01, "loss": 2.0324, "step": 20508 }, { "epoch": 2.1082331174838114, "grad_norm": 0.06368019431829453, "learning_rate": 0.01, "loss": 2.0228, "step": 20511 }, { "epoch": 2.108541473943879, "grad_norm": 0.049319278448820114, "learning_rate": 0.01, "loss": 2.0443, "step": 20514 }, { "epoch": 2.108849830403947, "grad_norm": 0.06070362776517868, "learning_rate": 0.01, "loss": 2.045, "step": 20517 }, { "epoch": 2.109158186864015, "grad_norm": 0.07560203224420547, "learning_rate": 0.01, "loss": 2.0469, "step": 20520 }, { "epoch": 2.1094665433240825, "grad_norm": 0.05563758686184883, "learning_rate": 0.01, "loss": 2.0415, "step": 20523 }, { "epoch": 2.1097748997841506, "grad_norm": 0.04349389672279358, "learning_rate": 0.01, "loss": 2.0396, "step": 20526 }, { "epoch": 2.110083256244218, "grad_norm": 0.05849798396229744, "learning_rate": 0.01, "loss": 2.0235, "step": 20529 }, { "epoch": 2.1103916127042863, "grad_norm": 0.06813669949769974, "learning_rate": 0.01, "loss": 2.0541, "step": 20532 }, { "epoch": 2.110699969164354, "grad_norm": 0.08951954543590546, "learning_rate": 0.01, "loss": 2.0232, "step": 20535 }, { "epoch": 2.1110083256244216, "grad_norm": 0.08673957735300064, "learning_rate": 0.01, "loss": 2.0602, "step": 20538 }, { "epoch": 2.1113166820844897, "grad_norm": 0.06135937571525574, "learning_rate": 0.01, "loss": 2.0416, "step": 20541 }, { "epoch": 2.1116250385445574, "grad_norm": 0.08979734778404236, "learning_rate": 0.01, "loss": 2.0504, "step": 20544 }, { "epoch": 2.1119333950046255, "grad_norm": 0.07878229022026062, "learning_rate": 0.01, "loss": 2.0194, "step": 20547 }, { "epoch": 2.112241751464693, "grad_norm": 0.07120572775602341, "learning_rate": 0.01, "loss": 2.0414, "step": 20550 }, { "epoch": 2.112550107924761, "grad_norm": 0.11559943854808807, "learning_rate": 0.01, "loss": 2.0334, "step": 20553 }, { "epoch": 2.112858464384829, "grad_norm": 0.09005344659090042, "learning_rate": 0.01, "loss": 2.0457, "step": 20556 }, { "epoch": 2.1131668208448966, "grad_norm": 0.05143802613019943, "learning_rate": 0.01, "loss": 2.0364, "step": 20559 }, { "epoch": 2.1134751773049647, "grad_norm": 0.03870050981640816, "learning_rate": 0.01, "loss": 2.0216, "step": 20562 }, { "epoch": 2.1137835337650324, "grad_norm": 0.06471268832683563, "learning_rate": 0.01, "loss": 2.0116, "step": 20565 }, { "epoch": 2.1140918902251, "grad_norm": 0.07047493010759354, "learning_rate": 0.01, "loss": 2.052, "step": 20568 }, { "epoch": 2.114400246685168, "grad_norm": 0.08189850300550461, "learning_rate": 0.01, "loss": 2.012, "step": 20571 }, { "epoch": 2.114708603145236, "grad_norm": 0.04041110724210739, "learning_rate": 0.01, "loss": 2.0604, "step": 20574 }, { "epoch": 2.115016959605304, "grad_norm": 0.06573761254549026, "learning_rate": 0.01, "loss": 2.0466, "step": 20577 }, { "epoch": 2.1153253160653716, "grad_norm": 0.06190131977200508, "learning_rate": 0.01, "loss": 2.0475, "step": 20580 }, { "epoch": 2.115633672525439, "grad_norm": 0.044485028833150864, "learning_rate": 0.01, "loss": 2.0336, "step": 20583 }, { "epoch": 2.1159420289855073, "grad_norm": 0.03833581507205963, "learning_rate": 0.01, "loss": 2.0446, "step": 20586 }, { "epoch": 2.116250385445575, "grad_norm": 0.07705090194940567, "learning_rate": 0.01, "loss": 2.0546, "step": 20589 }, { "epoch": 2.116558741905643, "grad_norm": 0.12635158002376556, "learning_rate": 0.01, "loss": 2.0399, "step": 20592 }, { "epoch": 2.1168670983657107, "grad_norm": 0.06076742708683014, "learning_rate": 0.01, "loss": 2.0409, "step": 20595 }, { "epoch": 2.1171754548257784, "grad_norm": 0.06634259968996048, "learning_rate": 0.01, "loss": 2.0603, "step": 20598 }, { "epoch": 2.1174838112858465, "grad_norm": 0.06167810782790184, "learning_rate": 0.01, "loss": 2.0404, "step": 20601 }, { "epoch": 2.117792167745914, "grad_norm": 0.0474831759929657, "learning_rate": 0.01, "loss": 2.0413, "step": 20604 }, { "epoch": 2.1181005242059823, "grad_norm": 0.04799888655543327, "learning_rate": 0.01, "loss": 2.0628, "step": 20607 }, { "epoch": 2.11840888066605, "grad_norm": 0.055144913494586945, "learning_rate": 0.01, "loss": 2.0126, "step": 20610 }, { "epoch": 2.1187172371261176, "grad_norm": 0.1116517037153244, "learning_rate": 0.01, "loss": 2.0467, "step": 20613 }, { "epoch": 2.1190255935861857, "grad_norm": 0.06136411800980568, "learning_rate": 0.01, "loss": 2.0489, "step": 20616 }, { "epoch": 2.1193339500462534, "grad_norm": 0.09005284309387207, "learning_rate": 0.01, "loss": 2.0185, "step": 20619 }, { "epoch": 2.1196423065063215, "grad_norm": 0.19345355033874512, "learning_rate": 0.01, "loss": 2.0588, "step": 20622 }, { "epoch": 2.119950662966389, "grad_norm": 0.14011520147323608, "learning_rate": 0.01, "loss": 2.0264, "step": 20625 }, { "epoch": 2.120259019426457, "grad_norm": 0.06036897376179695, "learning_rate": 0.01, "loss": 2.0566, "step": 20628 }, { "epoch": 2.120567375886525, "grad_norm": 0.05589490756392479, "learning_rate": 0.01, "loss": 2.0455, "step": 20631 }, { "epoch": 2.1208757323465925, "grad_norm": 0.07571965456008911, "learning_rate": 0.01, "loss": 2.0403, "step": 20634 }, { "epoch": 2.1211840888066607, "grad_norm": 0.05558032542467117, "learning_rate": 0.01, "loss": 2.0373, "step": 20637 }, { "epoch": 2.1214924452667283, "grad_norm": 0.048844993114471436, "learning_rate": 0.01, "loss": 2.0209, "step": 20640 }, { "epoch": 2.121800801726796, "grad_norm": 0.04454483836889267, "learning_rate": 0.01, "loss": 2.0431, "step": 20643 }, { "epoch": 2.122109158186864, "grad_norm": 0.04565678536891937, "learning_rate": 0.01, "loss": 2.0498, "step": 20646 }, { "epoch": 2.1224175146469317, "grad_norm": 0.04082358628511429, "learning_rate": 0.01, "loss": 2.0201, "step": 20649 }, { "epoch": 2.122725871107, "grad_norm": 0.03753536194562912, "learning_rate": 0.01, "loss": 2.0324, "step": 20652 }, { "epoch": 2.1230342275670675, "grad_norm": 0.04898180440068245, "learning_rate": 0.01, "loss": 2.0503, "step": 20655 }, { "epoch": 2.123342584027135, "grad_norm": 0.0748276561498642, "learning_rate": 0.01, "loss": 2.0632, "step": 20658 }, { "epoch": 2.1236509404872033, "grad_norm": 0.19312037527561188, "learning_rate": 0.01, "loss": 2.0409, "step": 20661 }, { "epoch": 2.123959296947271, "grad_norm": 0.1444995105266571, "learning_rate": 0.01, "loss": 2.0713, "step": 20664 }, { "epoch": 2.124267653407339, "grad_norm": 0.03868752345442772, "learning_rate": 0.01, "loss": 2.045, "step": 20667 }, { "epoch": 2.1245760098674067, "grad_norm": 0.04851067438721657, "learning_rate": 0.01, "loss": 2.0347, "step": 20670 }, { "epoch": 2.1248843663274743, "grad_norm": 0.03709336742758751, "learning_rate": 0.01, "loss": 2.0288, "step": 20673 }, { "epoch": 2.1251927227875425, "grad_norm": 0.03498173505067825, "learning_rate": 0.01, "loss": 2.0262, "step": 20676 }, { "epoch": 2.12550107924761, "grad_norm": 0.04838285222649574, "learning_rate": 0.01, "loss": 2.0391, "step": 20679 }, { "epoch": 2.125809435707678, "grad_norm": 0.04562096297740936, "learning_rate": 0.01, "loss": 2.0226, "step": 20682 }, { "epoch": 2.126117792167746, "grad_norm": 0.06128044053912163, "learning_rate": 0.01, "loss": 2.0452, "step": 20685 }, { "epoch": 2.1264261486278135, "grad_norm": 0.04903801530599594, "learning_rate": 0.01, "loss": 2.049, "step": 20688 }, { "epoch": 2.1267345050878816, "grad_norm": 0.038953814655542374, "learning_rate": 0.01, "loss": 2.0559, "step": 20691 }, { "epoch": 2.1270428615479493, "grad_norm": 0.06137779355049133, "learning_rate": 0.01, "loss": 2.0458, "step": 20694 }, { "epoch": 2.1273512180080174, "grad_norm": 0.11357403546571732, "learning_rate": 0.01, "loss": 2.0572, "step": 20697 }, { "epoch": 2.127659574468085, "grad_norm": 0.16223950684070587, "learning_rate": 0.01, "loss": 2.0158, "step": 20700 }, { "epoch": 2.127967930928153, "grad_norm": 0.06391070038080215, "learning_rate": 0.01, "loss": 2.0682, "step": 20703 }, { "epoch": 2.128276287388221, "grad_norm": 0.03951489180326462, "learning_rate": 0.01, "loss": 2.0645, "step": 20706 }, { "epoch": 2.1285846438482885, "grad_norm": 0.03921591490507126, "learning_rate": 0.01, "loss": 2.0538, "step": 20709 }, { "epoch": 2.1288930003083566, "grad_norm": 0.0479004830121994, "learning_rate": 0.01, "loss": 2.0587, "step": 20712 }, { "epoch": 2.1292013567684243, "grad_norm": 0.04370121285319328, "learning_rate": 0.01, "loss": 2.0249, "step": 20715 }, { "epoch": 2.129509713228492, "grad_norm": 0.05750906467437744, "learning_rate": 0.01, "loss": 2.0452, "step": 20718 }, { "epoch": 2.12981806968856, "grad_norm": 0.06549614667892456, "learning_rate": 0.01, "loss": 2.0173, "step": 20721 }, { "epoch": 2.1301264261486277, "grad_norm": 0.05763638764619827, "learning_rate": 0.01, "loss": 2.0315, "step": 20724 }, { "epoch": 2.130434782608696, "grad_norm": 0.16448546946048737, "learning_rate": 0.01, "loss": 2.0352, "step": 20727 }, { "epoch": 2.1307431390687634, "grad_norm": 0.057152118533849716, "learning_rate": 0.01, "loss": 2.0361, "step": 20730 }, { "epoch": 2.1310514955288316, "grad_norm": 0.054196059703826904, "learning_rate": 0.01, "loss": 2.0387, "step": 20733 }, { "epoch": 2.131359851988899, "grad_norm": 0.03887069597840309, "learning_rate": 0.01, "loss": 2.031, "step": 20736 }, { "epoch": 2.131668208448967, "grad_norm": 0.03529683127999306, "learning_rate": 0.01, "loss": 2.0466, "step": 20739 }, { "epoch": 2.131976564909035, "grad_norm": 0.053463347256183624, "learning_rate": 0.01, "loss": 2.011, "step": 20742 }, { "epoch": 2.1322849213691026, "grad_norm": 0.03482777252793312, "learning_rate": 0.01, "loss": 2.0421, "step": 20745 }, { "epoch": 2.1325932778291703, "grad_norm": 0.056043028831481934, "learning_rate": 0.01, "loss": 2.0442, "step": 20748 }, { "epoch": 2.1329016342892384, "grad_norm": 0.04648544266819954, "learning_rate": 0.01, "loss": 2.0502, "step": 20751 }, { "epoch": 2.133209990749306, "grad_norm": 0.0878090187907219, "learning_rate": 0.01, "loss": 2.0092, "step": 20754 }, { "epoch": 2.133518347209374, "grad_norm": 0.059173766523599625, "learning_rate": 0.01, "loss": 2.0469, "step": 20757 }, { "epoch": 2.133826703669442, "grad_norm": 0.09119824320077896, "learning_rate": 0.01, "loss": 2.0448, "step": 20760 }, { "epoch": 2.13413506012951, "grad_norm": 0.04082552343606949, "learning_rate": 0.01, "loss": 2.0501, "step": 20763 }, { "epoch": 2.1344434165895776, "grad_norm": 0.0831313356757164, "learning_rate": 0.01, "loss": 2.0293, "step": 20766 }, { "epoch": 2.1347517730496453, "grad_norm": 0.0635252296924591, "learning_rate": 0.01, "loss": 2.0495, "step": 20769 }, { "epoch": 2.1350601295097134, "grad_norm": 0.09516814351081848, "learning_rate": 0.01, "loss": 2.0284, "step": 20772 }, { "epoch": 2.135368485969781, "grad_norm": 0.05734236165881157, "learning_rate": 0.01, "loss": 2.0341, "step": 20775 }, { "epoch": 2.1356768424298487, "grad_norm": 0.07606332749128342, "learning_rate": 0.01, "loss": 2.0388, "step": 20778 }, { "epoch": 2.135985198889917, "grad_norm": 0.041968777775764465, "learning_rate": 0.01, "loss": 2.0172, "step": 20781 }, { "epoch": 2.1362935553499844, "grad_norm": 0.10549111664295197, "learning_rate": 0.01, "loss": 2.0695, "step": 20784 }, { "epoch": 2.1366019118100525, "grad_norm": 0.0783652663230896, "learning_rate": 0.01, "loss": 2.0416, "step": 20787 }, { "epoch": 2.13691026827012, "grad_norm": 0.08309295773506165, "learning_rate": 0.01, "loss": 2.0455, "step": 20790 }, { "epoch": 2.1372186247301883, "grad_norm": 0.038408368825912476, "learning_rate": 0.01, "loss": 2.0298, "step": 20793 }, { "epoch": 2.137526981190256, "grad_norm": 0.1311808079481125, "learning_rate": 0.01, "loss": 2.0608, "step": 20796 }, { "epoch": 2.1378353376503236, "grad_norm": 0.08983028680086136, "learning_rate": 0.01, "loss": 2.0357, "step": 20799 }, { "epoch": 2.1381436941103917, "grad_norm": 0.12422356754541397, "learning_rate": 0.01, "loss": 2.0443, "step": 20802 }, { "epoch": 2.1384520505704594, "grad_norm": 0.0687076672911644, "learning_rate": 0.01, "loss": 2.0224, "step": 20805 }, { "epoch": 2.138760407030527, "grad_norm": 0.04577179625630379, "learning_rate": 0.01, "loss": 2.0195, "step": 20808 }, { "epoch": 2.139068763490595, "grad_norm": 0.046294886618852615, "learning_rate": 0.01, "loss": 2.028, "step": 20811 }, { "epoch": 2.139377119950663, "grad_norm": 0.048477739095687866, "learning_rate": 0.01, "loss": 2.055, "step": 20814 }, { "epoch": 2.139685476410731, "grad_norm": 0.035206399857997894, "learning_rate": 0.01, "loss": 2.0242, "step": 20817 }, { "epoch": 2.1399938328707986, "grad_norm": 0.0451078936457634, "learning_rate": 0.01, "loss": 2.0316, "step": 20820 }, { "epoch": 2.1403021893308667, "grad_norm": 0.09469619393348694, "learning_rate": 0.01, "loss": 2.0259, "step": 20823 }, { "epoch": 2.1406105457909343, "grad_norm": 0.03966006636619568, "learning_rate": 0.01, "loss": 2.0427, "step": 20826 }, { "epoch": 2.140918902251002, "grad_norm": 0.0793539360165596, "learning_rate": 0.01, "loss": 2.0469, "step": 20829 }, { "epoch": 2.14122725871107, "grad_norm": 0.08547502756118774, "learning_rate": 0.01, "loss": 2.0189, "step": 20832 }, { "epoch": 2.1415356151711378, "grad_norm": 0.055916544049978256, "learning_rate": 0.01, "loss": 2.042, "step": 20835 }, { "epoch": 2.141843971631206, "grad_norm": 0.0369451642036438, "learning_rate": 0.01, "loss": 2.0346, "step": 20838 }, { "epoch": 2.1421523280912735, "grad_norm": 0.05001531541347504, "learning_rate": 0.01, "loss": 2.06, "step": 20841 }, { "epoch": 2.142460684551341, "grad_norm": 0.09491688758134842, "learning_rate": 0.01, "loss": 2.0092, "step": 20844 }, { "epoch": 2.1427690410114093, "grad_norm": 0.05615471303462982, "learning_rate": 0.01, "loss": 2.0575, "step": 20847 }, { "epoch": 2.143077397471477, "grad_norm": 0.08531193435192108, "learning_rate": 0.01, "loss": 2.0562, "step": 20850 }, { "epoch": 2.143385753931545, "grad_norm": 0.08138839155435562, "learning_rate": 0.01, "loss": 2.0482, "step": 20853 }, { "epoch": 2.1436941103916127, "grad_norm": 0.04965364560484886, "learning_rate": 0.01, "loss": 2.0487, "step": 20856 }, { "epoch": 2.1440024668516804, "grad_norm": 0.07006839662790298, "learning_rate": 0.01, "loss": 2.0469, "step": 20859 }, { "epoch": 2.1443108233117485, "grad_norm": 0.07477371394634247, "learning_rate": 0.01, "loss": 2.0452, "step": 20862 }, { "epoch": 2.144619179771816, "grad_norm": 0.07817060500383377, "learning_rate": 0.01, "loss": 2.0239, "step": 20865 }, { "epoch": 2.1449275362318843, "grad_norm": 0.04393570497632027, "learning_rate": 0.01, "loss": 2.0261, "step": 20868 }, { "epoch": 2.145235892691952, "grad_norm": 0.0777692198753357, "learning_rate": 0.01, "loss": 2.0313, "step": 20871 }, { "epoch": 2.1455442491520196, "grad_norm": 0.05267953500151634, "learning_rate": 0.01, "loss": 2.0496, "step": 20874 }, { "epoch": 2.1458526056120877, "grad_norm": 0.053839076310396194, "learning_rate": 0.01, "loss": 2.0593, "step": 20877 }, { "epoch": 2.1461609620721553, "grad_norm": 0.05793678015470505, "learning_rate": 0.01, "loss": 2.0333, "step": 20880 }, { "epoch": 2.1464693185322234, "grad_norm": 0.045539624989032745, "learning_rate": 0.01, "loss": 2.0504, "step": 20883 }, { "epoch": 2.146777674992291, "grad_norm": 0.062209442257881165, "learning_rate": 0.01, "loss": 2.0273, "step": 20886 }, { "epoch": 2.1470860314523588, "grad_norm": 0.11053802073001862, "learning_rate": 0.01, "loss": 2.0764, "step": 20889 }, { "epoch": 2.147394387912427, "grad_norm": 0.037597738206386566, "learning_rate": 0.01, "loss": 2.046, "step": 20892 }, { "epoch": 2.1477027443724945, "grad_norm": 0.050160784274339676, "learning_rate": 0.01, "loss": 2.0797, "step": 20895 }, { "epoch": 2.1480111008325626, "grad_norm": 0.053438689559698105, "learning_rate": 0.01, "loss": 2.0611, "step": 20898 }, { "epoch": 2.1483194572926303, "grad_norm": 0.06175898388028145, "learning_rate": 0.01, "loss": 2.0558, "step": 20901 }, { "epoch": 2.148627813752698, "grad_norm": 0.062430836260318756, "learning_rate": 0.01, "loss": 2.0405, "step": 20904 }, { "epoch": 2.148936170212766, "grad_norm": 0.09347888082265854, "learning_rate": 0.01, "loss": 2.0482, "step": 20907 }, { "epoch": 2.1492445266728337, "grad_norm": 0.13494189083576202, "learning_rate": 0.01, "loss": 2.0483, "step": 20910 }, { "epoch": 2.149552883132902, "grad_norm": 0.06735777854919434, "learning_rate": 0.01, "loss": 2.0799, "step": 20913 }, { "epoch": 2.1498612395929695, "grad_norm": 0.08108525723218918, "learning_rate": 0.01, "loss": 2.036, "step": 20916 }, { "epoch": 2.150169596053037, "grad_norm": 0.043221328407526016, "learning_rate": 0.01, "loss": 2.0401, "step": 20919 }, { "epoch": 2.1504779525131053, "grad_norm": 0.046568017452955246, "learning_rate": 0.01, "loss": 2.0393, "step": 20922 }, { "epoch": 2.150786308973173, "grad_norm": 0.040786582976579666, "learning_rate": 0.01, "loss": 2.0527, "step": 20925 }, { "epoch": 2.151094665433241, "grad_norm": 0.043856412172317505, "learning_rate": 0.01, "loss": 2.0405, "step": 20928 }, { "epoch": 2.1514030218933087, "grad_norm": 0.03569160774350166, "learning_rate": 0.01, "loss": 2.0431, "step": 20931 }, { "epoch": 2.1517113783533763, "grad_norm": 0.059598151594400406, "learning_rate": 0.01, "loss": 2.0316, "step": 20934 }, { "epoch": 2.1520197348134444, "grad_norm": 0.12518006563186646, "learning_rate": 0.01, "loss": 2.0389, "step": 20937 }, { "epoch": 2.152328091273512, "grad_norm": 0.04442603886127472, "learning_rate": 0.01, "loss": 2.0413, "step": 20940 }, { "epoch": 2.15263644773358, "grad_norm": 0.1175926923751831, "learning_rate": 0.01, "loss": 2.0307, "step": 20943 }, { "epoch": 2.152944804193648, "grad_norm": 0.09045865386724472, "learning_rate": 0.01, "loss": 2.0291, "step": 20946 }, { "epoch": 2.1532531606537155, "grad_norm": 0.07519534230232239, "learning_rate": 0.01, "loss": 2.048, "step": 20949 }, { "epoch": 2.1535615171137836, "grad_norm": 0.03551176190376282, "learning_rate": 0.01, "loss": 2.0396, "step": 20952 }, { "epoch": 2.1538698735738513, "grad_norm": 0.11210478097200394, "learning_rate": 0.01, "loss": 2.0453, "step": 20955 }, { "epoch": 2.1541782300339194, "grad_norm": 0.09921340644359589, "learning_rate": 0.01, "loss": 2.0295, "step": 20958 }, { "epoch": 2.154486586493987, "grad_norm": 0.04131443426012993, "learning_rate": 0.01, "loss": 2.0295, "step": 20961 }, { "epoch": 2.1547949429540547, "grad_norm": 0.07579990476369858, "learning_rate": 0.01, "loss": 2.041, "step": 20964 }, { "epoch": 2.155103299414123, "grad_norm": 0.04955494403839111, "learning_rate": 0.01, "loss": 2.0409, "step": 20967 }, { "epoch": 2.1554116558741905, "grad_norm": 0.038999974727630615, "learning_rate": 0.01, "loss": 2.0267, "step": 20970 }, { "epoch": 2.1557200123342586, "grad_norm": 0.04656311497092247, "learning_rate": 0.01, "loss": 2.0319, "step": 20973 }, { "epoch": 2.1560283687943262, "grad_norm": 0.08853477984666824, "learning_rate": 0.01, "loss": 2.0573, "step": 20976 }, { "epoch": 2.156336725254394, "grad_norm": 0.07657559216022491, "learning_rate": 0.01, "loss": 2.0455, "step": 20979 }, { "epoch": 2.156645081714462, "grad_norm": 0.03593476489186287, "learning_rate": 0.01, "loss": 2.0365, "step": 20982 }, { "epoch": 2.1569534381745297, "grad_norm": 0.08656775951385498, "learning_rate": 0.01, "loss": 2.0286, "step": 20985 }, { "epoch": 2.1572617946345978, "grad_norm": 0.07760220021009445, "learning_rate": 0.01, "loss": 2.068, "step": 20988 }, { "epoch": 2.1575701510946654, "grad_norm": 0.09167666733264923, "learning_rate": 0.01, "loss": 2.0165, "step": 20991 }, { "epoch": 2.157878507554733, "grad_norm": 0.052361227571964264, "learning_rate": 0.01, "loss": 2.0424, "step": 20994 }, { "epoch": 2.158186864014801, "grad_norm": 0.05431961268186569, "learning_rate": 0.01, "loss": 2.0475, "step": 20997 }, { "epoch": 2.158495220474869, "grad_norm": 0.03956061974167824, "learning_rate": 0.01, "loss": 2.0299, "step": 21000 }, { "epoch": 2.158803576934937, "grad_norm": 0.03906402364373207, "learning_rate": 0.01, "loss": 2.0442, "step": 21003 }, { "epoch": 2.1591119333950046, "grad_norm": 0.07876679301261902, "learning_rate": 0.01, "loss": 2.0369, "step": 21006 }, { "epoch": 2.1594202898550723, "grad_norm": 0.051768235862255096, "learning_rate": 0.01, "loss": 2.0404, "step": 21009 }, { "epoch": 2.1597286463151404, "grad_norm": 0.05242472141981125, "learning_rate": 0.01, "loss": 2.0514, "step": 21012 }, { "epoch": 2.160037002775208, "grad_norm": 0.07014864683151245, "learning_rate": 0.01, "loss": 2.0285, "step": 21015 }, { "epoch": 2.160345359235276, "grad_norm": 0.1024969145655632, "learning_rate": 0.01, "loss": 2.031, "step": 21018 }, { "epoch": 2.160653715695344, "grad_norm": 0.059565525501966476, "learning_rate": 0.01, "loss": 2.0334, "step": 21021 }, { "epoch": 2.1609620721554115, "grad_norm": 0.04438649117946625, "learning_rate": 0.01, "loss": 2.0736, "step": 21024 }, { "epoch": 2.1612704286154796, "grad_norm": 0.06552638858556747, "learning_rate": 0.01, "loss": 2.0449, "step": 21027 }, { "epoch": 2.1615787850755472, "grad_norm": 0.044327035546302795, "learning_rate": 0.01, "loss": 2.0512, "step": 21030 }, { "epoch": 2.1618871415356153, "grad_norm": 0.04915094003081322, "learning_rate": 0.01, "loss": 2.0634, "step": 21033 }, { "epoch": 2.162195497995683, "grad_norm": 0.052909743040800095, "learning_rate": 0.01, "loss": 2.0408, "step": 21036 }, { "epoch": 2.1625038544557507, "grad_norm": 0.09883973747491837, "learning_rate": 0.01, "loss": 2.0107, "step": 21039 }, { "epoch": 2.1628122109158188, "grad_norm": 0.07719819992780685, "learning_rate": 0.01, "loss": 2.0357, "step": 21042 }, { "epoch": 2.1631205673758864, "grad_norm": 0.09743590652942657, "learning_rate": 0.01, "loss": 2.0263, "step": 21045 }, { "epoch": 2.1634289238359545, "grad_norm": 0.06583153456449509, "learning_rate": 0.01, "loss": 2.0249, "step": 21048 }, { "epoch": 2.163737280296022, "grad_norm": 0.10464660078287125, "learning_rate": 0.01, "loss": 2.0154, "step": 21051 }, { "epoch": 2.16404563675609, "grad_norm": 0.057555560022592545, "learning_rate": 0.01, "loss": 2.0545, "step": 21054 }, { "epoch": 2.164353993216158, "grad_norm": 0.04753732308745384, "learning_rate": 0.01, "loss": 2.054, "step": 21057 }, { "epoch": 2.1646623496762256, "grad_norm": 0.05236852541565895, "learning_rate": 0.01, "loss": 2.0336, "step": 21060 }, { "epoch": 2.1649707061362937, "grad_norm": 0.05231897532939911, "learning_rate": 0.01, "loss": 2.0303, "step": 21063 }, { "epoch": 2.1652790625963614, "grad_norm": 0.058431778103113174, "learning_rate": 0.01, "loss": 2.0387, "step": 21066 }, { "epoch": 2.165587419056429, "grad_norm": 0.04048459604382515, "learning_rate": 0.01, "loss": 2.0317, "step": 21069 }, { "epoch": 2.165895775516497, "grad_norm": 0.0941488966345787, "learning_rate": 0.01, "loss": 2.0113, "step": 21072 }, { "epoch": 2.166204131976565, "grad_norm": 0.04498032480478287, "learning_rate": 0.01, "loss": 2.0178, "step": 21075 }, { "epoch": 2.166512488436633, "grad_norm": 0.03668253496289253, "learning_rate": 0.01, "loss": 2.0354, "step": 21078 }, { "epoch": 2.1668208448967006, "grad_norm": 0.06661085039377213, "learning_rate": 0.01, "loss": 2.0221, "step": 21081 }, { "epoch": 2.1671292013567682, "grad_norm": 0.06425791233778, "learning_rate": 0.01, "loss": 2.0301, "step": 21084 }, { "epoch": 2.1674375578168363, "grad_norm": 0.046152301132678986, "learning_rate": 0.01, "loss": 2.0287, "step": 21087 }, { "epoch": 2.167745914276904, "grad_norm": 0.12584535777568817, "learning_rate": 0.01, "loss": 2.0364, "step": 21090 }, { "epoch": 2.168054270736972, "grad_norm": 0.04133312404155731, "learning_rate": 0.01, "loss": 2.0403, "step": 21093 }, { "epoch": 2.1683626271970398, "grad_norm": 0.04181768745183945, "learning_rate": 0.01, "loss": 2.0241, "step": 21096 }, { "epoch": 2.1686709836571074, "grad_norm": 0.04648204892873764, "learning_rate": 0.01, "loss": 2.0307, "step": 21099 }, { "epoch": 2.1689793401171755, "grad_norm": 0.044040024280548096, "learning_rate": 0.01, "loss": 2.0495, "step": 21102 }, { "epoch": 2.169287696577243, "grad_norm": 0.04033771902322769, "learning_rate": 0.01, "loss": 2.0496, "step": 21105 }, { "epoch": 2.1695960530373113, "grad_norm": 0.04693743214011192, "learning_rate": 0.01, "loss": 2.0492, "step": 21108 }, { "epoch": 2.169904409497379, "grad_norm": 0.07494813948869705, "learning_rate": 0.01, "loss": 2.0462, "step": 21111 }, { "epoch": 2.1702127659574466, "grad_norm": 0.09254445135593414, "learning_rate": 0.01, "loss": 2.0478, "step": 21114 }, { "epoch": 2.1705211224175147, "grad_norm": 0.13350296020507812, "learning_rate": 0.01, "loss": 2.0277, "step": 21117 }, { "epoch": 2.1708294788775824, "grad_norm": 0.051686085760593414, "learning_rate": 0.01, "loss": 2.0507, "step": 21120 }, { "epoch": 2.1711378353376505, "grad_norm": 0.043330930173397064, "learning_rate": 0.01, "loss": 2.0261, "step": 21123 }, { "epoch": 2.171446191797718, "grad_norm": 0.05219477042555809, "learning_rate": 0.01, "loss": 2.0299, "step": 21126 }, { "epoch": 2.171754548257786, "grad_norm": 0.04195651784539223, "learning_rate": 0.01, "loss": 2.0441, "step": 21129 }, { "epoch": 2.172062904717854, "grad_norm": 0.15802520513534546, "learning_rate": 0.01, "loss": 2.0609, "step": 21132 }, { "epoch": 2.1723712611779216, "grad_norm": 0.08619748800992966, "learning_rate": 0.01, "loss": 2.0542, "step": 21135 }, { "epoch": 2.1726796176379897, "grad_norm": 0.08852280676364899, "learning_rate": 0.01, "loss": 2.0287, "step": 21138 }, { "epoch": 2.1729879740980573, "grad_norm": 0.040646422654390335, "learning_rate": 0.01, "loss": 2.0289, "step": 21141 }, { "epoch": 2.173296330558125, "grad_norm": 0.06913924962282181, "learning_rate": 0.01, "loss": 2.0463, "step": 21144 }, { "epoch": 2.173604687018193, "grad_norm": 0.05461576208472252, "learning_rate": 0.01, "loss": 2.0382, "step": 21147 }, { "epoch": 2.1739130434782608, "grad_norm": 0.08613748103380203, "learning_rate": 0.01, "loss": 2.0412, "step": 21150 }, { "epoch": 2.174221399938329, "grad_norm": 0.06459856033325195, "learning_rate": 0.01, "loss": 2.0445, "step": 21153 }, { "epoch": 2.1745297563983965, "grad_norm": 0.08450223505496979, "learning_rate": 0.01, "loss": 2.0237, "step": 21156 }, { "epoch": 2.174838112858464, "grad_norm": 0.10049585998058319, "learning_rate": 0.01, "loss": 2.0458, "step": 21159 }, { "epoch": 2.1751464693185323, "grad_norm": 0.06031005084514618, "learning_rate": 0.01, "loss": 2.0668, "step": 21162 }, { "epoch": 2.1754548257786, "grad_norm": 0.06921012699604034, "learning_rate": 0.01, "loss": 2.053, "step": 21165 }, { "epoch": 2.175763182238668, "grad_norm": 0.044479697942733765, "learning_rate": 0.01, "loss": 2.0275, "step": 21168 }, { "epoch": 2.1760715386987357, "grad_norm": 0.0857187807559967, "learning_rate": 0.01, "loss": 2.0513, "step": 21171 }, { "epoch": 2.176379895158804, "grad_norm": 0.057432882487773895, "learning_rate": 0.01, "loss": 2.0622, "step": 21174 }, { "epoch": 2.1766882516188715, "grad_norm": 0.0905427411198616, "learning_rate": 0.01, "loss": 2.0574, "step": 21177 }, { "epoch": 2.176996608078939, "grad_norm": 0.05289644002914429, "learning_rate": 0.01, "loss": 2.0348, "step": 21180 }, { "epoch": 2.1773049645390072, "grad_norm": 0.06351148337125778, "learning_rate": 0.01, "loss": 2.0223, "step": 21183 }, { "epoch": 2.177613320999075, "grad_norm": 0.1098824068903923, "learning_rate": 0.01, "loss": 2.0534, "step": 21186 }, { "epoch": 2.1779216774591426, "grad_norm": 0.03698734566569328, "learning_rate": 0.01, "loss": 2.0265, "step": 21189 }, { "epoch": 2.1782300339192107, "grad_norm": 0.09595025330781937, "learning_rate": 0.01, "loss": 2.0596, "step": 21192 }, { "epoch": 2.1785383903792783, "grad_norm": 0.05725647136569023, "learning_rate": 0.01, "loss": 2.0568, "step": 21195 }, { "epoch": 2.1788467468393464, "grad_norm": 0.06952492892742157, "learning_rate": 0.01, "loss": 2.0119, "step": 21198 }, { "epoch": 2.179155103299414, "grad_norm": 0.06831461936235428, "learning_rate": 0.01, "loss": 2.0484, "step": 21201 }, { "epoch": 2.179463459759482, "grad_norm": 0.05125569924712181, "learning_rate": 0.01, "loss": 2.0593, "step": 21204 }, { "epoch": 2.17977181621955, "grad_norm": 0.053290076553821564, "learning_rate": 0.01, "loss": 2.0576, "step": 21207 }, { "epoch": 2.1800801726796175, "grad_norm": 0.05718007683753967, "learning_rate": 0.01, "loss": 2.0472, "step": 21210 }, { "epoch": 2.1803885291396856, "grad_norm": 0.0895228236913681, "learning_rate": 0.01, "loss": 2.0673, "step": 21213 }, { "epoch": 2.1806968855997533, "grad_norm": 0.05205732583999634, "learning_rate": 0.01, "loss": 2.0366, "step": 21216 }, { "epoch": 2.181005242059821, "grad_norm": 0.05785641819238663, "learning_rate": 0.01, "loss": 2.0557, "step": 21219 }, { "epoch": 2.181313598519889, "grad_norm": 0.0756557285785675, "learning_rate": 0.01, "loss": 2.0431, "step": 21222 }, { "epoch": 2.1816219549799567, "grad_norm": 0.07016823440790176, "learning_rate": 0.01, "loss": 2.0508, "step": 21225 }, { "epoch": 2.181930311440025, "grad_norm": 0.06859459728002548, "learning_rate": 0.01, "loss": 2.0418, "step": 21228 }, { "epoch": 2.1822386679000925, "grad_norm": 0.14140251278877258, "learning_rate": 0.01, "loss": 2.0473, "step": 21231 }, { "epoch": 2.1825470243601606, "grad_norm": 0.0544712096452713, "learning_rate": 0.01, "loss": 2.04, "step": 21234 }, { "epoch": 2.1828553808202282, "grad_norm": 0.04346593841910362, "learning_rate": 0.01, "loss": 2.0516, "step": 21237 }, { "epoch": 2.183163737280296, "grad_norm": 0.04261700063943863, "learning_rate": 0.01, "loss": 2.0431, "step": 21240 }, { "epoch": 2.183472093740364, "grad_norm": 0.04763154685497284, "learning_rate": 0.01, "loss": 2.0196, "step": 21243 }, { "epoch": 2.1837804502004317, "grad_norm": 0.06876801699399948, "learning_rate": 0.01, "loss": 2.0343, "step": 21246 }, { "epoch": 2.1840888066604993, "grad_norm": 0.07819974422454834, "learning_rate": 0.01, "loss": 2.0506, "step": 21249 }, { "epoch": 2.1843971631205674, "grad_norm": 0.06239667907357216, "learning_rate": 0.01, "loss": 2.0268, "step": 21252 }, { "epoch": 2.184705519580635, "grad_norm": 0.1095786988735199, "learning_rate": 0.01, "loss": 2.0398, "step": 21255 }, { "epoch": 2.185013876040703, "grad_norm": 0.055070340633392334, "learning_rate": 0.01, "loss": 2.0523, "step": 21258 }, { "epoch": 2.185322232500771, "grad_norm": 0.08038482069969177, "learning_rate": 0.01, "loss": 2.0222, "step": 21261 }, { "epoch": 2.185630588960839, "grad_norm": 0.06929390877485275, "learning_rate": 0.01, "loss": 2.0405, "step": 21264 }, { "epoch": 2.1859389454209066, "grad_norm": 0.054179031401872635, "learning_rate": 0.01, "loss": 2.0554, "step": 21267 }, { "epoch": 2.1862473018809743, "grad_norm": 0.06956303864717484, "learning_rate": 0.01, "loss": 2.0586, "step": 21270 }, { "epoch": 2.1865556583410424, "grad_norm": 0.14279653131961823, "learning_rate": 0.01, "loss": 2.0381, "step": 21273 }, { "epoch": 2.18686401480111, "grad_norm": 0.04268357530236244, "learning_rate": 0.01, "loss": 2.0404, "step": 21276 }, { "epoch": 2.1871723712611777, "grad_norm": 0.05189354717731476, "learning_rate": 0.01, "loss": 2.0405, "step": 21279 }, { "epoch": 2.187480727721246, "grad_norm": 0.05047158896923065, "learning_rate": 0.01, "loss": 2.0495, "step": 21282 }, { "epoch": 2.1877890841813135, "grad_norm": 0.041077565401792526, "learning_rate": 0.01, "loss": 2.0558, "step": 21285 }, { "epoch": 2.1880974406413816, "grad_norm": 0.03880000859498978, "learning_rate": 0.01, "loss": 2.0472, "step": 21288 }, { "epoch": 2.1884057971014492, "grad_norm": 0.1096898689866066, "learning_rate": 0.01, "loss": 2.0537, "step": 21291 }, { "epoch": 2.1887141535615173, "grad_norm": 0.04502374678850174, "learning_rate": 0.01, "loss": 2.0397, "step": 21294 }, { "epoch": 2.189022510021585, "grad_norm": 0.037158042192459106, "learning_rate": 0.01, "loss": 2.0117, "step": 21297 }, { "epoch": 2.1893308664816526, "grad_norm": 0.03381425887346268, "learning_rate": 0.01, "loss": 2.0297, "step": 21300 }, { "epoch": 2.1896392229417208, "grad_norm": 0.05572035536170006, "learning_rate": 0.01, "loss": 2.0575, "step": 21303 }, { "epoch": 2.1899475794017884, "grad_norm": 0.06287326663732529, "learning_rate": 0.01, "loss": 2.0341, "step": 21306 }, { "epoch": 2.190255935861856, "grad_norm": 0.07691732794046402, "learning_rate": 0.01, "loss": 2.0317, "step": 21309 }, { "epoch": 2.190564292321924, "grad_norm": 0.058651309460401535, "learning_rate": 0.01, "loss": 2.04, "step": 21312 }, { "epoch": 2.190872648781992, "grad_norm": 0.033279962837696075, "learning_rate": 0.01, "loss": 2.0325, "step": 21315 }, { "epoch": 2.19118100524206, "grad_norm": 0.08742087334394455, "learning_rate": 0.01, "loss": 2.0303, "step": 21318 }, { "epoch": 2.1914893617021276, "grad_norm": 0.0864923968911171, "learning_rate": 0.01, "loss": 2.0492, "step": 21321 }, { "epoch": 2.1917977181621957, "grad_norm": 0.10759606957435608, "learning_rate": 0.01, "loss": 2.0582, "step": 21324 }, { "epoch": 2.1921060746222634, "grad_norm": 0.058335281908512115, "learning_rate": 0.01, "loss": 2.019, "step": 21327 }, { "epoch": 2.192414431082331, "grad_norm": 0.04506481811404228, "learning_rate": 0.01, "loss": 2.027, "step": 21330 }, { "epoch": 2.192722787542399, "grad_norm": 0.0454195998609066, "learning_rate": 0.01, "loss": 2.0297, "step": 21333 }, { "epoch": 2.193031144002467, "grad_norm": 0.051547158509492874, "learning_rate": 0.01, "loss": 1.9814, "step": 21336 }, { "epoch": 2.193339500462535, "grad_norm": 0.09826447069644928, "learning_rate": 0.01, "loss": 2.0518, "step": 21339 }, { "epoch": 2.1936478569226026, "grad_norm": 0.05799272283911705, "learning_rate": 0.01, "loss": 2.0649, "step": 21342 }, { "epoch": 2.19395621338267, "grad_norm": 0.12493482232093811, "learning_rate": 0.01, "loss": 2.0257, "step": 21345 }, { "epoch": 2.1942645698427383, "grad_norm": 0.04930184409022331, "learning_rate": 0.01, "loss": 2.0228, "step": 21348 }, { "epoch": 2.194572926302806, "grad_norm": 0.04257272928953171, "learning_rate": 0.01, "loss": 2.0579, "step": 21351 }, { "epoch": 2.194881282762874, "grad_norm": 0.04625258222222328, "learning_rate": 0.01, "loss": 2.0244, "step": 21354 }, { "epoch": 2.1951896392229417, "grad_norm": 0.04295830428600311, "learning_rate": 0.01, "loss": 2.032, "step": 21357 }, { "epoch": 2.1954979956830094, "grad_norm": 0.21663565933704376, "learning_rate": 0.01, "loss": 2.0489, "step": 21360 }, { "epoch": 2.1958063521430775, "grad_norm": 0.15513652563095093, "learning_rate": 0.01, "loss": 2.0473, "step": 21363 }, { "epoch": 2.196114708603145, "grad_norm": 0.08245841413736343, "learning_rate": 0.01, "loss": 2.0402, "step": 21366 }, { "epoch": 2.1964230650632133, "grad_norm": 0.03768035024404526, "learning_rate": 0.01, "loss": 2.0444, "step": 21369 }, { "epoch": 2.196731421523281, "grad_norm": 0.0586925707757473, "learning_rate": 0.01, "loss": 2.0299, "step": 21372 }, { "epoch": 2.1970397779833486, "grad_norm": 0.045760899782180786, "learning_rate": 0.01, "loss": 2.0484, "step": 21375 }, { "epoch": 2.1973481344434167, "grad_norm": 0.04357283189892769, "learning_rate": 0.01, "loss": 2.0231, "step": 21378 }, { "epoch": 2.1976564909034844, "grad_norm": 0.04477246478199959, "learning_rate": 0.01, "loss": 2.0211, "step": 21381 }, { "epoch": 2.1979648473635525, "grad_norm": 0.06785521656274796, "learning_rate": 0.01, "loss": 2.0532, "step": 21384 }, { "epoch": 2.19827320382362, "grad_norm": 0.04677508771419525, "learning_rate": 0.01, "loss": 2.0222, "step": 21387 }, { "epoch": 2.198581560283688, "grad_norm": 0.049355942755937576, "learning_rate": 0.01, "loss": 2.0488, "step": 21390 }, { "epoch": 2.198889916743756, "grad_norm": 0.11005277186632156, "learning_rate": 0.01, "loss": 2.0198, "step": 21393 }, { "epoch": 2.1991982732038236, "grad_norm": 0.10170245915651321, "learning_rate": 0.01, "loss": 2.0021, "step": 21396 }, { "epoch": 2.1995066296638917, "grad_norm": 0.045158207416534424, "learning_rate": 0.01, "loss": 2.0462, "step": 21399 }, { "epoch": 2.1998149861239593, "grad_norm": 0.0780436098575592, "learning_rate": 0.01, "loss": 2.0292, "step": 21402 }, { "epoch": 2.200123342584027, "grad_norm": 0.06062453240156174, "learning_rate": 0.01, "loss": 2.0437, "step": 21405 }, { "epoch": 2.200431699044095, "grad_norm": 0.09208519756793976, "learning_rate": 0.01, "loss": 2.0378, "step": 21408 }, { "epoch": 2.2007400555041627, "grad_norm": 0.05279922112822533, "learning_rate": 0.01, "loss": 2.0521, "step": 21411 }, { "epoch": 2.201048411964231, "grad_norm": 0.0831415057182312, "learning_rate": 0.01, "loss": 2.0394, "step": 21414 }, { "epoch": 2.2013567684242985, "grad_norm": 0.06481669098138809, "learning_rate": 0.01, "loss": 2.0119, "step": 21417 }, { "epoch": 2.201665124884366, "grad_norm": 0.08297551423311234, "learning_rate": 0.01, "loss": 2.0741, "step": 21420 }, { "epoch": 2.2019734813444343, "grad_norm": 0.06962350755929947, "learning_rate": 0.01, "loss": 2.0434, "step": 21423 }, { "epoch": 2.202281837804502, "grad_norm": 0.08055757731199265, "learning_rate": 0.01, "loss": 2.0334, "step": 21426 }, { "epoch": 2.20259019426457, "grad_norm": 0.09755895286798477, "learning_rate": 0.01, "loss": 2.0443, "step": 21429 }, { "epoch": 2.2028985507246377, "grad_norm": 0.04400679096579552, "learning_rate": 0.01, "loss": 2.0194, "step": 21432 }, { "epoch": 2.2032069071847054, "grad_norm": 0.04204344376921654, "learning_rate": 0.01, "loss": 2.063, "step": 21435 }, { "epoch": 2.2035152636447735, "grad_norm": 0.029974184930324554, "learning_rate": 0.01, "loss": 2.0211, "step": 21438 }, { "epoch": 2.203823620104841, "grad_norm": 0.07232589274644852, "learning_rate": 0.01, "loss": 2.0254, "step": 21441 }, { "epoch": 2.2041319765649092, "grad_norm": 0.06404844671487808, "learning_rate": 0.01, "loss": 2.0596, "step": 21444 }, { "epoch": 2.204440333024977, "grad_norm": 0.08751700818538666, "learning_rate": 0.01, "loss": 2.0576, "step": 21447 }, { "epoch": 2.2047486894850445, "grad_norm": 0.0371503084897995, "learning_rate": 0.01, "loss": 2.0424, "step": 21450 }, { "epoch": 2.2050570459451126, "grad_norm": 0.06034844368696213, "learning_rate": 0.01, "loss": 2.0306, "step": 21453 }, { "epoch": 2.2053654024051803, "grad_norm": 0.04261939972639084, "learning_rate": 0.01, "loss": 2.0323, "step": 21456 }, { "epoch": 2.2056737588652484, "grad_norm": 0.0612785667181015, "learning_rate": 0.01, "loss": 2.0349, "step": 21459 }, { "epoch": 2.205982115325316, "grad_norm": 0.05828654393553734, "learning_rate": 0.01, "loss": 2.0231, "step": 21462 }, { "epoch": 2.2062904717853837, "grad_norm": 0.06474754214286804, "learning_rate": 0.01, "loss": 2.0585, "step": 21465 }, { "epoch": 2.206598828245452, "grad_norm": 0.04646962508559227, "learning_rate": 0.01, "loss": 2.0337, "step": 21468 }, { "epoch": 2.2069071847055195, "grad_norm": 0.07051596790552139, "learning_rate": 0.01, "loss": 2.0218, "step": 21471 }, { "epoch": 2.2072155411655876, "grad_norm": 0.05658755078911781, "learning_rate": 0.01, "loss": 2.0347, "step": 21474 }, { "epoch": 2.2075238976256553, "grad_norm": 0.039348311722278595, "learning_rate": 0.01, "loss": 2.0587, "step": 21477 }, { "epoch": 2.207832254085723, "grad_norm": 0.030550241470336914, "learning_rate": 0.01, "loss": 2.0412, "step": 21480 }, { "epoch": 2.208140610545791, "grad_norm": 0.11341346800327301, "learning_rate": 0.01, "loss": 2.0416, "step": 21483 }, { "epoch": 2.2084489670058587, "grad_norm": 0.07061111927032471, "learning_rate": 0.01, "loss": 2.0117, "step": 21486 }, { "epoch": 2.208757323465927, "grad_norm": 0.10256624966859818, "learning_rate": 0.01, "loss": 2.0482, "step": 21489 }, { "epoch": 2.2090656799259945, "grad_norm": 0.06658724695444107, "learning_rate": 0.01, "loss": 2.0544, "step": 21492 }, { "epoch": 2.209374036386062, "grad_norm": 0.12220150977373123, "learning_rate": 0.01, "loss": 2.0399, "step": 21495 }, { "epoch": 2.20968239284613, "grad_norm": 0.05570116639137268, "learning_rate": 0.01, "loss": 2.0238, "step": 21498 }, { "epoch": 2.209990749306198, "grad_norm": 0.04273837059736252, "learning_rate": 0.01, "loss": 2.032, "step": 21501 }, { "epoch": 2.210299105766266, "grad_norm": 0.04138748720288277, "learning_rate": 0.01, "loss": 2.0497, "step": 21504 }, { "epoch": 2.2106074622263336, "grad_norm": 0.08082164078950882, "learning_rate": 0.01, "loss": 2.025, "step": 21507 }, { "epoch": 2.2109158186864013, "grad_norm": 0.06663049012422562, "learning_rate": 0.01, "loss": 2.0298, "step": 21510 }, { "epoch": 2.2112241751464694, "grad_norm": 0.0444667711853981, "learning_rate": 0.01, "loss": 2.0469, "step": 21513 }, { "epoch": 2.211532531606537, "grad_norm": 0.04407314583659172, "learning_rate": 0.01, "loss": 2.0667, "step": 21516 }, { "epoch": 2.211840888066605, "grad_norm": 0.03877383843064308, "learning_rate": 0.01, "loss": 2.0329, "step": 21519 }, { "epoch": 2.212149244526673, "grad_norm": 0.059297189116477966, "learning_rate": 0.01, "loss": 2.0441, "step": 21522 }, { "epoch": 2.2124576009867405, "grad_norm": 0.06609878689050674, "learning_rate": 0.01, "loss": 2.0263, "step": 21525 }, { "epoch": 2.2127659574468086, "grad_norm": 0.06935823708772659, "learning_rate": 0.01, "loss": 2.0295, "step": 21528 }, { "epoch": 2.2130743139068763, "grad_norm": 0.07610715180635452, "learning_rate": 0.01, "loss": 2.0424, "step": 21531 }, { "epoch": 2.2133826703669444, "grad_norm": 0.10587569326162338, "learning_rate": 0.01, "loss": 2.0636, "step": 21534 }, { "epoch": 2.213691026827012, "grad_norm": 0.05116620659828186, "learning_rate": 0.01, "loss": 2.0232, "step": 21537 }, { "epoch": 2.2139993832870797, "grad_norm": 0.03773776814341545, "learning_rate": 0.01, "loss": 2.0438, "step": 21540 }, { "epoch": 2.214307739747148, "grad_norm": 0.05412130430340767, "learning_rate": 0.01, "loss": 2.0522, "step": 21543 }, { "epoch": 2.2146160962072154, "grad_norm": 0.03664164990186691, "learning_rate": 0.01, "loss": 2.0535, "step": 21546 }, { "epoch": 2.2149244526672835, "grad_norm": 0.04415920004248619, "learning_rate": 0.01, "loss": 2.0113, "step": 21549 }, { "epoch": 2.215232809127351, "grad_norm": 0.05737615004181862, "learning_rate": 0.01, "loss": 2.0397, "step": 21552 }, { "epoch": 2.215541165587419, "grad_norm": 0.032385457307100296, "learning_rate": 0.01, "loss": 2.0325, "step": 21555 }, { "epoch": 2.215849522047487, "grad_norm": 0.0982925221323967, "learning_rate": 0.01, "loss": 2.0653, "step": 21558 }, { "epoch": 2.2161578785075546, "grad_norm": 0.03911735862493515, "learning_rate": 0.01, "loss": 2.0346, "step": 21561 }, { "epoch": 2.2164662349676227, "grad_norm": 0.07814744859933853, "learning_rate": 0.01, "loss": 2.0445, "step": 21564 }, { "epoch": 2.2167745914276904, "grad_norm": 0.05368256941437721, "learning_rate": 0.01, "loss": 2.0689, "step": 21567 }, { "epoch": 2.217082947887758, "grad_norm": 0.046178530901670456, "learning_rate": 0.01, "loss": 2.0402, "step": 21570 }, { "epoch": 2.217391304347826, "grad_norm": 0.047109801322221756, "learning_rate": 0.01, "loss": 2.0721, "step": 21573 }, { "epoch": 2.217699660807894, "grad_norm": 0.05443650484085083, "learning_rate": 0.01, "loss": 2.0416, "step": 21576 }, { "epoch": 2.218008017267962, "grad_norm": 0.13156400620937347, "learning_rate": 0.01, "loss": 2.0581, "step": 21579 }, { "epoch": 2.2183163737280296, "grad_norm": 0.04178638756275177, "learning_rate": 0.01, "loss": 2.0517, "step": 21582 }, { "epoch": 2.2186247301880972, "grad_norm": 0.042627740651369095, "learning_rate": 0.01, "loss": 2.0541, "step": 21585 }, { "epoch": 2.2189330866481654, "grad_norm": 0.05318658426403999, "learning_rate": 0.01, "loss": 2.0361, "step": 21588 }, { "epoch": 2.219241443108233, "grad_norm": 0.061288055032491684, "learning_rate": 0.01, "loss": 2.0425, "step": 21591 }, { "epoch": 2.219549799568301, "grad_norm": 0.06663260608911514, "learning_rate": 0.01, "loss": 2.0549, "step": 21594 }, { "epoch": 2.219858156028369, "grad_norm": 0.04567466303706169, "learning_rate": 0.01, "loss": 2.03, "step": 21597 }, { "epoch": 2.2201665124884364, "grad_norm": 0.12566886842250824, "learning_rate": 0.01, "loss": 2.0527, "step": 21600 }, { "epoch": 2.2204748689485045, "grad_norm": 0.03933155536651611, "learning_rate": 0.01, "loss": 2.022, "step": 21603 }, { "epoch": 2.220783225408572, "grad_norm": 0.04617391526699066, "learning_rate": 0.01, "loss": 2.0109, "step": 21606 }, { "epoch": 2.2210915818686403, "grad_norm": 0.05472411960363388, "learning_rate": 0.01, "loss": 2.0412, "step": 21609 }, { "epoch": 2.221399938328708, "grad_norm": 0.05556654930114746, "learning_rate": 0.01, "loss": 2.0214, "step": 21612 }, { "epoch": 2.2217082947887756, "grad_norm": 0.05096900090575218, "learning_rate": 0.01, "loss": 2.0524, "step": 21615 }, { "epoch": 2.2220166512488437, "grad_norm": 0.039425577968358994, "learning_rate": 0.01, "loss": 2.0368, "step": 21618 }, { "epoch": 2.2223250077089114, "grad_norm": 0.05080854892730713, "learning_rate": 0.01, "loss": 2.0494, "step": 21621 }, { "epoch": 2.2226333641689795, "grad_norm": 0.03824865445494652, "learning_rate": 0.01, "loss": 2.0497, "step": 21624 }, { "epoch": 2.222941720629047, "grad_norm": 0.03814932331442833, "learning_rate": 0.01, "loss": 2.0388, "step": 21627 }, { "epoch": 2.223250077089115, "grad_norm": 0.057797808200120926, "learning_rate": 0.01, "loss": 2.046, "step": 21630 }, { "epoch": 2.223558433549183, "grad_norm": 0.06596177071332932, "learning_rate": 0.01, "loss": 2.0572, "step": 21633 }, { "epoch": 2.2238667900092506, "grad_norm": 0.13438032567501068, "learning_rate": 0.01, "loss": 2.04, "step": 21636 }, { "epoch": 2.2241751464693187, "grad_norm": 0.06200256571173668, "learning_rate": 0.01, "loss": 2.0563, "step": 21639 }, { "epoch": 2.2244835029293863, "grad_norm": 0.06471807509660721, "learning_rate": 0.01, "loss": 2.0264, "step": 21642 }, { "epoch": 2.224791859389454, "grad_norm": 0.06439206004142761, "learning_rate": 0.01, "loss": 2.0425, "step": 21645 }, { "epoch": 2.225100215849522, "grad_norm": 0.0768360123038292, "learning_rate": 0.01, "loss": 2.0494, "step": 21648 }, { "epoch": 2.2254085723095898, "grad_norm": 0.10393831878900528, "learning_rate": 0.01, "loss": 2.0455, "step": 21651 }, { "epoch": 2.225716928769658, "grad_norm": 0.03999519720673561, "learning_rate": 0.01, "loss": 2.0168, "step": 21654 }, { "epoch": 2.2260252852297255, "grad_norm": 0.04620358720421791, "learning_rate": 0.01, "loss": 2.0576, "step": 21657 }, { "epoch": 2.226333641689793, "grad_norm": 0.05364964157342911, "learning_rate": 0.01, "loss": 2.0409, "step": 21660 }, { "epoch": 2.2266419981498613, "grad_norm": 0.049792349338531494, "learning_rate": 0.01, "loss": 2.0315, "step": 21663 }, { "epoch": 2.226950354609929, "grad_norm": 0.0651509091258049, "learning_rate": 0.01, "loss": 2.0176, "step": 21666 }, { "epoch": 2.227258711069997, "grad_norm": 0.035688720643520355, "learning_rate": 0.01, "loss": 2.0509, "step": 21669 }, { "epoch": 2.2275670675300647, "grad_norm": 0.06412792950868607, "learning_rate": 0.01, "loss": 2.0007, "step": 21672 }, { "epoch": 2.2278754239901324, "grad_norm": 0.08088821917772293, "learning_rate": 0.01, "loss": 2.0421, "step": 21675 }, { "epoch": 2.2281837804502005, "grad_norm": 0.060282256454229355, "learning_rate": 0.01, "loss": 2.0361, "step": 21678 }, { "epoch": 2.228492136910268, "grad_norm": 0.09816791117191315, "learning_rate": 0.01, "loss": 2.0317, "step": 21681 }, { "epoch": 2.2288004933703363, "grad_norm": 0.06672241538763046, "learning_rate": 0.01, "loss": 2.0559, "step": 21684 }, { "epoch": 2.229108849830404, "grad_norm": 0.06586040556430817, "learning_rate": 0.01, "loss": 2.0375, "step": 21687 }, { "epoch": 2.2294172062904716, "grad_norm": 0.0655137374997139, "learning_rate": 0.01, "loss": 2.0189, "step": 21690 }, { "epoch": 2.2297255627505397, "grad_norm": 0.08448750525712967, "learning_rate": 0.01, "loss": 2.0292, "step": 21693 }, { "epoch": 2.2300339192106073, "grad_norm": 0.05828822776675224, "learning_rate": 0.01, "loss": 2.043, "step": 21696 }, { "epoch": 2.2303422756706754, "grad_norm": 0.0816628485918045, "learning_rate": 0.01, "loss": 2.0362, "step": 21699 }, { "epoch": 2.230650632130743, "grad_norm": 0.038368817418813705, "learning_rate": 0.01, "loss": 2.029, "step": 21702 }, { "epoch": 2.230958988590811, "grad_norm": 0.08584286272525787, "learning_rate": 0.01, "loss": 2.0183, "step": 21705 }, { "epoch": 2.231267345050879, "grad_norm": 0.08528412878513336, "learning_rate": 0.01, "loss": 2.0185, "step": 21708 }, { "epoch": 2.2315757015109465, "grad_norm": 0.07158780843019485, "learning_rate": 0.01, "loss": 2.0437, "step": 21711 }, { "epoch": 2.2318840579710146, "grad_norm": 0.07810889184474945, "learning_rate": 0.01, "loss": 2.0374, "step": 21714 }, { "epoch": 2.2321924144310823, "grad_norm": 0.07769618928432465, "learning_rate": 0.01, "loss": 2.038, "step": 21717 }, { "epoch": 2.23250077089115, "grad_norm": 0.04515406861901283, "learning_rate": 0.01, "loss": 2.0307, "step": 21720 }, { "epoch": 2.232809127351218, "grad_norm": 0.11805865168571472, "learning_rate": 0.01, "loss": 2.0203, "step": 21723 }, { "epoch": 2.2331174838112857, "grad_norm": 0.11899860948324203, "learning_rate": 0.01, "loss": 2.0184, "step": 21726 }, { "epoch": 2.233425840271354, "grad_norm": 0.03309144452214241, "learning_rate": 0.01, "loss": 2.0307, "step": 21729 }, { "epoch": 2.2337341967314215, "grad_norm": 0.03389447182416916, "learning_rate": 0.01, "loss": 2.0257, "step": 21732 }, { "epoch": 2.2340425531914896, "grad_norm": 0.03472166880965233, "learning_rate": 0.01, "loss": 2.0386, "step": 21735 }, { "epoch": 2.2343509096515572, "grad_norm": 0.11815425753593445, "learning_rate": 0.01, "loss": 2.0302, "step": 21738 }, { "epoch": 2.234659266111625, "grad_norm": 0.08007802814245224, "learning_rate": 0.01, "loss": 2.0047, "step": 21741 }, { "epoch": 2.234967622571693, "grad_norm": 0.05053863301873207, "learning_rate": 0.01, "loss": 2.0119, "step": 21744 }, { "epoch": 2.2352759790317607, "grad_norm": 0.12116878479719162, "learning_rate": 0.01, "loss": 2.0189, "step": 21747 }, { "epoch": 2.2355843354918283, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0441, "step": 21750 }, { "epoch": 2.2358926919518964, "grad_norm": 0.05141659080982208, "learning_rate": 0.01, "loss": 2.0315, "step": 21753 }, { "epoch": 2.236201048411964, "grad_norm": 0.03695325180888176, "learning_rate": 0.01, "loss": 2.0249, "step": 21756 }, { "epoch": 2.236509404872032, "grad_norm": 0.033388737589120865, "learning_rate": 0.01, "loss": 2.0238, "step": 21759 }, { "epoch": 2.2368177613321, "grad_norm": 0.03913639858365059, "learning_rate": 0.01, "loss": 2.0286, "step": 21762 }, { "epoch": 2.237126117792168, "grad_norm": 0.048513270914554596, "learning_rate": 0.01, "loss": 2.0277, "step": 21765 }, { "epoch": 2.2374344742522356, "grad_norm": 0.061340250074863434, "learning_rate": 0.01, "loss": 2.0295, "step": 21768 }, { "epoch": 2.2377428307123033, "grad_norm": 0.042784787714481354, "learning_rate": 0.01, "loss": 2.0306, "step": 21771 }, { "epoch": 2.2380511871723714, "grad_norm": 0.04124082252383232, "learning_rate": 0.01, "loss": 2.0478, "step": 21774 }, { "epoch": 2.238359543632439, "grad_norm": 0.03749304264783859, "learning_rate": 0.01, "loss": 2.0067, "step": 21777 }, { "epoch": 2.2386679000925067, "grad_norm": 0.12073452770709991, "learning_rate": 0.01, "loss": 2.0382, "step": 21780 }, { "epoch": 2.238976256552575, "grad_norm": 0.06613750755786896, "learning_rate": 0.01, "loss": 2.066, "step": 21783 }, { "epoch": 2.2392846130126425, "grad_norm": 0.11562148481607437, "learning_rate": 0.01, "loss": 2.0542, "step": 21786 }, { "epoch": 2.2395929694727106, "grad_norm": 0.06355132162570953, "learning_rate": 0.01, "loss": 2.0439, "step": 21789 }, { "epoch": 2.2399013259327782, "grad_norm": 0.09228016436100006, "learning_rate": 0.01, "loss": 2.066, "step": 21792 }, { "epoch": 2.2402096823928463, "grad_norm": 0.07648111879825592, "learning_rate": 0.01, "loss": 2.0334, "step": 21795 }, { "epoch": 2.240518038852914, "grad_norm": 0.0650177150964737, "learning_rate": 0.01, "loss": 2.0415, "step": 21798 }, { "epoch": 2.2408263953129817, "grad_norm": 0.09675853699445724, "learning_rate": 0.01, "loss": 2.0483, "step": 21801 }, { "epoch": 2.2411347517730498, "grad_norm": 0.045449864119291306, "learning_rate": 0.01, "loss": 2.0415, "step": 21804 }, { "epoch": 2.2414431082331174, "grad_norm": 0.0982547327876091, "learning_rate": 0.01, "loss": 2.0369, "step": 21807 }, { "epoch": 2.241751464693185, "grad_norm": 0.060201246291399, "learning_rate": 0.01, "loss": 2.0366, "step": 21810 }, { "epoch": 2.242059821153253, "grad_norm": 0.04503628611564636, "learning_rate": 0.01, "loss": 2.0411, "step": 21813 }, { "epoch": 2.242368177613321, "grad_norm": 0.04854295030236244, "learning_rate": 0.01, "loss": 2.0485, "step": 21816 }, { "epoch": 2.242676534073389, "grad_norm": 0.12228553742170334, "learning_rate": 0.01, "loss": 2.0204, "step": 21819 }, { "epoch": 2.2429848905334566, "grad_norm": 0.10491406172513962, "learning_rate": 0.01, "loss": 2.0546, "step": 21822 }, { "epoch": 2.2432932469935247, "grad_norm": 0.1188177838921547, "learning_rate": 0.01, "loss": 2.0324, "step": 21825 }, { "epoch": 2.2436016034535924, "grad_norm": 0.10877541452646255, "learning_rate": 0.01, "loss": 2.0433, "step": 21828 }, { "epoch": 2.24390995991366, "grad_norm": 0.03375115245580673, "learning_rate": 0.01, "loss": 2.0475, "step": 21831 }, { "epoch": 2.244218316373728, "grad_norm": 0.07588639855384827, "learning_rate": 0.01, "loss": 2.0192, "step": 21834 }, { "epoch": 2.244526672833796, "grad_norm": 0.04841979220509529, "learning_rate": 0.01, "loss": 2.0324, "step": 21837 }, { "epoch": 2.2448350292938635, "grad_norm": 0.041367027908563614, "learning_rate": 0.01, "loss": 2.0436, "step": 21840 }, { "epoch": 2.2451433857539316, "grad_norm": 0.044548399746418, "learning_rate": 0.01, "loss": 2.0349, "step": 21843 }, { "epoch": 2.2454517422139992, "grad_norm": 0.049429114907979965, "learning_rate": 0.01, "loss": 2.007, "step": 21846 }, { "epoch": 2.2457600986740673, "grad_norm": 0.07232322543859482, "learning_rate": 0.01, "loss": 2.028, "step": 21849 }, { "epoch": 2.246068455134135, "grad_norm": 0.05866502597928047, "learning_rate": 0.01, "loss": 2.0255, "step": 21852 }, { "epoch": 2.246376811594203, "grad_norm": 0.08406642079353333, "learning_rate": 0.01, "loss": 2.045, "step": 21855 }, { "epoch": 2.2466851680542708, "grad_norm": 0.08692225068807602, "learning_rate": 0.01, "loss": 2.0074, "step": 21858 }, { "epoch": 2.2469935245143384, "grad_norm": 0.09182562679052353, "learning_rate": 0.01, "loss": 2.0245, "step": 21861 }, { "epoch": 2.2473018809744065, "grad_norm": 0.07580506801605225, "learning_rate": 0.01, "loss": 2.0281, "step": 21864 }, { "epoch": 2.247610237434474, "grad_norm": 0.09604424238204956, "learning_rate": 0.01, "loss": 2.054, "step": 21867 }, { "epoch": 2.2479185938945423, "grad_norm": 0.05383382737636566, "learning_rate": 0.01, "loss": 2.0316, "step": 21870 }, { "epoch": 2.24822695035461, "grad_norm": 0.035557687282562256, "learning_rate": 0.01, "loss": 2.0246, "step": 21873 }, { "epoch": 2.2485353068146776, "grad_norm": 0.0330585315823555, "learning_rate": 0.01, "loss": 2.0335, "step": 21876 }, { "epoch": 2.2488436632747457, "grad_norm": 0.07125352323055267, "learning_rate": 0.01, "loss": 2.0058, "step": 21879 }, { "epoch": 2.2491520197348134, "grad_norm": 0.07722420245409012, "learning_rate": 0.01, "loss": 2.0428, "step": 21882 }, { "epoch": 2.2494603761948815, "grad_norm": 0.05086112394928932, "learning_rate": 0.01, "loss": 2.0444, "step": 21885 }, { "epoch": 2.249768732654949, "grad_norm": 0.055818263441324234, "learning_rate": 0.01, "loss": 2.0381, "step": 21888 }, { "epoch": 2.250077089115017, "grad_norm": 0.06524482369422913, "learning_rate": 0.01, "loss": 2.0543, "step": 21891 }, { "epoch": 2.250385445575085, "grad_norm": 0.051445212215185165, "learning_rate": 0.01, "loss": 2.0594, "step": 21894 }, { "epoch": 2.2506938020351526, "grad_norm": 0.057153645902872086, "learning_rate": 0.01, "loss": 2.0113, "step": 21897 }, { "epoch": 2.2510021584952202, "grad_norm": 0.08242445439100266, "learning_rate": 0.01, "loss": 2.0465, "step": 21900 }, { "epoch": 2.2513105149552883, "grad_norm": 0.06253752112388611, "learning_rate": 0.01, "loss": 2.0405, "step": 21903 }, { "epoch": 2.251618871415356, "grad_norm": 0.05388238653540611, "learning_rate": 0.01, "loss": 2.0548, "step": 21906 }, { "epoch": 2.251927227875424, "grad_norm": 0.05570909380912781, "learning_rate": 0.01, "loss": 2.0474, "step": 21909 }, { "epoch": 2.2522355843354918, "grad_norm": 0.038132745772600174, "learning_rate": 0.01, "loss": 2.0473, "step": 21912 }, { "epoch": 2.25254394079556, "grad_norm": 0.07434429973363876, "learning_rate": 0.01, "loss": 2.0306, "step": 21915 }, { "epoch": 2.2528522972556275, "grad_norm": 0.05396199971437454, "learning_rate": 0.01, "loss": 2.0239, "step": 21918 }, { "epoch": 2.253160653715695, "grad_norm": 0.06541625410318375, "learning_rate": 0.01, "loss": 2.0513, "step": 21921 }, { "epoch": 2.2534690101757633, "grad_norm": 0.07078956067562103, "learning_rate": 0.01, "loss": 2.0333, "step": 21924 }, { "epoch": 2.253777366635831, "grad_norm": 0.06990516930818558, "learning_rate": 0.01, "loss": 2.0078, "step": 21927 }, { "epoch": 2.254085723095899, "grad_norm": 0.049635425209999084, "learning_rate": 0.01, "loss": 2.0283, "step": 21930 }, { "epoch": 2.2543940795559667, "grad_norm": 0.04051049426198006, "learning_rate": 0.01, "loss": 2.0293, "step": 21933 }, { "epoch": 2.2547024360160344, "grad_norm": 0.11082252860069275, "learning_rate": 0.01, "loss": 2.0278, "step": 21936 }, { "epoch": 2.2550107924761025, "grad_norm": 0.0768849179148674, "learning_rate": 0.01, "loss": 2.0089, "step": 21939 }, { "epoch": 2.25531914893617, "grad_norm": 0.0420587994158268, "learning_rate": 0.01, "loss": 2.0188, "step": 21942 }, { "epoch": 2.2556275053962382, "grad_norm": 0.03924650698900223, "learning_rate": 0.01, "loss": 2.035, "step": 21945 }, { "epoch": 2.255935861856306, "grad_norm": 0.058580152690410614, "learning_rate": 0.01, "loss": 2.0358, "step": 21948 }, { "epoch": 2.2562442183163736, "grad_norm": 0.050332482904195786, "learning_rate": 0.01, "loss": 2.0261, "step": 21951 }, { "epoch": 2.2565525747764417, "grad_norm": 0.0472397580742836, "learning_rate": 0.01, "loss": 2.0196, "step": 21954 }, { "epoch": 2.2568609312365093, "grad_norm": 0.0500427670776844, "learning_rate": 0.01, "loss": 2.033, "step": 21957 }, { "epoch": 2.2571692876965774, "grad_norm": 0.08899175375699997, "learning_rate": 0.01, "loss": 2.0503, "step": 21960 }, { "epoch": 2.257477644156645, "grad_norm": 0.06647983938455582, "learning_rate": 0.01, "loss": 2.0087, "step": 21963 }, { "epoch": 2.2577860006167128, "grad_norm": 0.12703396379947662, "learning_rate": 0.01, "loss": 2.0532, "step": 21966 }, { "epoch": 2.258094357076781, "grad_norm": 0.06023648753762245, "learning_rate": 0.01, "loss": 2.0346, "step": 21969 }, { "epoch": 2.2584027135368485, "grad_norm": 0.05025608092546463, "learning_rate": 0.01, "loss": 2.0431, "step": 21972 }, { "epoch": 2.2587110699969166, "grad_norm": 0.043917398899793625, "learning_rate": 0.01, "loss": 2.0184, "step": 21975 }, { "epoch": 2.2590194264569843, "grad_norm": 0.03574421629309654, "learning_rate": 0.01, "loss": 2.021, "step": 21978 }, { "epoch": 2.259327782917052, "grad_norm": 0.03546285256743431, "learning_rate": 0.01, "loss": 2.0116, "step": 21981 }, { "epoch": 2.25963613937712, "grad_norm": 0.039241861552000046, "learning_rate": 0.01, "loss": 2.0314, "step": 21984 }, { "epoch": 2.2599444958371877, "grad_norm": 0.09643664956092834, "learning_rate": 0.01, "loss": 2.0378, "step": 21987 }, { "epoch": 2.260252852297256, "grad_norm": 0.08918357640504837, "learning_rate": 0.01, "loss": 2.0377, "step": 21990 }, { "epoch": 2.2605612087573235, "grad_norm": 0.05018826946616173, "learning_rate": 0.01, "loss": 2.0183, "step": 21993 }, { "epoch": 2.260869565217391, "grad_norm": 0.05288619175553322, "learning_rate": 0.01, "loss": 2.0333, "step": 21996 }, { "epoch": 2.2611779216774592, "grad_norm": 0.053800489753484726, "learning_rate": 0.01, "loss": 2.0359, "step": 21999 }, { "epoch": 2.261486278137527, "grad_norm": 0.11081501096487045, "learning_rate": 0.01, "loss": 2.0517, "step": 22002 }, { "epoch": 2.261794634597595, "grad_norm": 0.045440319925546646, "learning_rate": 0.01, "loss": 2.0105, "step": 22005 }, { "epoch": 2.2621029910576627, "grad_norm": 0.07007669657468796, "learning_rate": 0.01, "loss": 2.0411, "step": 22008 }, { "epoch": 2.2624113475177303, "grad_norm": 0.04412767291069031, "learning_rate": 0.01, "loss": 2.0345, "step": 22011 }, { "epoch": 2.2627197039777984, "grad_norm": 0.034647636115550995, "learning_rate": 0.01, "loss": 2.0236, "step": 22014 }, { "epoch": 2.263028060437866, "grad_norm": 0.04766898602247238, "learning_rate": 0.01, "loss": 2.0214, "step": 22017 }, { "epoch": 2.263336416897934, "grad_norm": 0.0641711950302124, "learning_rate": 0.01, "loss": 2.0195, "step": 22020 }, { "epoch": 2.263644773358002, "grad_norm": 0.05316673591732979, "learning_rate": 0.01, "loss": 2.0137, "step": 22023 }, { "epoch": 2.2639531298180695, "grad_norm": 0.03488560765981674, "learning_rate": 0.01, "loss": 2.0221, "step": 22026 }, { "epoch": 2.2642614862781376, "grad_norm": 0.05415144935250282, "learning_rate": 0.01, "loss": 2.0551, "step": 22029 }, { "epoch": 2.2645698427382053, "grad_norm": 0.08091796189546585, "learning_rate": 0.01, "loss": 2.0339, "step": 22032 }, { "epoch": 2.2648781991982734, "grad_norm": 0.0850050300359726, "learning_rate": 0.01, "loss": 2.0213, "step": 22035 }, { "epoch": 2.265186555658341, "grad_norm": 0.13837046921253204, "learning_rate": 0.01, "loss": 2.0352, "step": 22038 }, { "epoch": 2.2654949121184087, "grad_norm": 0.05676966533064842, "learning_rate": 0.01, "loss": 2.0357, "step": 22041 }, { "epoch": 2.265803268578477, "grad_norm": 0.1171020120382309, "learning_rate": 0.01, "loss": 2.0048, "step": 22044 }, { "epoch": 2.2661116250385445, "grad_norm": 0.04077135771512985, "learning_rate": 0.01, "loss": 2.0386, "step": 22047 }, { "epoch": 2.2664199814986126, "grad_norm": 0.03570380434393883, "learning_rate": 0.01, "loss": 2.0046, "step": 22050 }, { "epoch": 2.2667283379586802, "grad_norm": 0.0496290847659111, "learning_rate": 0.01, "loss": 2.0439, "step": 22053 }, { "epoch": 2.267036694418748, "grad_norm": 0.06205829232931137, "learning_rate": 0.01, "loss": 2.0346, "step": 22056 }, { "epoch": 2.267345050878816, "grad_norm": 0.04645274206995964, "learning_rate": 0.01, "loss": 2.0237, "step": 22059 }, { "epoch": 2.2676534073388837, "grad_norm": 0.04779735952615738, "learning_rate": 0.01, "loss": 2.009, "step": 22062 }, { "epoch": 2.2679617637989518, "grad_norm": 0.05166240781545639, "learning_rate": 0.01, "loss": 2.0391, "step": 22065 }, { "epoch": 2.2682701202590194, "grad_norm": 0.14004342257976532, "learning_rate": 0.01, "loss": 2.0309, "step": 22068 }, { "epoch": 2.268578476719087, "grad_norm": 0.06366194784641266, "learning_rate": 0.01, "loss": 2.0306, "step": 22071 }, { "epoch": 2.268886833179155, "grad_norm": 0.04914015159010887, "learning_rate": 0.01, "loss": 2.026, "step": 22074 }, { "epoch": 2.269195189639223, "grad_norm": 0.07121274620294571, "learning_rate": 0.01, "loss": 2.0302, "step": 22077 }, { "epoch": 2.269503546099291, "grad_norm": 0.048453398048877716, "learning_rate": 0.01, "loss": 2.047, "step": 22080 }, { "epoch": 2.2698119025593586, "grad_norm": 0.03777848929166794, "learning_rate": 0.01, "loss": 2.0643, "step": 22083 }, { "epoch": 2.2701202590194263, "grad_norm": 0.04788368567824364, "learning_rate": 0.01, "loss": 2.0296, "step": 22086 }, { "epoch": 2.2704286154794944, "grad_norm": 0.036985646933317184, "learning_rate": 0.01, "loss": 2.0124, "step": 22089 }, { "epoch": 2.270736971939562, "grad_norm": 0.08654552698135376, "learning_rate": 0.01, "loss": 2.0268, "step": 22092 }, { "epoch": 2.27104532839963, "grad_norm": 0.12740878760814667, "learning_rate": 0.01, "loss": 2.0423, "step": 22095 }, { "epoch": 2.271353684859698, "grad_norm": 0.058829743415117264, "learning_rate": 0.01, "loss": 2.0141, "step": 22098 }, { "epoch": 2.2716620413197655, "grad_norm": 0.04173153638839722, "learning_rate": 0.01, "loss": 2.0288, "step": 22101 }, { "epoch": 2.2719703977798336, "grad_norm": 0.04551135376095772, "learning_rate": 0.01, "loss": 2.0348, "step": 22104 }, { "epoch": 2.272278754239901, "grad_norm": 0.04404577612876892, "learning_rate": 0.01, "loss": 2.0043, "step": 22107 }, { "epoch": 2.2725871106999693, "grad_norm": 0.033306755125522614, "learning_rate": 0.01, "loss": 2.0333, "step": 22110 }, { "epoch": 2.272895467160037, "grad_norm": 0.03450062498450279, "learning_rate": 0.01, "loss": 2.0188, "step": 22113 }, { "epoch": 2.273203823620105, "grad_norm": 0.04665246978402138, "learning_rate": 0.01, "loss": 2.0002, "step": 22116 }, { "epoch": 2.2735121800801728, "grad_norm": 0.04256023094058037, "learning_rate": 0.01, "loss": 1.9977, "step": 22119 }, { "epoch": 2.2738205365402404, "grad_norm": 0.04408838599920273, "learning_rate": 0.01, "loss": 2.047, "step": 22122 }, { "epoch": 2.2741288930003085, "grad_norm": 0.17131304740905762, "learning_rate": 0.01, "loss": 2.0225, "step": 22125 }, { "epoch": 2.274437249460376, "grad_norm": 0.12661625444889069, "learning_rate": 0.01, "loss": 2.0417, "step": 22128 }, { "epoch": 2.274745605920444, "grad_norm": 0.1100076287984848, "learning_rate": 0.01, "loss": 2.0526, "step": 22131 }, { "epoch": 2.275053962380512, "grad_norm": 0.06313984096050262, "learning_rate": 0.01, "loss": 2.0372, "step": 22134 }, { "epoch": 2.2753623188405796, "grad_norm": 0.04563833773136139, "learning_rate": 0.01, "loss": 2.0356, "step": 22137 }, { "epoch": 2.2756706753006477, "grad_norm": 0.034519702196121216, "learning_rate": 0.01, "loss": 2.0394, "step": 22140 }, { "epoch": 2.2759790317607154, "grad_norm": 0.033708199858665466, "learning_rate": 0.01, "loss": 2.0418, "step": 22143 }, { "epoch": 2.2762873882207835, "grad_norm": 0.036958202719688416, "learning_rate": 0.01, "loss": 2.005, "step": 22146 }, { "epoch": 2.276595744680851, "grad_norm": 0.052069418132305145, "learning_rate": 0.01, "loss": 2.0288, "step": 22149 }, { "epoch": 2.276904101140919, "grad_norm": 0.03941415995359421, "learning_rate": 0.01, "loss": 2.0391, "step": 22152 }, { "epoch": 2.277212457600987, "grad_norm": 0.041270628571510315, "learning_rate": 0.01, "loss": 2.0574, "step": 22155 }, { "epoch": 2.2775208140610546, "grad_norm": 0.05398832634091377, "learning_rate": 0.01, "loss": 2.044, "step": 22158 }, { "epoch": 2.277829170521122, "grad_norm": 0.048906486481428146, "learning_rate": 0.01, "loss": 2.046, "step": 22161 }, { "epoch": 2.2781375269811903, "grad_norm": 0.15073837339878082, "learning_rate": 0.01, "loss": 2.0425, "step": 22164 }, { "epoch": 2.278445883441258, "grad_norm": 0.06301066279411316, "learning_rate": 0.01, "loss": 2.0516, "step": 22167 }, { "epoch": 2.278754239901326, "grad_norm": 0.07525215297937393, "learning_rate": 0.01, "loss": 2.0344, "step": 22170 }, { "epoch": 2.2790625963613937, "grad_norm": 0.059356629848480225, "learning_rate": 0.01, "loss": 2.0373, "step": 22173 }, { "epoch": 2.279370952821462, "grad_norm": 0.04340675100684166, "learning_rate": 0.01, "loss": 2.0172, "step": 22176 }, { "epoch": 2.2796793092815295, "grad_norm": 0.052859678864479065, "learning_rate": 0.01, "loss": 2.0215, "step": 22179 }, { "epoch": 2.279987665741597, "grad_norm": 0.05917971953749657, "learning_rate": 0.01, "loss": 2.0286, "step": 22182 }, { "epoch": 2.2802960222016653, "grad_norm": 0.04111889749765396, "learning_rate": 0.01, "loss": 2.0502, "step": 22185 }, { "epoch": 2.280604378661733, "grad_norm": 0.06371071189641953, "learning_rate": 0.01, "loss": 2.0264, "step": 22188 }, { "epoch": 2.2809127351218006, "grad_norm": 0.039862968027591705, "learning_rate": 0.01, "loss": 2.0247, "step": 22191 }, { "epoch": 2.2812210915818687, "grad_norm": 0.0944151058793068, "learning_rate": 0.01, "loss": 2.0257, "step": 22194 }, { "epoch": 2.2815294480419364, "grad_norm": 0.041564084589481354, "learning_rate": 0.01, "loss": 2.0282, "step": 22197 }, { "epoch": 2.2818378045020045, "grad_norm": 0.1109161302447319, "learning_rate": 0.01, "loss": 1.9969, "step": 22200 }, { "epoch": 2.282146160962072, "grad_norm": 0.14463242888450623, "learning_rate": 0.01, "loss": 2.0455, "step": 22203 }, { "epoch": 2.2824545174221402, "grad_norm": 0.06254401057958603, "learning_rate": 0.01, "loss": 2.0269, "step": 22206 }, { "epoch": 2.282762873882208, "grad_norm": 0.03546387329697609, "learning_rate": 0.01, "loss": 2.0116, "step": 22209 }, { "epoch": 2.2830712303422755, "grad_norm": 0.041129205375909805, "learning_rate": 0.01, "loss": 2.0323, "step": 22212 }, { "epoch": 2.2833795868023437, "grad_norm": 0.048473335802555084, "learning_rate": 0.01, "loss": 2.03, "step": 22215 }, { "epoch": 2.2836879432624113, "grad_norm": 0.0633682832121849, "learning_rate": 0.01, "loss": 2.0277, "step": 22218 }, { "epoch": 2.283996299722479, "grad_norm": 0.06431914120912552, "learning_rate": 0.01, "loss": 2.0353, "step": 22221 }, { "epoch": 2.284304656182547, "grad_norm": 0.09050017595291138, "learning_rate": 0.01, "loss": 2.0305, "step": 22224 }, { "epoch": 2.2846130126426147, "grad_norm": 0.07363586872816086, "learning_rate": 0.01, "loss": 2.0421, "step": 22227 }, { "epoch": 2.284921369102683, "grad_norm": 0.07984726130962372, "learning_rate": 0.01, "loss": 2.0216, "step": 22230 }, { "epoch": 2.2852297255627505, "grad_norm": 0.0845586284995079, "learning_rate": 0.01, "loss": 2.0083, "step": 22233 }, { "epoch": 2.2855380820228186, "grad_norm": 0.13392622768878937, "learning_rate": 0.01, "loss": 2.0339, "step": 22236 }, { "epoch": 2.2858464384828863, "grad_norm": 0.06791893392801285, "learning_rate": 0.01, "loss": 2.0457, "step": 22239 }, { "epoch": 2.286154794942954, "grad_norm": 0.044396497309207916, "learning_rate": 0.01, "loss": 2.0408, "step": 22242 }, { "epoch": 2.286463151403022, "grad_norm": 0.04964762553572655, "learning_rate": 0.01, "loss": 2.0259, "step": 22245 }, { "epoch": 2.2867715078630897, "grad_norm": 0.06517814844846725, "learning_rate": 0.01, "loss": 2.0352, "step": 22248 }, { "epoch": 2.2870798643231574, "grad_norm": 0.043722327798604965, "learning_rate": 0.01, "loss": 2.0111, "step": 22251 }, { "epoch": 2.2873882207832255, "grad_norm": 0.06852597743272781, "learning_rate": 0.01, "loss": 2.0377, "step": 22254 }, { "epoch": 2.287696577243293, "grad_norm": 0.13437853753566742, "learning_rate": 0.01, "loss": 2.0376, "step": 22257 }, { "epoch": 2.288004933703361, "grad_norm": 0.03924357146024704, "learning_rate": 0.01, "loss": 2.0283, "step": 22260 }, { "epoch": 2.288313290163429, "grad_norm": 0.039702124893665314, "learning_rate": 0.01, "loss": 2.0293, "step": 22263 }, { "epoch": 2.288621646623497, "grad_norm": 0.05187009647488594, "learning_rate": 0.01, "loss": 1.9957, "step": 22266 }, { "epoch": 2.2889300030835646, "grad_norm": 0.0682847648859024, "learning_rate": 0.01, "loss": 2.0251, "step": 22269 }, { "epoch": 2.2892383595436323, "grad_norm": 0.046652939170598984, "learning_rate": 0.01, "loss": 2.0481, "step": 22272 }, { "epoch": 2.2895467160037004, "grad_norm": 0.04348958283662796, "learning_rate": 0.01, "loss": 2.0492, "step": 22275 }, { "epoch": 2.289855072463768, "grad_norm": 0.05141732096672058, "learning_rate": 0.01, "loss": 2.0372, "step": 22278 }, { "epoch": 2.2901634289238357, "grad_norm": 0.04899610951542854, "learning_rate": 0.01, "loss": 2.0381, "step": 22281 }, { "epoch": 2.290471785383904, "grad_norm": 0.05614173039793968, "learning_rate": 0.01, "loss": 2.0082, "step": 22284 }, { "epoch": 2.2907801418439715, "grad_norm": 0.08255594223737717, "learning_rate": 0.01, "loss": 2.0079, "step": 22287 }, { "epoch": 2.2910884983040396, "grad_norm": 0.12105574458837509, "learning_rate": 0.01, "loss": 2.0341, "step": 22290 }, { "epoch": 2.2913968547641073, "grad_norm": 0.07021155953407288, "learning_rate": 0.01, "loss": 2.0051, "step": 22293 }, { "epoch": 2.2917052112241754, "grad_norm": 0.08364082872867584, "learning_rate": 0.01, "loss": 2.0445, "step": 22296 }, { "epoch": 2.292013567684243, "grad_norm": 0.1123763918876648, "learning_rate": 0.01, "loss": 2.0298, "step": 22299 }, { "epoch": 2.2923219241443107, "grad_norm": 0.06539402157068253, "learning_rate": 0.01, "loss": 2.0245, "step": 22302 }, { "epoch": 2.292630280604379, "grad_norm": 0.08749070018529892, "learning_rate": 0.01, "loss": 2.0336, "step": 22305 }, { "epoch": 2.2929386370644464, "grad_norm": 0.05580052733421326, "learning_rate": 0.01, "loss": 2.0519, "step": 22308 }, { "epoch": 2.293246993524514, "grad_norm": 0.07866258919239044, "learning_rate": 0.01, "loss": 2.0455, "step": 22311 }, { "epoch": 2.293555349984582, "grad_norm": 0.06253038346767426, "learning_rate": 0.01, "loss": 2.0423, "step": 22314 }, { "epoch": 2.29386370644465, "grad_norm": 0.0412861630320549, "learning_rate": 0.01, "loss": 2.0504, "step": 22317 }, { "epoch": 2.294172062904718, "grad_norm": 0.05568886548280716, "learning_rate": 0.01, "loss": 2.0397, "step": 22320 }, { "epoch": 2.2944804193647856, "grad_norm": 0.060274332761764526, "learning_rate": 0.01, "loss": 2.0563, "step": 22323 }, { "epoch": 2.2947887758248537, "grad_norm": 0.09785215556621552, "learning_rate": 0.01, "loss": 2.0378, "step": 22326 }, { "epoch": 2.2950971322849214, "grad_norm": 0.06171462684869766, "learning_rate": 0.01, "loss": 2.0246, "step": 22329 }, { "epoch": 2.295405488744989, "grad_norm": 0.1034122183918953, "learning_rate": 0.01, "loss": 2.0493, "step": 22332 }, { "epoch": 2.295713845205057, "grad_norm": 0.04855991527438164, "learning_rate": 0.01, "loss": 2.0401, "step": 22335 }, { "epoch": 2.296022201665125, "grad_norm": 0.05000189319252968, "learning_rate": 0.01, "loss": 2.0111, "step": 22338 }, { "epoch": 2.2963305581251925, "grad_norm": 0.031126966699957848, "learning_rate": 0.01, "loss": 2.0248, "step": 22341 }, { "epoch": 2.2966389145852606, "grad_norm": 0.08688051998615265, "learning_rate": 0.01, "loss": 2.0227, "step": 22344 }, { "epoch": 2.2969472710453283, "grad_norm": 0.08918049931526184, "learning_rate": 0.01, "loss": 2.0573, "step": 22347 }, { "epoch": 2.2972556275053964, "grad_norm": 0.0592593289911747, "learning_rate": 0.01, "loss": 2.0042, "step": 22350 }, { "epoch": 2.297563983965464, "grad_norm": 0.11501101404428482, "learning_rate": 0.01, "loss": 2.0348, "step": 22353 }, { "epoch": 2.297872340425532, "grad_norm": 0.12966668605804443, "learning_rate": 0.01, "loss": 2.0475, "step": 22356 }, { "epoch": 2.2981806968856, "grad_norm": 0.08889560401439667, "learning_rate": 0.01, "loss": 2.0499, "step": 22359 }, { "epoch": 2.2984890533456674, "grad_norm": 0.07578025758266449, "learning_rate": 0.01, "loss": 2.0479, "step": 22362 }, { "epoch": 2.2987974098057355, "grad_norm": 0.04283175244927406, "learning_rate": 0.01, "loss": 2.0009, "step": 22365 }, { "epoch": 2.299105766265803, "grad_norm": 0.038169119507074356, "learning_rate": 0.01, "loss": 2.0506, "step": 22368 }, { "epoch": 2.299414122725871, "grad_norm": 0.07047520577907562, "learning_rate": 0.01, "loss": 2.0435, "step": 22371 }, { "epoch": 2.299722479185939, "grad_norm": 0.06367038190364838, "learning_rate": 0.01, "loss": 2.0186, "step": 22374 }, { "epoch": 2.3000308356460066, "grad_norm": 0.06601911783218384, "learning_rate": 0.01, "loss": 2.026, "step": 22377 }, { "epoch": 2.3003391921060747, "grad_norm": 0.06333800405263901, "learning_rate": 0.01, "loss": 2.0244, "step": 22380 }, { "epoch": 2.3006475485661424, "grad_norm": 0.0635596215724945, "learning_rate": 0.01, "loss": 2.0102, "step": 22383 }, { "epoch": 2.3009559050262105, "grad_norm": 0.09258962422609329, "learning_rate": 0.01, "loss": 2.0348, "step": 22386 }, { "epoch": 2.301264261486278, "grad_norm": 0.04462733119726181, "learning_rate": 0.01, "loss": 1.9775, "step": 22389 }, { "epoch": 2.301572617946346, "grad_norm": 0.04802479222416878, "learning_rate": 0.01, "loss": 2.0557, "step": 22392 }, { "epoch": 2.301880974406414, "grad_norm": 0.0396190769970417, "learning_rate": 0.01, "loss": 2.0543, "step": 22395 }, { "epoch": 2.3021893308664816, "grad_norm": 0.05610959604382515, "learning_rate": 0.01, "loss": 2.048, "step": 22398 }, { "epoch": 2.3024976873265492, "grad_norm": 0.03454664349555969, "learning_rate": 0.01, "loss": 2.0592, "step": 22401 }, { "epoch": 2.3028060437866174, "grad_norm": 0.049920883029699326, "learning_rate": 0.01, "loss": 2.0533, "step": 22404 }, { "epoch": 2.303114400246685, "grad_norm": 0.04548259451985359, "learning_rate": 0.01, "loss": 2.045, "step": 22407 }, { "epoch": 2.303422756706753, "grad_norm": 0.07624956220388412, "learning_rate": 0.01, "loss": 2.0321, "step": 22410 }, { "epoch": 2.3037311131668208, "grad_norm": 0.1172046959400177, "learning_rate": 0.01, "loss": 2.0285, "step": 22413 }, { "epoch": 2.304039469626889, "grad_norm": 0.07652290165424347, "learning_rate": 0.01, "loss": 2.0324, "step": 22416 }, { "epoch": 2.3043478260869565, "grad_norm": 0.07458405196666718, "learning_rate": 0.01, "loss": 2.0351, "step": 22419 }, { "epoch": 2.304656182547024, "grad_norm": 0.08384053409099579, "learning_rate": 0.01, "loss": 2.0457, "step": 22422 }, { "epoch": 2.3049645390070923, "grad_norm": 0.0492565892636776, "learning_rate": 0.01, "loss": 2.0353, "step": 22425 }, { "epoch": 2.30527289546716, "grad_norm": 0.05125366151332855, "learning_rate": 0.01, "loss": 2.0447, "step": 22428 }, { "epoch": 2.3055812519272276, "grad_norm": 0.03920417279005051, "learning_rate": 0.01, "loss": 2.0471, "step": 22431 }, { "epoch": 2.3058896083872957, "grad_norm": 0.07206401228904724, "learning_rate": 0.01, "loss": 2.0312, "step": 22434 }, { "epoch": 2.3061979648473634, "grad_norm": 0.08888402581214905, "learning_rate": 0.01, "loss": 2.0331, "step": 22437 }, { "epoch": 2.3065063213074315, "grad_norm": 0.1788371056318283, "learning_rate": 0.01, "loss": 2.0461, "step": 22440 }, { "epoch": 2.306814677767499, "grad_norm": 0.11030272394418716, "learning_rate": 0.01, "loss": 2.0314, "step": 22443 }, { "epoch": 2.3071230342275673, "grad_norm": 0.08602694422006607, "learning_rate": 0.01, "loss": 2.0101, "step": 22446 }, { "epoch": 2.307431390687635, "grad_norm": 0.07269123196601868, "learning_rate": 0.01, "loss": 2.0068, "step": 22449 }, { "epoch": 2.3077397471477026, "grad_norm": 0.043196290731430054, "learning_rate": 0.01, "loss": 2.0617, "step": 22452 }, { "epoch": 2.3080481036077707, "grad_norm": 0.03932726010680199, "learning_rate": 0.01, "loss": 2.0314, "step": 22455 }, { "epoch": 2.3083564600678383, "grad_norm": 0.07751597464084625, "learning_rate": 0.01, "loss": 2.0225, "step": 22458 }, { "epoch": 2.3086648165279064, "grad_norm": 0.06549305468797684, "learning_rate": 0.01, "loss": 2.0386, "step": 22461 }, { "epoch": 2.308973172987974, "grad_norm": 0.05819348245859146, "learning_rate": 0.01, "loss": 2.0343, "step": 22464 }, { "epoch": 2.3092815294480418, "grad_norm": 0.04251622408628464, "learning_rate": 0.01, "loss": 2.02, "step": 22467 }, { "epoch": 2.30958988590811, "grad_norm": 0.03997926414012909, "learning_rate": 0.01, "loss": 2.0405, "step": 22470 }, { "epoch": 2.3098982423681775, "grad_norm": 0.060847315937280655, "learning_rate": 0.01, "loss": 2.0301, "step": 22473 }, { "epoch": 2.3102065988282456, "grad_norm": 0.188557431101799, "learning_rate": 0.01, "loss": 2.0388, "step": 22476 }, { "epoch": 2.3105149552883133, "grad_norm": 0.050608254969120026, "learning_rate": 0.01, "loss": 2.0376, "step": 22479 }, { "epoch": 2.310823311748381, "grad_norm": 0.0930873304605484, "learning_rate": 0.01, "loss": 2.0329, "step": 22482 }, { "epoch": 2.311131668208449, "grad_norm": 0.05231544002890587, "learning_rate": 0.01, "loss": 2.0646, "step": 22485 }, { "epoch": 2.3114400246685167, "grad_norm": 0.038996320217847824, "learning_rate": 0.01, "loss": 2.0198, "step": 22488 }, { "epoch": 2.311748381128585, "grad_norm": 0.04804287478327751, "learning_rate": 0.01, "loss": 2.0377, "step": 22491 }, { "epoch": 2.3120567375886525, "grad_norm": 0.05822224169969559, "learning_rate": 0.01, "loss": 2.0581, "step": 22494 }, { "epoch": 2.31236509404872, "grad_norm": 0.04677027836441994, "learning_rate": 0.01, "loss": 2.0552, "step": 22497 }, { "epoch": 2.3126734505087883, "grad_norm": 0.05270523950457573, "learning_rate": 0.01, "loss": 1.9882, "step": 22500 }, { "epoch": 2.312981806968856, "grad_norm": 0.04068690538406372, "learning_rate": 0.01, "loss": 2.0495, "step": 22503 }, { "epoch": 2.313290163428924, "grad_norm": 0.03488868847489357, "learning_rate": 0.01, "loss": 2.0382, "step": 22506 }, { "epoch": 2.3135985198889917, "grad_norm": 0.1007746085524559, "learning_rate": 0.01, "loss": 2.0356, "step": 22509 }, { "epoch": 2.3139068763490593, "grad_norm": 0.04351113364100456, "learning_rate": 0.01, "loss": 2.0513, "step": 22512 }, { "epoch": 2.3142152328091274, "grad_norm": 0.042396917939186096, "learning_rate": 0.01, "loss": 2.0427, "step": 22515 }, { "epoch": 2.314523589269195, "grad_norm": 0.05541342496871948, "learning_rate": 0.01, "loss": 2.0577, "step": 22518 }, { "epoch": 2.314831945729263, "grad_norm": 0.05093314126133919, "learning_rate": 0.01, "loss": 2.0127, "step": 22521 }, { "epoch": 2.315140302189331, "grad_norm": 0.09227500855922699, "learning_rate": 0.01, "loss": 2.0303, "step": 22524 }, { "epoch": 2.3154486586493985, "grad_norm": 0.07242649048566818, "learning_rate": 0.01, "loss": 2.0355, "step": 22527 }, { "epoch": 2.3157570151094666, "grad_norm": 0.07468149811029434, "learning_rate": 0.01, "loss": 2.0443, "step": 22530 }, { "epoch": 2.3160653715695343, "grad_norm": 0.0861506536602974, "learning_rate": 0.01, "loss": 2.0461, "step": 22533 }, { "epoch": 2.3163737280296024, "grad_norm": 0.05024004355072975, "learning_rate": 0.01, "loss": 2.0228, "step": 22536 }, { "epoch": 2.31668208448967, "grad_norm": 0.07255478948354721, "learning_rate": 0.01, "loss": 1.997, "step": 22539 }, { "epoch": 2.3169904409497377, "grad_norm": 0.08199785649776459, "learning_rate": 0.01, "loss": 2.0465, "step": 22542 }, { "epoch": 2.317298797409806, "grad_norm": 0.05194063484668732, "learning_rate": 0.01, "loss": 2.0426, "step": 22545 }, { "epoch": 2.3176071538698735, "grad_norm": 0.05785224214196205, "learning_rate": 0.01, "loss": 2.0311, "step": 22548 }, { "epoch": 2.3179155103299416, "grad_norm": 0.0767458900809288, "learning_rate": 0.01, "loss": 2.0338, "step": 22551 }, { "epoch": 2.3182238667900092, "grad_norm": 0.04905517399311066, "learning_rate": 0.01, "loss": 2.0369, "step": 22554 }, { "epoch": 2.318532223250077, "grad_norm": 0.12444967031478882, "learning_rate": 0.01, "loss": 1.9988, "step": 22557 }, { "epoch": 2.318840579710145, "grad_norm": 0.07965657114982605, "learning_rate": 0.01, "loss": 2.0438, "step": 22560 }, { "epoch": 2.3191489361702127, "grad_norm": 0.12167848646640778, "learning_rate": 0.01, "loss": 2.0359, "step": 22563 }, { "epoch": 2.3194572926302808, "grad_norm": 0.05483706668019295, "learning_rate": 0.01, "loss": 2.034, "step": 22566 }, { "epoch": 2.3197656490903484, "grad_norm": 0.043046776205301285, "learning_rate": 0.01, "loss": 2.0132, "step": 22569 }, { "epoch": 2.320074005550416, "grad_norm": 0.03646457940340042, "learning_rate": 0.01, "loss": 1.9867, "step": 22572 }, { "epoch": 2.320382362010484, "grad_norm": 0.05777902901172638, "learning_rate": 0.01, "loss": 2.0397, "step": 22575 }, { "epoch": 2.320690718470552, "grad_norm": 0.08646814525127411, "learning_rate": 0.01, "loss": 2.0294, "step": 22578 }, { "epoch": 2.32099907493062, "grad_norm": 0.10770904272794724, "learning_rate": 0.01, "loss": 2.0168, "step": 22581 }, { "epoch": 2.3213074313906876, "grad_norm": 0.11546061187982559, "learning_rate": 0.01, "loss": 2.0281, "step": 22584 }, { "epoch": 2.3216157878507553, "grad_norm": 0.08060076832771301, "learning_rate": 0.01, "loss": 2.0378, "step": 22587 }, { "epoch": 2.3219241443108234, "grad_norm": 0.05546190217137337, "learning_rate": 0.01, "loss": 2.0361, "step": 22590 }, { "epoch": 2.322232500770891, "grad_norm": 0.06310277432203293, "learning_rate": 0.01, "loss": 2.0261, "step": 22593 }, { "epoch": 2.322540857230959, "grad_norm": 0.037245072424411774, "learning_rate": 0.01, "loss": 2.038, "step": 22596 }, { "epoch": 2.322849213691027, "grad_norm": 0.05082730948925018, "learning_rate": 0.01, "loss": 2.0327, "step": 22599 }, { "epoch": 2.3231575701510945, "grad_norm": 0.054748065769672394, "learning_rate": 0.01, "loss": 2.0356, "step": 22602 }, { "epoch": 2.3234659266111626, "grad_norm": 0.06093902140855789, "learning_rate": 0.01, "loss": 2.0339, "step": 22605 }, { "epoch": 2.3237742830712302, "grad_norm": 0.10238637775182724, "learning_rate": 0.01, "loss": 2.0559, "step": 22608 }, { "epoch": 2.3240826395312983, "grad_norm": 0.0850609764456749, "learning_rate": 0.01, "loss": 2.0337, "step": 22611 }, { "epoch": 2.324390995991366, "grad_norm": 0.04108014330267906, "learning_rate": 0.01, "loss": 2.045, "step": 22614 }, { "epoch": 2.3246993524514337, "grad_norm": 0.03669281303882599, "learning_rate": 0.01, "loss": 2.0276, "step": 22617 }, { "epoch": 2.3250077089115018, "grad_norm": 0.053524449467659, "learning_rate": 0.01, "loss": 2.0076, "step": 22620 }, { "epoch": 2.3253160653715694, "grad_norm": 0.03545799478888512, "learning_rate": 0.01, "loss": 2.03, "step": 22623 }, { "epoch": 2.3256244218316375, "grad_norm": 0.0569877028465271, "learning_rate": 0.01, "loss": 2.0189, "step": 22626 }, { "epoch": 2.325932778291705, "grad_norm": 0.06943153589963913, "learning_rate": 0.01, "loss": 2.0183, "step": 22629 }, { "epoch": 2.326241134751773, "grad_norm": 0.07389956712722778, "learning_rate": 0.01, "loss": 2.0524, "step": 22632 }, { "epoch": 2.326549491211841, "grad_norm": 0.08397506922483444, "learning_rate": 0.01, "loss": 2.0214, "step": 22635 }, { "epoch": 2.3268578476719086, "grad_norm": 0.13598600029945374, "learning_rate": 0.01, "loss": 2.0276, "step": 22638 }, { "epoch": 2.3271662041319767, "grad_norm": 0.08953214436769485, "learning_rate": 0.01, "loss": 2.0282, "step": 22641 }, { "epoch": 2.3274745605920444, "grad_norm": 0.09812797605991364, "learning_rate": 0.01, "loss": 2.0631, "step": 22644 }, { "epoch": 2.3277829170521125, "grad_norm": 0.06696080416440964, "learning_rate": 0.01, "loss": 2.0314, "step": 22647 }, { "epoch": 2.32809127351218, "grad_norm": 0.07981070876121521, "learning_rate": 0.01, "loss": 2.0209, "step": 22650 }, { "epoch": 2.328399629972248, "grad_norm": 0.06655466556549072, "learning_rate": 0.01, "loss": 2.0426, "step": 22653 }, { "epoch": 2.328707986432316, "grad_norm": 0.061447955667972565, "learning_rate": 0.01, "loss": 2.0349, "step": 22656 }, { "epoch": 2.3290163428923836, "grad_norm": 0.08812141418457031, "learning_rate": 0.01, "loss": 2.0471, "step": 22659 }, { "epoch": 2.3293246993524512, "grad_norm": 0.04082084447145462, "learning_rate": 0.01, "loss": 2.0435, "step": 22662 }, { "epoch": 2.3296330558125193, "grad_norm": 0.04827709123492241, "learning_rate": 0.01, "loss": 2.0344, "step": 22665 }, { "epoch": 2.329941412272587, "grad_norm": 0.04531668871641159, "learning_rate": 0.01, "loss": 2.01, "step": 22668 }, { "epoch": 2.330249768732655, "grad_norm": 0.06739286333322525, "learning_rate": 0.01, "loss": 2.0479, "step": 22671 }, { "epoch": 2.3305581251927228, "grad_norm": 0.07753113657236099, "learning_rate": 0.01, "loss": 2.0454, "step": 22674 }, { "epoch": 2.330866481652791, "grad_norm": 0.1572093814611435, "learning_rate": 0.01, "loss": 2.0089, "step": 22677 }, { "epoch": 2.3311748381128585, "grad_norm": 0.0898297056555748, "learning_rate": 0.01, "loss": 2.0434, "step": 22680 }, { "epoch": 2.331483194572926, "grad_norm": 0.0803251639008522, "learning_rate": 0.01, "loss": 2.0421, "step": 22683 }, { "epoch": 2.3317915510329943, "grad_norm": 0.06155823543667793, "learning_rate": 0.01, "loss": 2.0106, "step": 22686 }, { "epoch": 2.332099907493062, "grad_norm": 0.03681538626551628, "learning_rate": 0.01, "loss": 2.0082, "step": 22689 }, { "epoch": 2.3324082639531296, "grad_norm": 0.03595598414540291, "learning_rate": 0.01, "loss": 2.0563, "step": 22692 }, { "epoch": 2.3327166204131977, "grad_norm": 0.06950604170560837, "learning_rate": 0.01, "loss": 2.0413, "step": 22695 }, { "epoch": 2.3330249768732654, "grad_norm": 0.07386364042758942, "learning_rate": 0.01, "loss": 2.0261, "step": 22698 }, { "epoch": 2.3333333333333335, "grad_norm": 0.06711214780807495, "learning_rate": 0.01, "loss": 2.0418, "step": 22701 }, { "epoch": 2.333641689793401, "grad_norm": 0.07357903569936752, "learning_rate": 0.01, "loss": 2.0271, "step": 22704 }, { "epoch": 2.3339500462534692, "grad_norm": 0.0660550519824028, "learning_rate": 0.01, "loss": 2.0242, "step": 22707 }, { "epoch": 2.334258402713537, "grad_norm": 0.05066053569316864, "learning_rate": 0.01, "loss": 2.0117, "step": 22710 }, { "epoch": 2.3345667591736046, "grad_norm": 0.04992471635341644, "learning_rate": 0.01, "loss": 2.0764, "step": 22713 }, { "epoch": 2.3348751156336727, "grad_norm": 0.03477726876735687, "learning_rate": 0.01, "loss": 2.0239, "step": 22716 }, { "epoch": 2.3351834720937403, "grad_norm": 0.05454208329319954, "learning_rate": 0.01, "loss": 2.0476, "step": 22719 }, { "epoch": 2.335491828553808, "grad_norm": 0.09009736031293869, "learning_rate": 0.01, "loss": 2.0026, "step": 22722 }, { "epoch": 2.335800185013876, "grad_norm": 0.10271045565605164, "learning_rate": 0.01, "loss": 2.0514, "step": 22725 }, { "epoch": 2.3361085414739438, "grad_norm": 0.05939007177948952, "learning_rate": 0.01, "loss": 2.0436, "step": 22728 }, { "epoch": 2.336416897934012, "grad_norm": 0.06454264372587204, "learning_rate": 0.01, "loss": 2.0334, "step": 22731 }, { "epoch": 2.3367252543940795, "grad_norm": 0.04892132803797722, "learning_rate": 0.01, "loss": 2.0305, "step": 22734 }, { "epoch": 2.3370336108541476, "grad_norm": 0.060863394290208817, "learning_rate": 0.01, "loss": 2.0365, "step": 22737 }, { "epoch": 2.3373419673142153, "grad_norm": 0.06073556840419769, "learning_rate": 0.01, "loss": 2.028, "step": 22740 }, { "epoch": 2.337650323774283, "grad_norm": 0.04620682820677757, "learning_rate": 0.01, "loss": 2.034, "step": 22743 }, { "epoch": 2.337958680234351, "grad_norm": 0.04834865778684616, "learning_rate": 0.01, "loss": 2.0407, "step": 22746 }, { "epoch": 2.3382670366944187, "grad_norm": 0.08154502511024475, "learning_rate": 0.01, "loss": 2.0477, "step": 22749 }, { "epoch": 2.3385753931544864, "grad_norm": 0.062378283590078354, "learning_rate": 0.01, "loss": 2.0256, "step": 22752 }, { "epoch": 2.3388837496145545, "grad_norm": 0.11560855805873871, "learning_rate": 0.01, "loss": 2.0059, "step": 22755 }, { "epoch": 2.339192106074622, "grad_norm": 0.14084307849407196, "learning_rate": 0.01, "loss": 2.0179, "step": 22758 }, { "epoch": 2.3395004625346902, "grad_norm": 0.08354218304157257, "learning_rate": 0.01, "loss": 2.0344, "step": 22761 }, { "epoch": 2.339808818994758, "grad_norm": 0.04660021886229515, "learning_rate": 0.01, "loss": 2.0225, "step": 22764 }, { "epoch": 2.340117175454826, "grad_norm": 0.06319580972194672, "learning_rate": 0.01, "loss": 2.0438, "step": 22767 }, { "epoch": 2.3404255319148937, "grad_norm": 0.06721585988998413, "learning_rate": 0.01, "loss": 2.0112, "step": 22770 }, { "epoch": 2.3407338883749613, "grad_norm": 0.08593994379043579, "learning_rate": 0.01, "loss": 2.0149, "step": 22773 }, { "epoch": 2.3410422448350294, "grad_norm": 0.07594014704227448, "learning_rate": 0.01, "loss": 2.045, "step": 22776 }, { "epoch": 2.341350601295097, "grad_norm": 0.05942634493112564, "learning_rate": 0.01, "loss": 2.0557, "step": 22779 }, { "epoch": 2.3416589577551647, "grad_norm": 0.09516235440969467, "learning_rate": 0.01, "loss": 2.0524, "step": 22782 }, { "epoch": 2.341967314215233, "grad_norm": 0.11480774730443954, "learning_rate": 0.01, "loss": 2.0112, "step": 22785 }, { "epoch": 2.3422756706753005, "grad_norm": 0.078018918633461, "learning_rate": 0.01, "loss": 2.0083, "step": 22788 }, { "epoch": 2.3425840271353686, "grad_norm": 0.04268670827150345, "learning_rate": 0.01, "loss": 1.9939, "step": 22791 }, { "epoch": 2.3428923835954363, "grad_norm": 0.04221808537840843, "learning_rate": 0.01, "loss": 2.0198, "step": 22794 }, { "epoch": 2.3432007400555044, "grad_norm": 0.04617379978299141, "learning_rate": 0.01, "loss": 2.036, "step": 22797 }, { "epoch": 2.343509096515572, "grad_norm": 0.05600395053625107, "learning_rate": 0.01, "loss": 2.0289, "step": 22800 }, { "epoch": 2.3438174529756397, "grad_norm": 0.04997321218252182, "learning_rate": 0.01, "loss": 2.0396, "step": 22803 }, { "epoch": 2.344125809435708, "grad_norm": 0.08891329169273376, "learning_rate": 0.01, "loss": 2.0464, "step": 22806 }, { "epoch": 2.3444341658957755, "grad_norm": 0.06883488595485687, "learning_rate": 0.01, "loss": 2.0417, "step": 22809 }, { "epoch": 2.344742522355843, "grad_norm": 0.05532229691743851, "learning_rate": 0.01, "loss": 2.0367, "step": 22812 }, { "epoch": 2.3450508788159112, "grad_norm": 0.04480346292257309, "learning_rate": 0.01, "loss": 2.036, "step": 22815 }, { "epoch": 2.345359235275979, "grad_norm": 0.06533756107091904, "learning_rate": 0.01, "loss": 2.0218, "step": 22818 }, { "epoch": 2.345667591736047, "grad_norm": 0.09851729869842529, "learning_rate": 0.01, "loss": 2.0175, "step": 22821 }, { "epoch": 2.3459759481961147, "grad_norm": 0.06026868894696236, "learning_rate": 0.01, "loss": 2.0359, "step": 22824 }, { "epoch": 2.3462843046561828, "grad_norm": 0.10084753483533859, "learning_rate": 0.01, "loss": 2.0348, "step": 22827 }, { "epoch": 2.3465926611162504, "grad_norm": 0.09302054345607758, "learning_rate": 0.01, "loss": 2.0376, "step": 22830 }, { "epoch": 2.346901017576318, "grad_norm": 0.06304951757192612, "learning_rate": 0.01, "loss": 2.0254, "step": 22833 }, { "epoch": 2.347209374036386, "grad_norm": 0.03625926375389099, "learning_rate": 0.01, "loss": 2.037, "step": 22836 }, { "epoch": 2.347517730496454, "grad_norm": 0.0602838359773159, "learning_rate": 0.01, "loss": 2.0258, "step": 22839 }, { "epoch": 2.3478260869565215, "grad_norm": 0.08285371959209442, "learning_rate": 0.01, "loss": 2.0299, "step": 22842 }, { "epoch": 2.3481344434165896, "grad_norm": 0.05780784785747528, "learning_rate": 0.01, "loss": 2.0288, "step": 22845 }, { "epoch": 2.3484427998766573, "grad_norm": 0.07491873949766159, "learning_rate": 0.01, "loss": 2.0327, "step": 22848 }, { "epoch": 2.3487511563367254, "grad_norm": 0.04745258018374443, "learning_rate": 0.01, "loss": 2.0347, "step": 22851 }, { "epoch": 2.349059512796793, "grad_norm": 0.030006757006049156, "learning_rate": 0.01, "loss": 2.0315, "step": 22854 }, { "epoch": 2.349367869256861, "grad_norm": 0.035286273807287216, "learning_rate": 0.01, "loss": 2.0198, "step": 22857 }, { "epoch": 2.349676225716929, "grad_norm": 0.08970153331756592, "learning_rate": 0.01, "loss": 2.0473, "step": 22860 }, { "epoch": 2.3499845821769965, "grad_norm": 0.06819456070661545, "learning_rate": 0.01, "loss": 2.0246, "step": 22863 }, { "epoch": 2.3502929386370646, "grad_norm": 0.07130144536495209, "learning_rate": 0.01, "loss": 2.019, "step": 22866 }, { "epoch": 2.3506012950971322, "grad_norm": 0.046634428203105927, "learning_rate": 0.01, "loss": 2.0338, "step": 22869 }, { "epoch": 2.3509096515572, "grad_norm": 0.10333798825740814, "learning_rate": 0.01, "loss": 2.0348, "step": 22872 }, { "epoch": 2.351218008017268, "grad_norm": 0.06691887974739075, "learning_rate": 0.01, "loss": 2.0254, "step": 22875 }, { "epoch": 2.3515263644773357, "grad_norm": 0.06780122220516205, "learning_rate": 0.01, "loss": 2.052, "step": 22878 }, { "epoch": 2.3518347209374038, "grad_norm": 0.04448529705405235, "learning_rate": 0.01, "loss": 2.0322, "step": 22881 }, { "epoch": 2.3521430773974714, "grad_norm": 0.09176638722419739, "learning_rate": 0.01, "loss": 2.0168, "step": 22884 }, { "epoch": 2.3524514338575395, "grad_norm": 0.05699750408530235, "learning_rate": 0.01, "loss": 2.0241, "step": 22887 }, { "epoch": 2.352759790317607, "grad_norm": 0.0556836873292923, "learning_rate": 0.01, "loss": 2.0328, "step": 22890 }, { "epoch": 2.353068146777675, "grad_norm": 0.05335136875510216, "learning_rate": 0.01, "loss": 2.0072, "step": 22893 }, { "epoch": 2.353376503237743, "grad_norm": 0.06900188326835632, "learning_rate": 0.01, "loss": 2.0392, "step": 22896 }, { "epoch": 2.3536848596978106, "grad_norm": 0.07300775498151779, "learning_rate": 0.01, "loss": 2.0341, "step": 22899 }, { "epoch": 2.3539932161578783, "grad_norm": 0.0912926122546196, "learning_rate": 0.01, "loss": 2.041, "step": 22902 }, { "epoch": 2.3543015726179464, "grad_norm": 0.0803481861948967, "learning_rate": 0.01, "loss": 2.0225, "step": 22905 }, { "epoch": 2.354609929078014, "grad_norm": 0.05067334324121475, "learning_rate": 0.01, "loss": 2.0187, "step": 22908 }, { "epoch": 2.354918285538082, "grad_norm": 0.10282410681247711, "learning_rate": 0.01, "loss": 2.0349, "step": 22911 }, { "epoch": 2.35522664199815, "grad_norm": 0.04224551469087601, "learning_rate": 0.01, "loss": 2.0166, "step": 22914 }, { "epoch": 2.355534998458218, "grad_norm": 0.06768529862165451, "learning_rate": 0.01, "loss": 2.0304, "step": 22917 }, { "epoch": 2.3558433549182856, "grad_norm": 0.10372413694858551, "learning_rate": 0.01, "loss": 2.0421, "step": 22920 }, { "epoch": 2.356151711378353, "grad_norm": 0.05974859744310379, "learning_rate": 0.01, "loss": 2.0231, "step": 22923 }, { "epoch": 2.3564600678384213, "grad_norm": 0.06408294290304184, "learning_rate": 0.01, "loss": 2.0199, "step": 22926 }, { "epoch": 2.356768424298489, "grad_norm": 0.053037382662296295, "learning_rate": 0.01, "loss": 2.0286, "step": 22929 }, { "epoch": 2.3570767807585566, "grad_norm": 0.038871798664331436, "learning_rate": 0.01, "loss": 2.0588, "step": 22932 }, { "epoch": 2.3573851372186247, "grad_norm": 0.046300627291202545, "learning_rate": 0.01, "loss": 2.033, "step": 22935 }, { "epoch": 2.3576934936786924, "grad_norm": 0.05344367399811745, "learning_rate": 0.01, "loss": 2.0462, "step": 22938 }, { "epoch": 2.3580018501387605, "grad_norm": 0.0878051146864891, "learning_rate": 0.01, "loss": 2.0173, "step": 22941 }, { "epoch": 2.358310206598828, "grad_norm": 0.09469857066869736, "learning_rate": 0.01, "loss": 2.0373, "step": 22944 }, { "epoch": 2.3586185630588963, "grad_norm": 0.04746483266353607, "learning_rate": 0.01, "loss": 2.0155, "step": 22947 }, { "epoch": 2.358926919518964, "grad_norm": 0.051294367760419846, "learning_rate": 0.01, "loss": 2.0452, "step": 22950 }, { "epoch": 2.3592352759790316, "grad_norm": 0.04378626495599747, "learning_rate": 0.01, "loss": 2.0381, "step": 22953 }, { "epoch": 2.3595436324390997, "grad_norm": 0.054627690464258194, "learning_rate": 0.01, "loss": 2.0592, "step": 22956 }, { "epoch": 2.3598519888991674, "grad_norm": 0.04732866585254669, "learning_rate": 0.01, "loss": 2.0284, "step": 22959 }, { "epoch": 2.3601603453592355, "grad_norm": 0.11569249629974365, "learning_rate": 0.01, "loss": 2.007, "step": 22962 }, { "epoch": 2.360468701819303, "grad_norm": 0.10606824606657028, "learning_rate": 0.01, "loss": 2.0357, "step": 22965 }, { "epoch": 2.360777058279371, "grad_norm": 0.05072540044784546, "learning_rate": 0.01, "loss": 2.0581, "step": 22968 }, { "epoch": 2.361085414739439, "grad_norm": 0.0738227441906929, "learning_rate": 0.01, "loss": 2.0305, "step": 22971 }, { "epoch": 2.3613937711995066, "grad_norm": 0.06440632790327072, "learning_rate": 0.01, "loss": 2.0315, "step": 22974 }, { "epoch": 2.3617021276595747, "grad_norm": 0.05426304414868355, "learning_rate": 0.01, "loss": 2.043, "step": 22977 }, { "epoch": 2.3620104841196423, "grad_norm": 0.06414210051298141, "learning_rate": 0.01, "loss": 2.0735, "step": 22980 }, { "epoch": 2.36231884057971, "grad_norm": 0.034726858139038086, "learning_rate": 0.01, "loss": 2.0422, "step": 22983 }, { "epoch": 2.362627197039778, "grad_norm": 0.04326315596699715, "learning_rate": 0.01, "loss": 2.0422, "step": 22986 }, { "epoch": 2.3629355534998457, "grad_norm": 0.06470071524381638, "learning_rate": 0.01, "loss": 2.0192, "step": 22989 }, { "epoch": 2.363243909959914, "grad_norm": 0.09161586314439774, "learning_rate": 0.01, "loss": 2.02, "step": 22992 }, { "epoch": 2.3635522664199815, "grad_norm": 0.035171300172805786, "learning_rate": 0.01, "loss": 2.0413, "step": 22995 }, { "epoch": 2.363860622880049, "grad_norm": 0.059095609933137894, "learning_rate": 0.01, "loss": 2.0237, "step": 22998 }, { "epoch": 2.3641689793401173, "grad_norm": 0.094328872859478, "learning_rate": 0.01, "loss": 2.0362, "step": 23001 }, { "epoch": 2.364477335800185, "grad_norm": 0.059930335730314255, "learning_rate": 0.01, "loss": 2.0274, "step": 23004 }, { "epoch": 2.364785692260253, "grad_norm": 0.08969483524560928, "learning_rate": 0.01, "loss": 2.04, "step": 23007 }, { "epoch": 2.3650940487203207, "grad_norm": 0.054753273725509644, "learning_rate": 0.01, "loss": 2.0195, "step": 23010 }, { "epoch": 2.3654024051803884, "grad_norm": 0.09794525057077408, "learning_rate": 0.01, "loss": 2.0385, "step": 23013 }, { "epoch": 2.3657107616404565, "grad_norm": 0.07700082659721375, "learning_rate": 0.01, "loss": 2.0483, "step": 23016 }, { "epoch": 2.366019118100524, "grad_norm": 0.03871360793709755, "learning_rate": 0.01, "loss": 2.0255, "step": 23019 }, { "epoch": 2.3663274745605922, "grad_norm": 0.08822629600763321, "learning_rate": 0.01, "loss": 2.0296, "step": 23022 }, { "epoch": 2.36663583102066, "grad_norm": 0.08018068969249725, "learning_rate": 0.01, "loss": 2.0405, "step": 23025 }, { "epoch": 2.3669441874807275, "grad_norm": 0.046322405338287354, "learning_rate": 0.01, "loss": 2.0345, "step": 23028 }, { "epoch": 2.3672525439407957, "grad_norm": 0.08056683838367462, "learning_rate": 0.01, "loss": 2.035, "step": 23031 }, { "epoch": 2.3675609004008633, "grad_norm": 0.059787072241306305, "learning_rate": 0.01, "loss": 2.0163, "step": 23034 }, { "epoch": 2.3678692568609314, "grad_norm": 0.07793419063091278, "learning_rate": 0.01, "loss": 2.0039, "step": 23037 }, { "epoch": 2.368177613320999, "grad_norm": 0.1337706595659256, "learning_rate": 0.01, "loss": 2.0138, "step": 23040 }, { "epoch": 2.3684859697810667, "grad_norm": 0.0608229860663414, "learning_rate": 0.01, "loss": 2.0064, "step": 23043 }, { "epoch": 2.368794326241135, "grad_norm": 0.07523812353610992, "learning_rate": 0.01, "loss": 2.0243, "step": 23046 }, { "epoch": 2.3691026827012025, "grad_norm": 0.05788956955075264, "learning_rate": 0.01, "loss": 2.0354, "step": 23049 }, { "epoch": 2.3694110391612706, "grad_norm": 0.05372630059719086, "learning_rate": 0.01, "loss": 2.0188, "step": 23052 }, { "epoch": 2.3697193956213383, "grad_norm": 0.0775051936507225, "learning_rate": 0.01, "loss": 2.0018, "step": 23055 }, { "epoch": 2.370027752081406, "grad_norm": 0.042055148631334305, "learning_rate": 0.01, "loss": 2.0152, "step": 23058 }, { "epoch": 2.370336108541474, "grad_norm": 0.03500501438975334, "learning_rate": 0.01, "loss": 2.0111, "step": 23061 }, { "epoch": 2.3706444650015417, "grad_norm": 0.05405488237738609, "learning_rate": 0.01, "loss": 2.0569, "step": 23064 }, { "epoch": 2.37095282146161, "grad_norm": 0.04199660196900368, "learning_rate": 0.01, "loss": 2.024, "step": 23067 }, { "epoch": 2.3712611779216775, "grad_norm": 0.10155687481164932, "learning_rate": 0.01, "loss": 2.0335, "step": 23070 }, { "epoch": 2.371569534381745, "grad_norm": 0.04267631843686104, "learning_rate": 0.01, "loss": 2.0313, "step": 23073 }, { "epoch": 2.371877890841813, "grad_norm": 0.08961319178342819, "learning_rate": 0.01, "loss": 2.0308, "step": 23076 }, { "epoch": 2.372186247301881, "grad_norm": 0.06389858573675156, "learning_rate": 0.01, "loss": 2.02, "step": 23079 }, { "epoch": 2.372494603761949, "grad_norm": 0.0733552873134613, "learning_rate": 0.01, "loss": 2.0304, "step": 23082 }, { "epoch": 2.3728029602220166, "grad_norm": 0.04577986150979996, "learning_rate": 0.01, "loss": 2.0137, "step": 23085 }, { "epoch": 2.3731113166820843, "grad_norm": 0.07929688692092896, "learning_rate": 0.01, "loss": 2.0357, "step": 23088 }, { "epoch": 2.3734196731421524, "grad_norm": 0.07446785271167755, "learning_rate": 0.01, "loss": 2.0058, "step": 23091 }, { "epoch": 2.37372802960222, "grad_norm": 0.08491658419370651, "learning_rate": 0.01, "loss": 2.0372, "step": 23094 }, { "epoch": 2.374036386062288, "grad_norm": 0.04434317350387573, "learning_rate": 0.01, "loss": 2.022, "step": 23097 }, { "epoch": 2.374344742522356, "grad_norm": 0.031715601682662964, "learning_rate": 0.01, "loss": 2.0436, "step": 23100 }, { "epoch": 2.3746530989824235, "grad_norm": 0.05073494091629982, "learning_rate": 0.01, "loss": 2.0205, "step": 23103 }, { "epoch": 2.3749614554424916, "grad_norm": 0.0484505333006382, "learning_rate": 0.01, "loss": 2.0388, "step": 23106 }, { "epoch": 2.3752698119025593, "grad_norm": 0.036467526108026505, "learning_rate": 0.01, "loss": 2.0136, "step": 23109 }, { "epoch": 2.3755781683626274, "grad_norm": 0.048451874405145645, "learning_rate": 0.01, "loss": 2.014, "step": 23112 }, { "epoch": 2.375886524822695, "grad_norm": 0.048714105039834976, "learning_rate": 0.01, "loss": 2.0546, "step": 23115 }, { "epoch": 2.3761948812827627, "grad_norm": 0.07499600946903229, "learning_rate": 0.01, "loss": 2.024, "step": 23118 }, { "epoch": 2.376503237742831, "grad_norm": 0.052757177501916885, "learning_rate": 0.01, "loss": 2.07, "step": 23121 }, { "epoch": 2.3768115942028984, "grad_norm": 0.062441807240247726, "learning_rate": 0.01, "loss": 2.0156, "step": 23124 }, { "epoch": 2.3771199506629666, "grad_norm": 0.04984834045171738, "learning_rate": 0.01, "loss": 2.0046, "step": 23127 }, { "epoch": 2.377428307123034, "grad_norm": 0.12042737007141113, "learning_rate": 0.01, "loss": 2.0043, "step": 23130 }, { "epoch": 2.377736663583102, "grad_norm": 0.08114577829837799, "learning_rate": 0.01, "loss": 2.0212, "step": 23133 }, { "epoch": 2.37804502004317, "grad_norm": 0.044427551329135895, "learning_rate": 0.01, "loss": 2.0127, "step": 23136 }, { "epoch": 2.3783533765032376, "grad_norm": 0.04814080893993378, "learning_rate": 0.01, "loss": 2.0237, "step": 23139 }, { "epoch": 2.3786617329633057, "grad_norm": 0.03783731907606125, "learning_rate": 0.01, "loss": 2.0293, "step": 23142 }, { "epoch": 2.3789700894233734, "grad_norm": 0.03743833675980568, "learning_rate": 0.01, "loss": 2.0446, "step": 23145 }, { "epoch": 2.3792784458834415, "grad_norm": 0.05341466888785362, "learning_rate": 0.01, "loss": 2.0071, "step": 23148 }, { "epoch": 2.379586802343509, "grad_norm": 0.04965018108487129, "learning_rate": 0.01, "loss": 2.0257, "step": 23151 }, { "epoch": 2.379895158803577, "grad_norm": 0.06330037117004395, "learning_rate": 0.01, "loss": 2.0466, "step": 23154 }, { "epoch": 2.380203515263645, "grad_norm": 0.04018980264663696, "learning_rate": 0.01, "loss": 2.045, "step": 23157 }, { "epoch": 2.3805118717237126, "grad_norm": 0.04469529166817665, "learning_rate": 0.01, "loss": 2.0437, "step": 23160 }, { "epoch": 2.3808202281837803, "grad_norm": 0.07242292910814285, "learning_rate": 0.01, "loss": 2.0199, "step": 23163 }, { "epoch": 2.3811285846438484, "grad_norm": 0.05165792256593704, "learning_rate": 0.01, "loss": 2.0459, "step": 23166 }, { "epoch": 2.381436941103916, "grad_norm": 0.12728899717330933, "learning_rate": 0.01, "loss": 2.0086, "step": 23169 }, { "epoch": 2.381745297563984, "grad_norm": 0.07357775419950485, "learning_rate": 0.01, "loss": 2.0436, "step": 23172 }, { "epoch": 2.382053654024052, "grad_norm": 0.05125182494521141, "learning_rate": 0.01, "loss": 2.0269, "step": 23175 }, { "epoch": 2.38236201048412, "grad_norm": 0.061880383640527725, "learning_rate": 0.01, "loss": 2.0355, "step": 23178 }, { "epoch": 2.3826703669441875, "grad_norm": 0.03774267062544823, "learning_rate": 0.01, "loss": 2.0205, "step": 23181 }, { "epoch": 2.382978723404255, "grad_norm": 0.04750019684433937, "learning_rate": 0.01, "loss": 2.046, "step": 23184 }, { "epoch": 2.3832870798643233, "grad_norm": 0.06727109849452972, "learning_rate": 0.01, "loss": 2.0545, "step": 23187 }, { "epoch": 2.383595436324391, "grad_norm": 0.08848878741264343, "learning_rate": 0.01, "loss": 2.0458, "step": 23190 }, { "epoch": 2.3839037927844586, "grad_norm": 0.07577743381261826, "learning_rate": 0.01, "loss": 2.0502, "step": 23193 }, { "epoch": 2.3842121492445267, "grad_norm": 0.07908739149570465, "learning_rate": 0.01, "loss": 2.0516, "step": 23196 }, { "epoch": 2.3845205057045944, "grad_norm": 0.08100943267345428, "learning_rate": 0.01, "loss": 2.036, "step": 23199 }, { "epoch": 2.3848288621646625, "grad_norm": 0.12388890236616135, "learning_rate": 0.01, "loss": 2.0506, "step": 23202 }, { "epoch": 2.38513721862473, "grad_norm": 0.0538008026778698, "learning_rate": 0.01, "loss": 2.0236, "step": 23205 }, { "epoch": 2.3854455750847983, "grad_norm": 0.04448952525854111, "learning_rate": 0.01, "loss": 2.0354, "step": 23208 }, { "epoch": 2.385753931544866, "grad_norm": 0.038850218057632446, "learning_rate": 0.01, "loss": 2.0368, "step": 23211 }, { "epoch": 2.3860622880049336, "grad_norm": 0.10436736047267914, "learning_rate": 0.01, "loss": 2.0316, "step": 23214 }, { "epoch": 2.3863706444650017, "grad_norm": 0.03787916898727417, "learning_rate": 0.01, "loss": 2.0358, "step": 23217 }, { "epoch": 2.3866790009250693, "grad_norm": 0.0874573364853859, "learning_rate": 0.01, "loss": 2.0306, "step": 23220 }, { "epoch": 2.386987357385137, "grad_norm": 0.08322203904390335, "learning_rate": 0.01, "loss": 2.0495, "step": 23223 }, { "epoch": 2.387295713845205, "grad_norm": 0.034374579787254333, "learning_rate": 0.01, "loss": 2.0047, "step": 23226 }, { "epoch": 2.3876040703052728, "grad_norm": 0.03772469237446785, "learning_rate": 0.01, "loss": 2.0309, "step": 23229 }, { "epoch": 2.387912426765341, "grad_norm": 0.12290430814027786, "learning_rate": 0.01, "loss": 2.0343, "step": 23232 }, { "epoch": 2.3882207832254085, "grad_norm": 0.04306014999747276, "learning_rate": 0.01, "loss": 2.0248, "step": 23235 }, { "epoch": 2.3885291396854766, "grad_norm": 0.10940956324338913, "learning_rate": 0.01, "loss": 2.0322, "step": 23238 }, { "epoch": 2.3888374961455443, "grad_norm": 0.06950829923152924, "learning_rate": 0.01, "loss": 2.0168, "step": 23241 }, { "epoch": 2.389145852605612, "grad_norm": 0.11061616986989975, "learning_rate": 0.01, "loss": 2.017, "step": 23244 }, { "epoch": 2.38945420906568, "grad_norm": 0.051813628524541855, "learning_rate": 0.01, "loss": 2.0657, "step": 23247 }, { "epoch": 2.3897625655257477, "grad_norm": 0.10411377251148224, "learning_rate": 0.01, "loss": 2.0044, "step": 23250 }, { "epoch": 2.3900709219858154, "grad_norm": 0.03951719403266907, "learning_rate": 0.01, "loss": 2.0135, "step": 23253 }, { "epoch": 2.3903792784458835, "grad_norm": 0.08920100331306458, "learning_rate": 0.01, "loss": 2.0523, "step": 23256 }, { "epoch": 2.390687634905951, "grad_norm": 0.03843710198998451, "learning_rate": 0.01, "loss": 2.0387, "step": 23259 }, { "epoch": 2.3909959913660193, "grad_norm": 0.09093856811523438, "learning_rate": 0.01, "loss": 2.0477, "step": 23262 }, { "epoch": 2.391304347826087, "grad_norm": 0.06239760294556618, "learning_rate": 0.01, "loss": 2.0407, "step": 23265 }, { "epoch": 2.391612704286155, "grad_norm": 0.07422836124897003, "learning_rate": 0.01, "loss": 2.0232, "step": 23268 }, { "epoch": 2.3919210607462227, "grad_norm": 0.04418382793664932, "learning_rate": 0.01, "loss": 2.0346, "step": 23271 }, { "epoch": 2.3922294172062903, "grad_norm": 0.08631134033203125, "learning_rate": 0.01, "loss": 2.0675, "step": 23274 }, { "epoch": 2.3925377736663584, "grad_norm": 0.05164894834160805, "learning_rate": 0.01, "loss": 2.0328, "step": 23277 }, { "epoch": 2.392846130126426, "grad_norm": 0.056628335267305374, "learning_rate": 0.01, "loss": 2.0462, "step": 23280 }, { "epoch": 2.3931544865864938, "grad_norm": 0.10568714141845703, "learning_rate": 0.01, "loss": 2.0295, "step": 23283 }, { "epoch": 2.393462843046562, "grad_norm": 0.1235639825463295, "learning_rate": 0.01, "loss": 2.0149, "step": 23286 }, { "epoch": 2.3937711995066295, "grad_norm": 0.04766182228922844, "learning_rate": 0.01, "loss": 2.016, "step": 23289 }, { "epoch": 2.3940795559666976, "grad_norm": 0.040086500346660614, "learning_rate": 0.01, "loss": 2.0011, "step": 23292 }, { "epoch": 2.3943879124267653, "grad_norm": 0.04474460333585739, "learning_rate": 0.01, "loss": 2.003, "step": 23295 }, { "epoch": 2.3946962688868334, "grad_norm": 0.03349655494093895, "learning_rate": 0.01, "loss": 2.0251, "step": 23298 }, { "epoch": 2.395004625346901, "grad_norm": 0.04361939802765846, "learning_rate": 0.01, "loss": 2.0105, "step": 23301 }, { "epoch": 2.3953129818069687, "grad_norm": 0.08834217488765717, "learning_rate": 0.01, "loss": 2.0455, "step": 23304 }, { "epoch": 2.395621338267037, "grad_norm": 0.09799984842538834, "learning_rate": 0.01, "loss": 2.0153, "step": 23307 }, { "epoch": 2.3959296947271045, "grad_norm": 0.06085311621427536, "learning_rate": 0.01, "loss": 2.0326, "step": 23310 }, { "epoch": 2.396238051187172, "grad_norm": 0.04466985911130905, "learning_rate": 0.01, "loss": 2.0122, "step": 23313 }, { "epoch": 2.3965464076472403, "grad_norm": 0.05248266085982323, "learning_rate": 0.01, "loss": 2.0388, "step": 23316 }, { "epoch": 2.396854764107308, "grad_norm": 0.06024044379591942, "learning_rate": 0.01, "loss": 2.0232, "step": 23319 }, { "epoch": 2.397163120567376, "grad_norm": 0.05061257630586624, "learning_rate": 0.01, "loss": 2.0259, "step": 23322 }, { "epoch": 2.3974714770274437, "grad_norm": 0.04136064276099205, "learning_rate": 0.01, "loss": 2.0356, "step": 23325 }, { "epoch": 2.397779833487512, "grad_norm": 0.048327211290597916, "learning_rate": 0.01, "loss": 2.0158, "step": 23328 }, { "epoch": 2.3980881899475794, "grad_norm": 0.038865648210048676, "learning_rate": 0.01, "loss": 2.0312, "step": 23331 }, { "epoch": 2.398396546407647, "grad_norm": 0.04522191360592842, "learning_rate": 0.01, "loss": 2.0125, "step": 23334 }, { "epoch": 2.398704902867715, "grad_norm": 0.033488884568214417, "learning_rate": 0.01, "loss": 2.0496, "step": 23337 }, { "epoch": 2.399013259327783, "grad_norm": 0.05207303538918495, "learning_rate": 0.01, "loss": 2.0254, "step": 23340 }, { "epoch": 2.3993216157878505, "grad_norm": 0.09892462193965912, "learning_rate": 0.01, "loss": 2.0294, "step": 23343 }, { "epoch": 2.3996299722479186, "grad_norm": 0.07379446923732758, "learning_rate": 0.01, "loss": 2.0351, "step": 23346 }, { "epoch": 2.3999383287079863, "grad_norm": 0.07604874670505524, "learning_rate": 0.01, "loss": 2.0348, "step": 23349 }, { "epoch": 2.4002466851680544, "grad_norm": 0.05876392871141434, "learning_rate": 0.01, "loss": 2.0442, "step": 23352 }, { "epoch": 2.400555041628122, "grad_norm": 0.06497185677289963, "learning_rate": 0.01, "loss": 2.0289, "step": 23355 }, { "epoch": 2.40086339808819, "grad_norm": 0.059574466198682785, "learning_rate": 0.01, "loss": 2.0056, "step": 23358 }, { "epoch": 2.401171754548258, "grad_norm": 0.03427527844905853, "learning_rate": 0.01, "loss": 2.0115, "step": 23361 }, { "epoch": 2.4014801110083255, "grad_norm": 0.04248304292559624, "learning_rate": 0.01, "loss": 2.022, "step": 23364 }, { "epoch": 2.4017884674683936, "grad_norm": 0.11010278761386871, "learning_rate": 0.01, "loss": 2.0378, "step": 23367 }, { "epoch": 2.4020968239284612, "grad_norm": 0.03992384672164917, "learning_rate": 0.01, "loss": 2.024, "step": 23370 }, { "epoch": 2.402405180388529, "grad_norm": 0.0774572566151619, "learning_rate": 0.01, "loss": 2.047, "step": 23373 }, { "epoch": 2.402713536848597, "grad_norm": 0.0753302052617073, "learning_rate": 0.01, "loss": 2.0182, "step": 23376 }, { "epoch": 2.4030218933086647, "grad_norm": 0.06872344017028809, "learning_rate": 0.01, "loss": 2.041, "step": 23379 }, { "epoch": 2.4033302497687328, "grad_norm": 0.03949934244155884, "learning_rate": 0.01, "loss": 2.0444, "step": 23382 }, { "epoch": 2.4036386062288004, "grad_norm": 0.06230955943465233, "learning_rate": 0.01, "loss": 2.0168, "step": 23385 }, { "epoch": 2.4039469626888685, "grad_norm": 0.0821446031332016, "learning_rate": 0.01, "loss": 2.0091, "step": 23388 }, { "epoch": 2.404255319148936, "grad_norm": 0.0392254963517189, "learning_rate": 0.01, "loss": 2.0083, "step": 23391 }, { "epoch": 2.404563675609004, "grad_norm": 0.04208219051361084, "learning_rate": 0.01, "loss": 2.008, "step": 23394 }, { "epoch": 2.404872032069072, "grad_norm": 0.053683795034885406, "learning_rate": 0.01, "loss": 2.0175, "step": 23397 }, { "epoch": 2.4051803885291396, "grad_norm": 0.0707748755812645, "learning_rate": 0.01, "loss": 2.0447, "step": 23400 }, { "epoch": 2.4054887449892073, "grad_norm": 0.04077059030532837, "learning_rate": 0.01, "loss": 2.0247, "step": 23403 }, { "epoch": 2.4057971014492754, "grad_norm": 0.041255395859479904, "learning_rate": 0.01, "loss": 2.0542, "step": 23406 }, { "epoch": 2.406105457909343, "grad_norm": 0.04560267925262451, "learning_rate": 0.01, "loss": 2.0134, "step": 23409 }, { "epoch": 2.406413814369411, "grad_norm": 0.1152443066239357, "learning_rate": 0.01, "loss": 2.0367, "step": 23412 }, { "epoch": 2.406722170829479, "grad_norm": 0.06491648405790329, "learning_rate": 0.01, "loss": 2.046, "step": 23415 }, { "epoch": 2.407030527289547, "grad_norm": 0.10551664233207703, "learning_rate": 0.01, "loss": 2.0176, "step": 23418 }, { "epoch": 2.4073388837496146, "grad_norm": 0.05565710365772247, "learning_rate": 0.01, "loss": 2.051, "step": 23421 }, { "epoch": 2.4076472402096822, "grad_norm": 0.07831353694200516, "learning_rate": 0.01, "loss": 2.0311, "step": 23424 }, { "epoch": 2.4079555966697503, "grad_norm": 0.04822281748056412, "learning_rate": 0.01, "loss": 2.0086, "step": 23427 }, { "epoch": 2.408263953129818, "grad_norm": 0.10326611250638962, "learning_rate": 0.01, "loss": 2.0079, "step": 23430 }, { "epoch": 2.4085723095898857, "grad_norm": 0.03122270293533802, "learning_rate": 0.01, "loss": 2.0371, "step": 23433 }, { "epoch": 2.4088806660499538, "grad_norm": 0.04365687072277069, "learning_rate": 0.01, "loss": 2.0323, "step": 23436 }, { "epoch": 2.4091890225100214, "grad_norm": 0.055597368627786636, "learning_rate": 0.01, "loss": 2.0345, "step": 23439 }, { "epoch": 2.4094973789700895, "grad_norm": 0.05635388568043709, "learning_rate": 0.01, "loss": 2.0255, "step": 23442 }, { "epoch": 2.409805735430157, "grad_norm": 0.059105440974235535, "learning_rate": 0.01, "loss": 2.0366, "step": 23445 }, { "epoch": 2.4101140918902253, "grad_norm": 0.03262312710285187, "learning_rate": 0.01, "loss": 2.0371, "step": 23448 }, { "epoch": 2.410422448350293, "grad_norm": 0.12139608711004257, "learning_rate": 0.01, "loss": 2.0423, "step": 23451 }, { "epoch": 2.4107308048103606, "grad_norm": 0.0331861712038517, "learning_rate": 0.01, "loss": 2.0242, "step": 23454 }, { "epoch": 2.4110391612704287, "grad_norm": 0.14580830931663513, "learning_rate": 0.01, "loss": 2.0268, "step": 23457 }, { "epoch": 2.4113475177304964, "grad_norm": 0.08361393213272095, "learning_rate": 0.01, "loss": 2.0357, "step": 23460 }, { "epoch": 2.4116558741905645, "grad_norm": 0.04604450613260269, "learning_rate": 0.01, "loss": 2.0218, "step": 23463 }, { "epoch": 2.411964230650632, "grad_norm": 0.0660676434636116, "learning_rate": 0.01, "loss": 2.0051, "step": 23466 }, { "epoch": 2.4122725871107, "grad_norm": 0.03829890862107277, "learning_rate": 0.01, "loss": 2.0037, "step": 23469 }, { "epoch": 2.412580943570768, "grad_norm": 0.03486289829015732, "learning_rate": 0.01, "loss": 2.0157, "step": 23472 }, { "epoch": 2.4128893000308356, "grad_norm": 0.043325070291757584, "learning_rate": 0.01, "loss": 2.0228, "step": 23475 }, { "epoch": 2.4131976564909037, "grad_norm": 0.062313832342624664, "learning_rate": 0.01, "loss": 2.0278, "step": 23478 }, { "epoch": 2.4135060129509713, "grad_norm": 0.07333476096391678, "learning_rate": 0.01, "loss": 2.031, "step": 23481 }, { "epoch": 2.413814369411039, "grad_norm": 0.05855254456400871, "learning_rate": 0.01, "loss": 2.0019, "step": 23484 }, { "epoch": 2.414122725871107, "grad_norm": 0.07818280905485153, "learning_rate": 0.01, "loss": 2.0305, "step": 23487 }, { "epoch": 2.4144310823311748, "grad_norm": 0.17364929616451263, "learning_rate": 0.01, "loss": 2.0358, "step": 23490 }, { "epoch": 2.414739438791243, "grad_norm": 0.04748675599694252, "learning_rate": 0.01, "loss": 2.0441, "step": 23493 }, { "epoch": 2.4150477952513105, "grad_norm": 0.04472067952156067, "learning_rate": 0.01, "loss": 2.001, "step": 23496 }, { "epoch": 2.415356151711378, "grad_norm": 0.04624779149889946, "learning_rate": 0.01, "loss": 2.0606, "step": 23499 }, { "epoch": 2.4156645081714463, "grad_norm": 0.04642438516020775, "learning_rate": 0.01, "loss": 2.0387, "step": 23502 }, { "epoch": 2.415972864631514, "grad_norm": 0.03872397541999817, "learning_rate": 0.01, "loss": 2.0463, "step": 23505 }, { "epoch": 2.416281221091582, "grad_norm": 0.055659957230091095, "learning_rate": 0.01, "loss": 2.0076, "step": 23508 }, { "epoch": 2.4165895775516497, "grad_norm": 0.05187131464481354, "learning_rate": 0.01, "loss": 2.0252, "step": 23511 }, { "epoch": 2.4168979340117174, "grad_norm": 0.04747389629483223, "learning_rate": 0.01, "loss": 1.9933, "step": 23514 }, { "epoch": 2.4172062904717855, "grad_norm": 0.04594513028860092, "learning_rate": 0.01, "loss": 2.0489, "step": 23517 }, { "epoch": 2.417514646931853, "grad_norm": 0.08976871520280838, "learning_rate": 0.01, "loss": 2.0728, "step": 23520 }, { "epoch": 2.4178230033919212, "grad_norm": 0.09563881158828735, "learning_rate": 0.01, "loss": 2.0399, "step": 23523 }, { "epoch": 2.418131359851989, "grad_norm": 0.05488200485706329, "learning_rate": 0.01, "loss": 2.0314, "step": 23526 }, { "epoch": 2.4184397163120566, "grad_norm": 0.10557366907596588, "learning_rate": 0.01, "loss": 2.0141, "step": 23529 }, { "epoch": 2.4187480727721247, "grad_norm": 0.06399507075548172, "learning_rate": 0.01, "loss": 2.032, "step": 23532 }, { "epoch": 2.4190564292321923, "grad_norm": 0.06832768023014069, "learning_rate": 0.01, "loss": 2.0445, "step": 23535 }, { "epoch": 2.4193647856922604, "grad_norm": 0.04164440929889679, "learning_rate": 0.01, "loss": 2.046, "step": 23538 }, { "epoch": 2.419673142152328, "grad_norm": 0.03020538203418255, "learning_rate": 0.01, "loss": 2.0368, "step": 23541 }, { "epoch": 2.4199814986123958, "grad_norm": 0.05985027551651001, "learning_rate": 0.01, "loss": 2.0166, "step": 23544 }, { "epoch": 2.420289855072464, "grad_norm": 0.08126893639564514, "learning_rate": 0.01, "loss": 2.0334, "step": 23547 }, { "epoch": 2.4205982115325315, "grad_norm": 0.03897944465279579, "learning_rate": 0.01, "loss": 2.0475, "step": 23550 }, { "epoch": 2.4209065679925996, "grad_norm": 0.05052472651004791, "learning_rate": 0.01, "loss": 2.0536, "step": 23553 }, { "epoch": 2.4212149244526673, "grad_norm": 0.03434956073760986, "learning_rate": 0.01, "loss": 2.0379, "step": 23556 }, { "epoch": 2.421523280912735, "grad_norm": 0.0911344438791275, "learning_rate": 0.01, "loss": 2.0412, "step": 23559 }, { "epoch": 2.421831637372803, "grad_norm": 0.11097840219736099, "learning_rate": 0.01, "loss": 1.9977, "step": 23562 }, { "epoch": 2.4221399938328707, "grad_norm": 0.09742028266191483, "learning_rate": 0.01, "loss": 2.0188, "step": 23565 }, { "epoch": 2.422448350292939, "grad_norm": 0.04184804484248161, "learning_rate": 0.01, "loss": 2.0143, "step": 23568 }, { "epoch": 2.4227567067530065, "grad_norm": 0.04188203811645508, "learning_rate": 0.01, "loss": 2.0039, "step": 23571 }, { "epoch": 2.423065063213074, "grad_norm": 0.05326924845576286, "learning_rate": 0.01, "loss": 2.0269, "step": 23574 }, { "epoch": 2.4233734196731422, "grad_norm": 0.04598800092935562, "learning_rate": 0.01, "loss": 2.0272, "step": 23577 }, { "epoch": 2.42368177613321, "grad_norm": 0.1110750213265419, "learning_rate": 0.01, "loss": 2.0178, "step": 23580 }, { "epoch": 2.423990132593278, "grad_norm": 0.0473935566842556, "learning_rate": 0.01, "loss": 2.0289, "step": 23583 }, { "epoch": 2.4242984890533457, "grad_norm": 0.08599594235420227, "learning_rate": 0.01, "loss": 2.0039, "step": 23586 }, { "epoch": 2.4246068455134133, "grad_norm": 0.03763740509748459, "learning_rate": 0.01, "loss": 2.0332, "step": 23589 }, { "epoch": 2.4249152019734814, "grad_norm": 0.056663963943719864, "learning_rate": 0.01, "loss": 2.0284, "step": 23592 }, { "epoch": 2.425223558433549, "grad_norm": 0.09664086252450943, "learning_rate": 0.01, "loss": 2.0424, "step": 23595 }, { "epoch": 2.425531914893617, "grad_norm": 0.047283969819545746, "learning_rate": 0.01, "loss": 2.0179, "step": 23598 }, { "epoch": 2.425840271353685, "grad_norm": 0.03726861625909805, "learning_rate": 0.01, "loss": 2.0154, "step": 23601 }, { "epoch": 2.4261486278137525, "grad_norm": 0.0514906644821167, "learning_rate": 0.01, "loss": 2.0229, "step": 23604 }, { "epoch": 2.4264569842738206, "grad_norm": 0.11972562968730927, "learning_rate": 0.01, "loss": 2.07, "step": 23607 }, { "epoch": 2.4267653407338883, "grad_norm": 0.17023397982120514, "learning_rate": 0.01, "loss": 2.0478, "step": 23610 }, { "epoch": 2.4270736971939564, "grad_norm": 0.1355310082435608, "learning_rate": 0.01, "loss": 2.0356, "step": 23613 }, { "epoch": 2.427382053654024, "grad_norm": 0.039905257523059845, "learning_rate": 0.01, "loss": 2.0305, "step": 23616 }, { "epoch": 2.4276904101140917, "grad_norm": 0.0339365154504776, "learning_rate": 0.01, "loss": 2.0328, "step": 23619 }, { "epoch": 2.42799876657416, "grad_norm": 0.03365493193268776, "learning_rate": 0.01, "loss": 2.0267, "step": 23622 }, { "epoch": 2.4283071230342275, "grad_norm": 0.041596464812755585, "learning_rate": 0.01, "loss": 2.0078, "step": 23625 }, { "epoch": 2.4286154794942956, "grad_norm": 0.0537833645939827, "learning_rate": 0.01, "loss": 2.0444, "step": 23628 }, { "epoch": 2.4289238359543632, "grad_norm": 0.049658019095659256, "learning_rate": 0.01, "loss": 2.0165, "step": 23631 }, { "epoch": 2.429232192414431, "grad_norm": 0.03983505442738533, "learning_rate": 0.01, "loss": 2.0223, "step": 23634 }, { "epoch": 2.429540548874499, "grad_norm": 0.0780608057975769, "learning_rate": 0.01, "loss": 2.0011, "step": 23637 }, { "epoch": 2.4298489053345667, "grad_norm": 0.13898798823356628, "learning_rate": 0.01, "loss": 2.0424, "step": 23640 }, { "epoch": 2.4301572617946348, "grad_norm": 0.07781122624874115, "learning_rate": 0.01, "loss": 2.0499, "step": 23643 }, { "epoch": 2.4304656182547024, "grad_norm": 0.045428015291690826, "learning_rate": 0.01, "loss": 2.0188, "step": 23646 }, { "epoch": 2.43077397471477, "grad_norm": 0.03437395393848419, "learning_rate": 0.01, "loss": 2.038, "step": 23649 }, { "epoch": 2.431082331174838, "grad_norm": 0.06030704453587532, "learning_rate": 0.01, "loss": 2.0563, "step": 23652 }, { "epoch": 2.431390687634906, "grad_norm": 0.046194083988666534, "learning_rate": 0.01, "loss": 2.0324, "step": 23655 }, { "epoch": 2.431699044094974, "grad_norm": 0.10041481256484985, "learning_rate": 0.01, "loss": 2.0184, "step": 23658 }, { "epoch": 2.4320074005550416, "grad_norm": 0.07109228521585464, "learning_rate": 0.01, "loss": 2.002, "step": 23661 }, { "epoch": 2.4323157570151093, "grad_norm": 0.03319769352674484, "learning_rate": 0.01, "loss": 2.0172, "step": 23664 }, { "epoch": 2.4326241134751774, "grad_norm": 0.047379735857248306, "learning_rate": 0.01, "loss": 2.0495, "step": 23667 }, { "epoch": 2.432932469935245, "grad_norm": 0.04836783930659294, "learning_rate": 0.01, "loss": 2.045, "step": 23670 }, { "epoch": 2.433240826395313, "grad_norm": 0.07264747470617294, "learning_rate": 0.01, "loss": 2.0147, "step": 23673 }, { "epoch": 2.433549182855381, "grad_norm": 0.09331446141004562, "learning_rate": 0.01, "loss": 2.0259, "step": 23676 }, { "epoch": 2.433857539315449, "grad_norm": 0.07618245482444763, "learning_rate": 0.01, "loss": 2.0188, "step": 23679 }, { "epoch": 2.4341658957755166, "grad_norm": 0.13270626962184906, "learning_rate": 0.01, "loss": 2.05, "step": 23682 }, { "epoch": 2.4344742522355842, "grad_norm": 0.06691613793373108, "learning_rate": 0.01, "loss": 2.0353, "step": 23685 }, { "epoch": 2.4347826086956523, "grad_norm": 0.04376102611422539, "learning_rate": 0.01, "loss": 2.0213, "step": 23688 }, { "epoch": 2.43509096515572, "grad_norm": 0.04829081892967224, "learning_rate": 0.01, "loss": 2.0337, "step": 23691 }, { "epoch": 2.4353993216157876, "grad_norm": 0.04604188725352287, "learning_rate": 0.01, "loss": 2.0297, "step": 23694 }, { "epoch": 2.4357076780758558, "grad_norm": 0.06296957284212112, "learning_rate": 0.01, "loss": 2.0044, "step": 23697 }, { "epoch": 2.4360160345359234, "grad_norm": 0.07184949517250061, "learning_rate": 0.01, "loss": 2.0204, "step": 23700 }, { "epoch": 2.4363243909959915, "grad_norm": 0.11544491350650787, "learning_rate": 0.01, "loss": 2.0512, "step": 23703 }, { "epoch": 2.436632747456059, "grad_norm": 0.03796609491109848, "learning_rate": 0.01, "loss": 2.0154, "step": 23706 }, { "epoch": 2.4369411039161273, "grad_norm": 0.09068101644515991, "learning_rate": 0.01, "loss": 2.0402, "step": 23709 }, { "epoch": 2.437249460376195, "grad_norm": 0.06220867484807968, "learning_rate": 0.01, "loss": 2.0289, "step": 23712 }, { "epoch": 2.4375578168362626, "grad_norm": 0.040711212903261185, "learning_rate": 0.01, "loss": 2.0365, "step": 23715 }, { "epoch": 2.4378661732963307, "grad_norm": 0.03610675781965256, "learning_rate": 0.01, "loss": 2.023, "step": 23718 }, { "epoch": 2.4381745297563984, "grad_norm": 0.06709878146648407, "learning_rate": 0.01, "loss": 2.0125, "step": 23721 }, { "epoch": 2.438482886216466, "grad_norm": 0.10784180462360382, "learning_rate": 0.01, "loss": 2.0328, "step": 23724 }, { "epoch": 2.438791242676534, "grad_norm": 0.0911094918847084, "learning_rate": 0.01, "loss": 2.0594, "step": 23727 }, { "epoch": 2.439099599136602, "grad_norm": 0.059867773205041885, "learning_rate": 0.01, "loss": 2.0244, "step": 23730 }, { "epoch": 2.43940795559667, "grad_norm": 0.06214331462979317, "learning_rate": 0.01, "loss": 2.0698, "step": 23733 }, { "epoch": 2.4397163120567376, "grad_norm": 0.058527860790491104, "learning_rate": 0.01, "loss": 2.0562, "step": 23736 }, { "epoch": 2.4400246685168057, "grad_norm": 0.09416967630386353, "learning_rate": 0.01, "loss": 2.0385, "step": 23739 }, { "epoch": 2.4403330249768733, "grad_norm": 0.11225190758705139, "learning_rate": 0.01, "loss": 2.0534, "step": 23742 }, { "epoch": 2.440641381436941, "grad_norm": 0.056495241820812225, "learning_rate": 0.01, "loss": 2.038, "step": 23745 }, { "epoch": 2.440949737897009, "grad_norm": 0.05432302877306938, "learning_rate": 0.01, "loss": 2.01, "step": 23748 }, { "epoch": 2.4412580943570767, "grad_norm": 0.04850140959024429, "learning_rate": 0.01, "loss": 2.0303, "step": 23751 }, { "epoch": 2.4415664508171444, "grad_norm": 0.05038965865969658, "learning_rate": 0.01, "loss": 2.0226, "step": 23754 }, { "epoch": 2.4418748072772125, "grad_norm": 0.050226423889398575, "learning_rate": 0.01, "loss": 2.0238, "step": 23757 }, { "epoch": 2.44218316373728, "grad_norm": 0.05979544296860695, "learning_rate": 0.01, "loss": 2.0378, "step": 23760 }, { "epoch": 2.4424915201973483, "grad_norm": 0.04254556819796562, "learning_rate": 0.01, "loss": 2.0207, "step": 23763 }, { "epoch": 2.442799876657416, "grad_norm": 0.1078273206949234, "learning_rate": 0.01, "loss": 2.0342, "step": 23766 }, { "epoch": 2.443108233117484, "grad_norm": 0.0591372586786747, "learning_rate": 0.01, "loss": 2.0186, "step": 23769 }, { "epoch": 2.4434165895775517, "grad_norm": 0.05430880934000015, "learning_rate": 0.01, "loss": 2.0232, "step": 23772 }, { "epoch": 2.4437249460376194, "grad_norm": 0.07246505469083786, "learning_rate": 0.01, "loss": 2.0372, "step": 23775 }, { "epoch": 2.4440333024976875, "grad_norm": 0.04457786679267883, "learning_rate": 0.01, "loss": 2.0157, "step": 23778 }, { "epoch": 2.444341658957755, "grad_norm": 0.03915979340672493, "learning_rate": 0.01, "loss": 2.0173, "step": 23781 }, { "epoch": 2.444650015417823, "grad_norm": 0.08284246176481247, "learning_rate": 0.01, "loss": 2.0433, "step": 23784 }, { "epoch": 2.444958371877891, "grad_norm": 0.08254294097423553, "learning_rate": 0.01, "loss": 2.012, "step": 23787 }, { "epoch": 2.4452667283379586, "grad_norm": 0.12263736873865128, "learning_rate": 0.01, "loss": 2.0491, "step": 23790 }, { "epoch": 2.4455750847980267, "grad_norm": 0.08192913979291916, "learning_rate": 0.01, "loss": 2.047, "step": 23793 }, { "epoch": 2.4458834412580943, "grad_norm": 0.09507100284099579, "learning_rate": 0.01, "loss": 2.0357, "step": 23796 }, { "epoch": 2.4461917977181624, "grad_norm": 0.10554829984903336, "learning_rate": 0.01, "loss": 2.024, "step": 23799 }, { "epoch": 2.44650015417823, "grad_norm": 0.051542408764362335, "learning_rate": 0.01, "loss": 2.0304, "step": 23802 }, { "epoch": 2.4468085106382977, "grad_norm": 0.05467440187931061, "learning_rate": 0.01, "loss": 2.043, "step": 23805 }, { "epoch": 2.447116867098366, "grad_norm": 0.033462487161159515, "learning_rate": 0.01, "loss": 2.0267, "step": 23808 }, { "epoch": 2.4474252235584335, "grad_norm": 0.04688438028097153, "learning_rate": 0.01, "loss": 2.032, "step": 23811 }, { "epoch": 2.447733580018501, "grad_norm": 0.04699881002306938, "learning_rate": 0.01, "loss": 2.0392, "step": 23814 }, { "epoch": 2.4480419364785693, "grad_norm": 0.03932753950357437, "learning_rate": 0.01, "loss": 2.0193, "step": 23817 }, { "epoch": 2.448350292938637, "grad_norm": 0.11096165329217911, "learning_rate": 0.01, "loss": 2.0124, "step": 23820 }, { "epoch": 2.448658649398705, "grad_norm": 0.03774998337030411, "learning_rate": 0.01, "loss": 2.0192, "step": 23823 }, { "epoch": 2.4489670058587727, "grad_norm": 0.03317665681242943, "learning_rate": 0.01, "loss": 2.0218, "step": 23826 }, { "epoch": 2.449275362318841, "grad_norm": 0.06077956408262253, "learning_rate": 0.01, "loss": 2.0397, "step": 23829 }, { "epoch": 2.4495837187789085, "grad_norm": 0.08997214585542679, "learning_rate": 0.01, "loss": 2.0451, "step": 23832 }, { "epoch": 2.449892075238976, "grad_norm": 0.12234194576740265, "learning_rate": 0.01, "loss": 2.0144, "step": 23835 }, { "epoch": 2.4502004316990442, "grad_norm": 0.13833922147750854, "learning_rate": 0.01, "loss": 2.0348, "step": 23838 }, { "epoch": 2.450508788159112, "grad_norm": 0.06707292795181274, "learning_rate": 0.01, "loss": 2.0276, "step": 23841 }, { "epoch": 2.4508171446191795, "grad_norm": 0.04679076373577118, "learning_rate": 0.01, "loss": 2.0624, "step": 23844 }, { "epoch": 2.4511255010792476, "grad_norm": 0.08921289443969727, "learning_rate": 0.01, "loss": 2.0188, "step": 23847 }, { "epoch": 2.4514338575393153, "grad_norm": 0.056680746376514435, "learning_rate": 0.01, "loss": 2.0299, "step": 23850 }, { "epoch": 2.4517422139993834, "grad_norm": 0.07349438965320587, "learning_rate": 0.01, "loss": 2.0249, "step": 23853 }, { "epoch": 2.452050570459451, "grad_norm": 0.057414181530475616, "learning_rate": 0.01, "loss": 2.0054, "step": 23856 }, { "epoch": 2.452358926919519, "grad_norm": 0.09144090861082077, "learning_rate": 0.01, "loss": 2.0264, "step": 23859 }, { "epoch": 2.452667283379587, "grad_norm": 0.05499831214547157, "learning_rate": 0.01, "loss": 2.035, "step": 23862 }, { "epoch": 2.4529756398396545, "grad_norm": 0.03803296014666557, "learning_rate": 0.01, "loss": 1.9935, "step": 23865 }, { "epoch": 2.4532839962997226, "grad_norm": 0.11475666612386703, "learning_rate": 0.01, "loss": 2.0491, "step": 23868 }, { "epoch": 2.4535923527597903, "grad_norm": 0.13063554465770721, "learning_rate": 0.01, "loss": 1.9912, "step": 23871 }, { "epoch": 2.453900709219858, "grad_norm": 0.04982873052358627, "learning_rate": 0.01, "loss": 2.0718, "step": 23874 }, { "epoch": 2.454209065679926, "grad_norm": 0.04090685769915581, "learning_rate": 0.01, "loss": 2.0095, "step": 23877 }, { "epoch": 2.4545174221399937, "grad_norm": 0.044233810156583786, "learning_rate": 0.01, "loss": 2.0235, "step": 23880 }, { "epoch": 2.454825778600062, "grad_norm": 0.045451819896698, "learning_rate": 0.01, "loss": 2.0135, "step": 23883 }, { "epoch": 2.4551341350601295, "grad_norm": 0.04413032531738281, "learning_rate": 0.01, "loss": 2.0371, "step": 23886 }, { "epoch": 2.4554424915201976, "grad_norm": 0.07937499135732651, "learning_rate": 0.01, "loss": 2.0375, "step": 23889 }, { "epoch": 2.455750847980265, "grad_norm": 0.09389673918485641, "learning_rate": 0.01, "loss": 2.0265, "step": 23892 }, { "epoch": 2.456059204440333, "grad_norm": 0.061937954276800156, "learning_rate": 0.01, "loss": 2.0335, "step": 23895 }, { "epoch": 2.456367560900401, "grad_norm": 0.038002993911504745, "learning_rate": 0.01, "loss": 1.9868, "step": 23898 }, { "epoch": 2.4566759173604686, "grad_norm": 0.05679142847657204, "learning_rate": 0.01, "loss": 2.0346, "step": 23901 }, { "epoch": 2.4569842738205363, "grad_norm": 0.07343320548534393, "learning_rate": 0.01, "loss": 2.0105, "step": 23904 }, { "epoch": 2.4572926302806044, "grad_norm": 0.04992273077368736, "learning_rate": 0.01, "loss": 2.0327, "step": 23907 }, { "epoch": 2.457600986740672, "grad_norm": 0.07384973764419556, "learning_rate": 0.01, "loss": 2.0337, "step": 23910 }, { "epoch": 2.45790934320074, "grad_norm": 0.05728161707520485, "learning_rate": 0.01, "loss": 2.0036, "step": 23913 }, { "epoch": 2.458217699660808, "grad_norm": 0.06384929269552231, "learning_rate": 0.01, "loss": 2.0471, "step": 23916 }, { "epoch": 2.458526056120876, "grad_norm": 0.07515307515859604, "learning_rate": 0.01, "loss": 2.0485, "step": 23919 }, { "epoch": 2.4588344125809436, "grad_norm": 0.09443585574626923, "learning_rate": 0.01, "loss": 2.0183, "step": 23922 }, { "epoch": 2.4591427690410113, "grad_norm": 0.059930965304374695, "learning_rate": 0.01, "loss": 1.9955, "step": 23925 }, { "epoch": 2.4594511255010794, "grad_norm": 0.09930091351270676, "learning_rate": 0.01, "loss": 2.0269, "step": 23928 }, { "epoch": 2.459759481961147, "grad_norm": 0.03487955033779144, "learning_rate": 0.01, "loss": 2.003, "step": 23931 }, { "epoch": 2.4600678384212147, "grad_norm": 0.08312834799289703, "learning_rate": 0.01, "loss": 2.0118, "step": 23934 }, { "epoch": 2.460376194881283, "grad_norm": 0.06459874659776688, "learning_rate": 0.01, "loss": 2.0394, "step": 23937 }, { "epoch": 2.4606845513413504, "grad_norm": 0.10331536084413528, "learning_rate": 0.01, "loss": 2.0428, "step": 23940 }, { "epoch": 2.4609929078014185, "grad_norm": 0.06738618016242981, "learning_rate": 0.01, "loss": 2.0409, "step": 23943 }, { "epoch": 2.461301264261486, "grad_norm": 0.05211193859577179, "learning_rate": 0.01, "loss": 2.0296, "step": 23946 }, { "epoch": 2.4616096207215543, "grad_norm": 0.05060914158821106, "learning_rate": 0.01, "loss": 2.0518, "step": 23949 }, { "epoch": 2.461917977181622, "grad_norm": 0.037570733577013016, "learning_rate": 0.01, "loss": 1.9946, "step": 23952 }, { "epoch": 2.4622263336416896, "grad_norm": 0.06046308949589729, "learning_rate": 0.01, "loss": 2.0154, "step": 23955 }, { "epoch": 2.4625346901017577, "grad_norm": 0.08765476942062378, "learning_rate": 0.01, "loss": 2.0433, "step": 23958 }, { "epoch": 2.4628430465618254, "grad_norm": 0.07397017627954483, "learning_rate": 0.01, "loss": 2.0187, "step": 23961 }, { "epoch": 2.463151403021893, "grad_norm": 0.10959914326667786, "learning_rate": 0.01, "loss": 2.0076, "step": 23964 }, { "epoch": 2.463459759481961, "grad_norm": 0.04373926669359207, "learning_rate": 0.01, "loss": 2.0219, "step": 23967 }, { "epoch": 2.463768115942029, "grad_norm": 0.049903422594070435, "learning_rate": 0.01, "loss": 2.0046, "step": 23970 }, { "epoch": 2.464076472402097, "grad_norm": 0.037193864583969116, "learning_rate": 0.01, "loss": 2.012, "step": 23973 }, { "epoch": 2.4643848288621646, "grad_norm": 0.06762266159057617, "learning_rate": 0.01, "loss": 2.0179, "step": 23976 }, { "epoch": 2.4646931853222327, "grad_norm": 0.03918517008423805, "learning_rate": 0.01, "loss": 2.002, "step": 23979 }, { "epoch": 2.4650015417823004, "grad_norm": 0.09819602221250534, "learning_rate": 0.01, "loss": 2.0208, "step": 23982 }, { "epoch": 2.465309898242368, "grad_norm": 0.12719838321208954, "learning_rate": 0.01, "loss": 2.0155, "step": 23985 }, { "epoch": 2.465618254702436, "grad_norm": 0.05890420451760292, "learning_rate": 0.01, "loss": 2.0459, "step": 23988 }, { "epoch": 2.465926611162504, "grad_norm": 0.05101997032761574, "learning_rate": 0.01, "loss": 2.0266, "step": 23991 }, { "epoch": 2.466234967622572, "grad_norm": 0.04597810283303261, "learning_rate": 0.01, "loss": 2.0348, "step": 23994 }, { "epoch": 2.4665433240826395, "grad_norm": 0.0504877045750618, "learning_rate": 0.01, "loss": 2.0267, "step": 23997 }, { "epoch": 2.466851680542707, "grad_norm": 0.07908850163221359, "learning_rate": 0.01, "loss": 2.0215, "step": 24000 }, { "epoch": 2.4671600370027753, "grad_norm": 0.07396575808525085, "learning_rate": 0.01, "loss": 2.0287, "step": 24003 }, { "epoch": 2.467468393462843, "grad_norm": 0.06695767492055893, "learning_rate": 0.01, "loss": 2.0061, "step": 24006 }, { "epoch": 2.467776749922911, "grad_norm": 0.0907633900642395, "learning_rate": 0.01, "loss": 2.0261, "step": 24009 }, { "epoch": 2.4680851063829787, "grad_norm": 0.07585210353136063, "learning_rate": 0.01, "loss": 2.041, "step": 24012 }, { "epoch": 2.4683934628430464, "grad_norm": 0.049894414842128754, "learning_rate": 0.01, "loss": 2.0352, "step": 24015 }, { "epoch": 2.4687018193031145, "grad_norm": 0.038147564977407455, "learning_rate": 0.01, "loss": 2.043, "step": 24018 }, { "epoch": 2.469010175763182, "grad_norm": 0.03651060909032822, "learning_rate": 0.01, "loss": 2.0208, "step": 24021 }, { "epoch": 2.4693185322232503, "grad_norm": 0.09396708011627197, "learning_rate": 0.01, "loss": 2.0419, "step": 24024 }, { "epoch": 2.469626888683318, "grad_norm": 0.03474831208586693, "learning_rate": 0.01, "loss": 2.0217, "step": 24027 }, { "epoch": 2.4699352451433856, "grad_norm": 0.09970984607934952, "learning_rate": 0.01, "loss": 2.0348, "step": 24030 }, { "epoch": 2.4702436016034537, "grad_norm": 0.10633893311023712, "learning_rate": 0.01, "loss": 2.019, "step": 24033 }, { "epoch": 2.4705519580635213, "grad_norm": 0.0787937119603157, "learning_rate": 0.01, "loss": 2.0109, "step": 24036 }, { "epoch": 2.4708603145235895, "grad_norm": 0.05214501917362213, "learning_rate": 0.01, "loss": 2.0023, "step": 24039 }, { "epoch": 2.471168670983657, "grad_norm": 0.02967134490609169, "learning_rate": 0.01, "loss": 2.0043, "step": 24042 }, { "epoch": 2.4714770274437248, "grad_norm": 0.03181852400302887, "learning_rate": 0.01, "loss": 2.0164, "step": 24045 }, { "epoch": 2.471785383903793, "grad_norm": 0.03905104100704193, "learning_rate": 0.01, "loss": 2.0194, "step": 24048 }, { "epoch": 2.4720937403638605, "grad_norm": 0.051532018929719925, "learning_rate": 0.01, "loss": 2.0045, "step": 24051 }, { "epoch": 2.4724020968239286, "grad_norm": 0.13418106734752655, "learning_rate": 0.01, "loss": 2.0486, "step": 24054 }, { "epoch": 2.4727104532839963, "grad_norm": 0.0745147094130516, "learning_rate": 0.01, "loss": 2.0448, "step": 24057 }, { "epoch": 2.473018809744064, "grad_norm": 0.06666639447212219, "learning_rate": 0.01, "loss": 2.0274, "step": 24060 }, { "epoch": 2.473327166204132, "grad_norm": 0.08356054127216339, "learning_rate": 0.01, "loss": 2.028, "step": 24063 }, { "epoch": 2.4736355226641997, "grad_norm": 0.04733874648809433, "learning_rate": 0.01, "loss": 2.0294, "step": 24066 }, { "epoch": 2.473943879124268, "grad_norm": 0.07551899552345276, "learning_rate": 0.01, "loss": 2.0389, "step": 24069 }, { "epoch": 2.4742522355843355, "grad_norm": 0.03114013373851776, "learning_rate": 0.01, "loss": 2.0454, "step": 24072 }, { "epoch": 2.474560592044403, "grad_norm": 0.062112826853990555, "learning_rate": 0.01, "loss": 2.048, "step": 24075 }, { "epoch": 2.4748689485044713, "grad_norm": 0.0542120561003685, "learning_rate": 0.01, "loss": 2.0198, "step": 24078 }, { "epoch": 2.475177304964539, "grad_norm": 0.06153399124741554, "learning_rate": 0.01, "loss": 2.0264, "step": 24081 }, { "epoch": 2.475485661424607, "grad_norm": 0.06306985020637512, "learning_rate": 0.01, "loss": 2.0425, "step": 24084 }, { "epoch": 2.4757940178846747, "grad_norm": 0.03326687961816788, "learning_rate": 0.01, "loss": 2.0211, "step": 24087 }, { "epoch": 2.4761023743447423, "grad_norm": 0.10279777646064758, "learning_rate": 0.01, "loss": 2.0483, "step": 24090 }, { "epoch": 2.4764107308048104, "grad_norm": 0.045618560165166855, "learning_rate": 0.01, "loss": 2.0476, "step": 24093 }, { "epoch": 2.476719087264878, "grad_norm": 0.05065792426466942, "learning_rate": 0.01, "loss": 2.0261, "step": 24096 }, { "epoch": 2.477027443724946, "grad_norm": 0.04114675521850586, "learning_rate": 0.01, "loss": 2.0324, "step": 24099 }, { "epoch": 2.477335800185014, "grad_norm": 0.11044265329837799, "learning_rate": 0.01, "loss": 2.0379, "step": 24102 }, { "epoch": 2.4776441566450815, "grad_norm": 0.053481362760066986, "learning_rate": 0.01, "loss": 2.0408, "step": 24105 }, { "epoch": 2.4779525131051496, "grad_norm": 0.14947016537189484, "learning_rate": 0.01, "loss": 2.0511, "step": 24108 }, { "epoch": 2.4782608695652173, "grad_norm": 0.08009488135576248, "learning_rate": 0.01, "loss": 2.0383, "step": 24111 }, { "epoch": 2.4785692260252854, "grad_norm": 0.06397935748100281, "learning_rate": 0.01, "loss": 2.0445, "step": 24114 }, { "epoch": 2.478877582485353, "grad_norm": 0.0409528985619545, "learning_rate": 0.01, "loss": 2.0386, "step": 24117 }, { "epoch": 2.4791859389454207, "grad_norm": 0.11498477309942245, "learning_rate": 0.01, "loss": 1.9942, "step": 24120 }, { "epoch": 2.479494295405489, "grad_norm": 0.02937469258904457, "learning_rate": 0.01, "loss": 2.0329, "step": 24123 }, { "epoch": 2.4798026518655565, "grad_norm": 0.03659180551767349, "learning_rate": 0.01, "loss": 2.0638, "step": 24126 }, { "epoch": 2.4801110083256246, "grad_norm": 0.10866084694862366, "learning_rate": 0.01, "loss": 2.0319, "step": 24129 }, { "epoch": 2.4804193647856922, "grad_norm": 0.07002250105142593, "learning_rate": 0.01, "loss": 2.03, "step": 24132 }, { "epoch": 2.48072772124576, "grad_norm": 0.07129926234483719, "learning_rate": 0.01, "loss": 2.0407, "step": 24135 }, { "epoch": 2.481036077705828, "grad_norm": 0.061107151210308075, "learning_rate": 0.01, "loss": 2.0288, "step": 24138 }, { "epoch": 2.4813444341658957, "grad_norm": 0.04937992990016937, "learning_rate": 0.01, "loss": 2.0492, "step": 24141 }, { "epoch": 2.481652790625964, "grad_norm": 0.04651058465242386, "learning_rate": 0.01, "loss": 2.0474, "step": 24144 }, { "epoch": 2.4819611470860314, "grad_norm": 0.06658688187599182, "learning_rate": 0.01, "loss": 2.0429, "step": 24147 }, { "epoch": 2.482269503546099, "grad_norm": 0.05241367965936661, "learning_rate": 0.01, "loss": 2.0193, "step": 24150 }, { "epoch": 2.482577860006167, "grad_norm": 0.03564739227294922, "learning_rate": 0.01, "loss": 2.0092, "step": 24153 }, { "epoch": 2.482886216466235, "grad_norm": 0.03984629735350609, "learning_rate": 0.01, "loss": 2.0302, "step": 24156 }, { "epoch": 2.483194572926303, "grad_norm": 0.10245262831449509, "learning_rate": 0.01, "loss": 2.0327, "step": 24159 }, { "epoch": 2.4835029293863706, "grad_norm": 0.0727042481303215, "learning_rate": 0.01, "loss": 2.0295, "step": 24162 }, { "epoch": 2.4838112858464383, "grad_norm": 0.041190728545188904, "learning_rate": 0.01, "loss": 1.9987, "step": 24165 }, { "epoch": 2.4841196423065064, "grad_norm": 0.04986109584569931, "learning_rate": 0.01, "loss": 2.0227, "step": 24168 }, { "epoch": 2.484427998766574, "grad_norm": 0.0689210444688797, "learning_rate": 0.01, "loss": 2.0262, "step": 24171 }, { "epoch": 2.484736355226642, "grad_norm": 0.043997250497341156, "learning_rate": 0.01, "loss": 2.007, "step": 24174 }, { "epoch": 2.48504471168671, "grad_norm": 0.06231982633471489, "learning_rate": 0.01, "loss": 2.0095, "step": 24177 }, { "epoch": 2.485353068146778, "grad_norm": 0.09958053380250931, "learning_rate": 0.01, "loss": 2.022, "step": 24180 }, { "epoch": 2.4856614246068456, "grad_norm": 0.05297970771789551, "learning_rate": 0.01, "loss": 2.0416, "step": 24183 }, { "epoch": 2.4859697810669132, "grad_norm": 0.07420172542333603, "learning_rate": 0.01, "loss": 1.988, "step": 24186 }, { "epoch": 2.4862781375269813, "grad_norm": 0.049017585813999176, "learning_rate": 0.01, "loss": 2.0195, "step": 24189 }, { "epoch": 2.486586493987049, "grad_norm": 0.05398377403616905, "learning_rate": 0.01, "loss": 2.0161, "step": 24192 }, { "epoch": 2.4868948504471167, "grad_norm": 0.03338189795613289, "learning_rate": 0.01, "loss": 2.0073, "step": 24195 }, { "epoch": 2.4872032069071848, "grad_norm": 0.1013825535774231, "learning_rate": 0.01, "loss": 2.0372, "step": 24198 }, { "epoch": 2.4875115633672524, "grad_norm": 0.06101495400071144, "learning_rate": 0.01, "loss": 2.0313, "step": 24201 }, { "epoch": 2.4878199198273205, "grad_norm": 0.06915189325809479, "learning_rate": 0.01, "loss": 1.9753, "step": 24204 }, { "epoch": 2.488128276287388, "grad_norm": 0.09961054474115372, "learning_rate": 0.01, "loss": 2.0183, "step": 24207 }, { "epoch": 2.4884366327474563, "grad_norm": 0.039923045784235, "learning_rate": 0.01, "loss": 2.0256, "step": 24210 }, { "epoch": 2.488744989207524, "grad_norm": 0.07982566952705383, "learning_rate": 0.01, "loss": 2.0225, "step": 24213 }, { "epoch": 2.4890533456675916, "grad_norm": 0.06360599398612976, "learning_rate": 0.01, "loss": 2.0126, "step": 24216 }, { "epoch": 2.4893617021276597, "grad_norm": 0.06489767879247665, "learning_rate": 0.01, "loss": 2.0615, "step": 24219 }, { "epoch": 2.4896700585877274, "grad_norm": 0.08873300999403, "learning_rate": 0.01, "loss": 2.0144, "step": 24222 }, { "epoch": 2.489978415047795, "grad_norm": 0.04309386387467384, "learning_rate": 0.01, "loss": 2.0478, "step": 24225 }, { "epoch": 2.490286771507863, "grad_norm": 0.042991675436496735, "learning_rate": 0.01, "loss": 2.0046, "step": 24228 }, { "epoch": 2.490595127967931, "grad_norm": 0.04507692903280258, "learning_rate": 0.01, "loss": 2.0417, "step": 24231 }, { "epoch": 2.490903484427999, "grad_norm": 0.09288784116506577, "learning_rate": 0.01, "loss": 2.0181, "step": 24234 }, { "epoch": 2.4912118408880666, "grad_norm": 0.040776100009679794, "learning_rate": 0.01, "loss": 2.0524, "step": 24237 }, { "epoch": 2.4915201973481347, "grad_norm": 0.09847230464220047, "learning_rate": 0.01, "loss": 2.0298, "step": 24240 }, { "epoch": 2.4918285538082023, "grad_norm": 0.05252716690301895, "learning_rate": 0.01, "loss": 2.0288, "step": 24243 }, { "epoch": 2.49213691026827, "grad_norm": 0.04028663411736488, "learning_rate": 0.01, "loss": 2.0351, "step": 24246 }, { "epoch": 2.492445266728338, "grad_norm": 0.04721330851316452, "learning_rate": 0.01, "loss": 2.045, "step": 24249 }, { "epoch": 2.4927536231884058, "grad_norm": 0.0453517884016037, "learning_rate": 0.01, "loss": 2.0264, "step": 24252 }, { "epoch": 2.4930619796484734, "grad_norm": 0.09120344370603561, "learning_rate": 0.01, "loss": 2.0189, "step": 24255 }, { "epoch": 2.4933703361085415, "grad_norm": 0.1132507398724556, "learning_rate": 0.01, "loss": 2.0109, "step": 24258 }, { "epoch": 2.493678692568609, "grad_norm": 0.07698217034339905, "learning_rate": 0.01, "loss": 2.0391, "step": 24261 }, { "epoch": 2.4939870490286773, "grad_norm": 0.04104442894458771, "learning_rate": 0.01, "loss": 2.0357, "step": 24264 }, { "epoch": 2.494295405488745, "grad_norm": 0.04681272804737091, "learning_rate": 0.01, "loss": 2.0142, "step": 24267 }, { "epoch": 2.494603761948813, "grad_norm": 0.05137484520673752, "learning_rate": 0.01, "loss": 2.0616, "step": 24270 }, { "epoch": 2.4949121184088807, "grad_norm": 0.05654723569750786, "learning_rate": 0.01, "loss": 2.0371, "step": 24273 }, { "epoch": 2.4952204748689484, "grad_norm": 0.052294518798589706, "learning_rate": 0.01, "loss": 2.0163, "step": 24276 }, { "epoch": 2.4955288313290165, "grad_norm": 0.06906304508447647, "learning_rate": 0.01, "loss": 2.0049, "step": 24279 }, { "epoch": 2.495837187789084, "grad_norm": 0.0664992555975914, "learning_rate": 0.01, "loss": 2.037, "step": 24282 }, { "epoch": 2.496145544249152, "grad_norm": 0.06502712517976761, "learning_rate": 0.01, "loss": 2.0439, "step": 24285 }, { "epoch": 2.49645390070922, "grad_norm": 0.047290291637182236, "learning_rate": 0.01, "loss": 2.0267, "step": 24288 }, { "epoch": 2.4967622571692876, "grad_norm": 0.07271420210599899, "learning_rate": 0.01, "loss": 2.0325, "step": 24291 }, { "epoch": 2.4970706136293557, "grad_norm": 0.042307768017053604, "learning_rate": 0.01, "loss": 2.0223, "step": 24294 }, { "epoch": 2.4973789700894233, "grad_norm": 0.04834264889359474, "learning_rate": 0.01, "loss": 2.0278, "step": 24297 }, { "epoch": 2.4976873265494914, "grad_norm": 0.045435305684804916, "learning_rate": 0.01, "loss": 1.9997, "step": 24300 }, { "epoch": 2.497995683009559, "grad_norm": 0.07511632144451141, "learning_rate": 0.01, "loss": 2.025, "step": 24303 }, { "epoch": 2.4983040394696268, "grad_norm": 0.14621250331401825, "learning_rate": 0.01, "loss": 2.0401, "step": 24306 }, { "epoch": 2.498612395929695, "grad_norm": 0.05969877541065216, "learning_rate": 0.01, "loss": 2.0309, "step": 24309 }, { "epoch": 2.4989207523897625, "grad_norm": 0.040014345198869705, "learning_rate": 0.01, "loss": 2.0301, "step": 24312 }, { "epoch": 2.49922910884983, "grad_norm": 0.05185026675462723, "learning_rate": 0.01, "loss": 2.0149, "step": 24315 }, { "epoch": 2.4995374653098983, "grad_norm": 0.04459863528609276, "learning_rate": 0.01, "loss": 2.03, "step": 24318 }, { "epoch": 2.499845821769966, "grad_norm": 0.06208227947354317, "learning_rate": 0.01, "loss": 2.0253, "step": 24321 }, { "epoch": 2.500154178230034, "grad_norm": 0.0788293108344078, "learning_rate": 0.01, "loss": 2.0154, "step": 24324 }, { "epoch": 2.5004625346901017, "grad_norm": 0.0636831745505333, "learning_rate": 0.01, "loss": 2.0522, "step": 24327 }, { "epoch": 2.50077089115017, "grad_norm": 0.05828903615474701, "learning_rate": 0.01, "loss": 2.0093, "step": 24330 }, { "epoch": 2.5010792476102375, "grad_norm": 0.06897569447755814, "learning_rate": 0.01, "loss": 1.9997, "step": 24333 }, { "epoch": 2.501387604070305, "grad_norm": 0.03793172910809517, "learning_rate": 0.01, "loss": 2.0209, "step": 24336 }, { "epoch": 2.5016959605303732, "grad_norm": 0.05384537950158119, "learning_rate": 0.01, "loss": 2.0355, "step": 24339 }, { "epoch": 2.502004316990441, "grad_norm": 0.07979964464902878, "learning_rate": 0.01, "loss": 2.023, "step": 24342 }, { "epoch": 2.5023126734505086, "grad_norm": 0.054392259567976, "learning_rate": 0.01, "loss": 2.0115, "step": 24345 }, { "epoch": 2.5026210299105767, "grad_norm": 0.06897418200969696, "learning_rate": 0.01, "loss": 2.0147, "step": 24348 }, { "epoch": 2.5029293863706443, "grad_norm": 0.0801873579621315, "learning_rate": 0.01, "loss": 2.0397, "step": 24351 }, { "epoch": 2.5032377428307124, "grad_norm": 0.03708551451563835, "learning_rate": 0.01, "loss": 2.0076, "step": 24354 }, { "epoch": 2.50354609929078, "grad_norm": 0.09656143933534622, "learning_rate": 0.01, "loss": 2.0463, "step": 24357 }, { "epoch": 2.503854455750848, "grad_norm": 0.0770551860332489, "learning_rate": 0.01, "loss": 2.0355, "step": 24360 }, { "epoch": 2.504162812210916, "grad_norm": 0.08103878796100616, "learning_rate": 0.01, "loss": 2.0262, "step": 24363 }, { "epoch": 2.5044711686709835, "grad_norm": 0.10928300768136978, "learning_rate": 0.01, "loss": 2.0389, "step": 24366 }, { "epoch": 2.5047795251310516, "grad_norm": 0.07541976869106293, "learning_rate": 0.01, "loss": 2.0242, "step": 24369 }, { "epoch": 2.5050878815911193, "grad_norm": 0.05937611311674118, "learning_rate": 0.01, "loss": 2.0395, "step": 24372 }, { "epoch": 2.505396238051187, "grad_norm": 0.05396249517798424, "learning_rate": 0.01, "loss": 2.0164, "step": 24375 }, { "epoch": 2.505704594511255, "grad_norm": 0.04004419967532158, "learning_rate": 0.01, "loss": 2.0174, "step": 24378 }, { "epoch": 2.5060129509713227, "grad_norm": 0.05242707580327988, "learning_rate": 0.01, "loss": 2.0119, "step": 24381 }, { "epoch": 2.506321307431391, "grad_norm": 0.038752540946006775, "learning_rate": 0.01, "loss": 2.0181, "step": 24384 }, { "epoch": 2.5066296638914585, "grad_norm": 0.07296596467494965, "learning_rate": 0.01, "loss": 2.0027, "step": 24387 }, { "epoch": 2.5069380203515266, "grad_norm": 0.116209976375103, "learning_rate": 0.01, "loss": 2.0205, "step": 24390 }, { "epoch": 2.5072463768115942, "grad_norm": 0.09165041148662567, "learning_rate": 0.01, "loss": 2.0271, "step": 24393 }, { "epoch": 2.507554733271662, "grad_norm": 0.14264173805713654, "learning_rate": 0.01, "loss": 2.0217, "step": 24396 }, { "epoch": 2.50786308973173, "grad_norm": 0.045917656272649765, "learning_rate": 0.01, "loss": 2.0018, "step": 24399 }, { "epoch": 2.5081714461917977, "grad_norm": 0.057148344814777374, "learning_rate": 0.01, "loss": 2.0295, "step": 24402 }, { "epoch": 2.5084798026518653, "grad_norm": 0.03626836836338043, "learning_rate": 0.01, "loss": 2.0197, "step": 24405 }, { "epoch": 2.5087881591119334, "grad_norm": 0.03621996194124222, "learning_rate": 0.01, "loss": 2.0307, "step": 24408 }, { "epoch": 2.509096515572001, "grad_norm": 0.05835467949509621, "learning_rate": 0.01, "loss": 2.0506, "step": 24411 }, { "epoch": 2.509404872032069, "grad_norm": 0.03973361849784851, "learning_rate": 0.01, "loss": 2.0076, "step": 24414 }, { "epoch": 2.509713228492137, "grad_norm": 0.09463023394346237, "learning_rate": 0.01, "loss": 2.0254, "step": 24417 }, { "epoch": 2.510021584952205, "grad_norm": 0.07552462071180344, "learning_rate": 0.01, "loss": 2.0182, "step": 24420 }, { "epoch": 2.5103299414122726, "grad_norm": 0.1288609802722931, "learning_rate": 0.01, "loss": 2.0337, "step": 24423 }, { "epoch": 2.5106382978723403, "grad_norm": 0.06943691521883011, "learning_rate": 0.01, "loss": 2.0162, "step": 24426 }, { "epoch": 2.5109466543324084, "grad_norm": 0.08581320196390152, "learning_rate": 0.01, "loss": 2.0292, "step": 24429 }, { "epoch": 2.511255010792476, "grad_norm": 0.07379914820194244, "learning_rate": 0.01, "loss": 2.0328, "step": 24432 }, { "epoch": 2.5115633672525437, "grad_norm": 0.09235703945159912, "learning_rate": 0.01, "loss": 2.0194, "step": 24435 }, { "epoch": 2.511871723712612, "grad_norm": 0.05038774758577347, "learning_rate": 0.01, "loss": 2.0249, "step": 24438 }, { "epoch": 2.5121800801726795, "grad_norm": 0.08711019903421402, "learning_rate": 0.01, "loss": 2.0276, "step": 24441 }, { "epoch": 2.5124884366327476, "grad_norm": 0.05432825908064842, "learning_rate": 0.01, "loss": 2.0251, "step": 24444 }, { "epoch": 2.5127967930928152, "grad_norm": 0.10115527361631393, "learning_rate": 0.01, "loss": 2.016, "step": 24447 }, { "epoch": 2.5131051495528833, "grad_norm": 0.10387013852596283, "learning_rate": 0.01, "loss": 2.0198, "step": 24450 }, { "epoch": 2.513413506012951, "grad_norm": 0.0740542933344841, "learning_rate": 0.01, "loss": 2.0179, "step": 24453 }, { "epoch": 2.5137218624730187, "grad_norm": 0.07834311574697495, "learning_rate": 0.01, "loss": 2.0731, "step": 24456 }, { "epoch": 2.5140302189330868, "grad_norm": 0.06743736565113068, "learning_rate": 0.01, "loss": 2.0588, "step": 24459 }, { "epoch": 2.5143385753931544, "grad_norm": 0.051791246980428696, "learning_rate": 0.01, "loss": 2.0063, "step": 24462 }, { "epoch": 2.514646931853222, "grad_norm": 0.0731598362326622, "learning_rate": 0.01, "loss": 2.0271, "step": 24465 }, { "epoch": 2.51495528831329, "grad_norm": 0.04995987191796303, "learning_rate": 0.01, "loss": 2.0224, "step": 24468 }, { "epoch": 2.515263644773358, "grad_norm": 0.04953973367810249, "learning_rate": 0.01, "loss": 2.0176, "step": 24471 }, { "epoch": 2.515572001233426, "grad_norm": 0.05432116240262985, "learning_rate": 0.01, "loss": 2.0349, "step": 24474 }, { "epoch": 2.5158803576934936, "grad_norm": 0.048791225999593735, "learning_rate": 0.01, "loss": 2.0296, "step": 24477 }, { "epoch": 2.5161887141535617, "grad_norm": 0.11742904037237167, "learning_rate": 0.01, "loss": 2.04, "step": 24480 }, { "epoch": 2.5164970706136294, "grad_norm": 0.12617075443267822, "learning_rate": 0.01, "loss": 2.0214, "step": 24483 }, { "epoch": 2.516805427073697, "grad_norm": 0.051573995500802994, "learning_rate": 0.01, "loss": 2.0662, "step": 24486 }, { "epoch": 2.517113783533765, "grad_norm": 0.09131506085395813, "learning_rate": 0.01, "loss": 2.0487, "step": 24489 }, { "epoch": 2.517422139993833, "grad_norm": 0.06593006104230881, "learning_rate": 0.01, "loss": 2.0369, "step": 24492 }, { "epoch": 2.5177304964539005, "grad_norm": 0.038310687988996506, "learning_rate": 0.01, "loss": 1.9954, "step": 24495 }, { "epoch": 2.5180388529139686, "grad_norm": 0.05975675210356712, "learning_rate": 0.01, "loss": 2.028, "step": 24498 }, { "epoch": 2.518347209374036, "grad_norm": 0.04541294649243355, "learning_rate": 0.01, "loss": 2.0285, "step": 24501 }, { "epoch": 2.5186555658341043, "grad_norm": 0.053723473101854324, "learning_rate": 0.01, "loss": 2.0029, "step": 24504 }, { "epoch": 2.518963922294172, "grad_norm": 0.030095964670181274, "learning_rate": 0.01, "loss": 2.0401, "step": 24507 }, { "epoch": 2.51927227875424, "grad_norm": 0.10244923830032349, "learning_rate": 0.01, "loss": 2.0348, "step": 24510 }, { "epoch": 2.5195806352143078, "grad_norm": 0.06249944120645523, "learning_rate": 0.01, "loss": 2.0449, "step": 24513 }, { "epoch": 2.5198889916743754, "grad_norm": 0.08720767498016357, "learning_rate": 0.01, "loss": 2.036, "step": 24516 }, { "epoch": 2.5201973481344435, "grad_norm": 0.07686194777488708, "learning_rate": 0.01, "loss": 2.0151, "step": 24519 }, { "epoch": 2.520505704594511, "grad_norm": 0.08837150782346725, "learning_rate": 0.01, "loss": 2.0414, "step": 24522 }, { "epoch": 2.520814061054579, "grad_norm": 0.0794796347618103, "learning_rate": 0.01, "loss": 2.027, "step": 24525 }, { "epoch": 2.521122417514647, "grad_norm": 0.05655858293175697, "learning_rate": 0.01, "loss": 2.0336, "step": 24528 }, { "epoch": 2.521430773974715, "grad_norm": 0.08295401185750961, "learning_rate": 0.01, "loss": 2.0007, "step": 24531 }, { "epoch": 2.5217391304347827, "grad_norm": 0.03982521593570709, "learning_rate": 0.01, "loss": 2.0288, "step": 24534 }, { "epoch": 2.5220474868948504, "grad_norm": 0.04791923984885216, "learning_rate": 0.01, "loss": 2.038, "step": 24537 }, { "epoch": 2.5223558433549185, "grad_norm": 0.11436691880226135, "learning_rate": 0.01, "loss": 2.046, "step": 24540 }, { "epoch": 2.522664199814986, "grad_norm": 0.1064198762178421, "learning_rate": 0.01, "loss": 2.0144, "step": 24543 }, { "epoch": 2.522972556275054, "grad_norm": 0.08036024123430252, "learning_rate": 0.01, "loss": 2.0152, "step": 24546 }, { "epoch": 2.523280912735122, "grad_norm": 0.061799556016922, "learning_rate": 0.01, "loss": 2.0165, "step": 24549 }, { "epoch": 2.5235892691951896, "grad_norm": 0.04592469707131386, "learning_rate": 0.01, "loss": 2.0061, "step": 24552 }, { "epoch": 2.523897625655257, "grad_norm": 0.036766473203897476, "learning_rate": 0.01, "loss": 1.9913, "step": 24555 }, { "epoch": 2.5242059821153253, "grad_norm": 0.09619138389825821, "learning_rate": 0.01, "loss": 2.0264, "step": 24558 }, { "epoch": 2.5245143385753934, "grad_norm": 0.03915918245911598, "learning_rate": 0.01, "loss": 2.0098, "step": 24561 }, { "epoch": 2.524822695035461, "grad_norm": 0.04883084446191788, "learning_rate": 0.01, "loss": 2.0298, "step": 24564 }, { "epoch": 2.5251310514955287, "grad_norm": 0.05630512908101082, "learning_rate": 0.01, "loss": 2.0117, "step": 24567 }, { "epoch": 2.525439407955597, "grad_norm": 0.04064425081014633, "learning_rate": 0.01, "loss": 2.0434, "step": 24570 }, { "epoch": 2.5257477644156645, "grad_norm": 0.05302917957305908, "learning_rate": 0.01, "loss": 2.044, "step": 24573 }, { "epoch": 2.526056120875732, "grad_norm": 0.07677201181650162, "learning_rate": 0.01, "loss": 2.0294, "step": 24576 }, { "epoch": 2.5263644773358003, "grad_norm": 0.07950242608785629, "learning_rate": 0.01, "loss": 2.0006, "step": 24579 }, { "epoch": 2.526672833795868, "grad_norm": 0.07068518549203873, "learning_rate": 0.01, "loss": 2.0278, "step": 24582 }, { "epoch": 2.5269811902559356, "grad_norm": 0.08623625338077545, "learning_rate": 0.01, "loss": 2.0369, "step": 24585 }, { "epoch": 2.5272895467160037, "grad_norm": 0.05549190193414688, "learning_rate": 0.01, "loss": 2.0382, "step": 24588 }, { "epoch": 2.527597903176072, "grad_norm": 0.05710297450423241, "learning_rate": 0.01, "loss": 2.0209, "step": 24591 }, { "epoch": 2.5279062596361395, "grad_norm": 0.05071646347641945, "learning_rate": 0.01, "loss": 2.0066, "step": 24594 }, { "epoch": 2.528214616096207, "grad_norm": 0.09765972197055817, "learning_rate": 0.01, "loss": 2.0468, "step": 24597 }, { "epoch": 2.5285229725562752, "grad_norm": 0.05874921754002571, "learning_rate": 0.01, "loss": 2.0306, "step": 24600 }, { "epoch": 2.528831329016343, "grad_norm": 0.10598991811275482, "learning_rate": 0.01, "loss": 2.0156, "step": 24603 }, { "epoch": 2.5291396854764105, "grad_norm": 0.07071609050035477, "learning_rate": 0.01, "loss": 2.0284, "step": 24606 }, { "epoch": 2.5294480419364787, "grad_norm": 0.07132923603057861, "learning_rate": 0.01, "loss": 2.0004, "step": 24609 }, { "epoch": 2.5297563983965463, "grad_norm": 0.06741276383399963, "learning_rate": 0.01, "loss": 2.0247, "step": 24612 }, { "epoch": 2.530064754856614, "grad_norm": 0.10399371385574341, "learning_rate": 0.01, "loss": 2.033, "step": 24615 }, { "epoch": 2.530373111316682, "grad_norm": 0.054513610899448395, "learning_rate": 0.01, "loss": 2.0364, "step": 24618 }, { "epoch": 2.53068146777675, "grad_norm": 0.03990021347999573, "learning_rate": 0.01, "loss": 2.019, "step": 24621 }, { "epoch": 2.530989824236818, "grad_norm": 0.0329439677298069, "learning_rate": 0.01, "loss": 2.0653, "step": 24624 }, { "epoch": 2.5312981806968855, "grad_norm": 0.08065532892942429, "learning_rate": 0.01, "loss": 2.0195, "step": 24627 }, { "epoch": 2.5316065371569536, "grad_norm": 0.04455409198999405, "learning_rate": 0.01, "loss": 2.0383, "step": 24630 }, { "epoch": 2.5319148936170213, "grad_norm": 0.09395566582679749, "learning_rate": 0.01, "loss": 2.026, "step": 24633 }, { "epoch": 2.532223250077089, "grad_norm": 0.04042106121778488, "learning_rate": 0.01, "loss": 2.0272, "step": 24636 }, { "epoch": 2.532531606537157, "grad_norm": 0.09208521991968155, "learning_rate": 0.01, "loss": 2.0265, "step": 24639 }, { "epoch": 2.5328399629972247, "grad_norm": 0.06603435426950455, "learning_rate": 0.01, "loss": 2.044, "step": 24642 }, { "epoch": 2.533148319457293, "grad_norm": 0.039963483810424805, "learning_rate": 0.01, "loss": 2.0491, "step": 24645 }, { "epoch": 2.5334566759173605, "grad_norm": 0.14821624755859375, "learning_rate": 0.01, "loss": 2.0013, "step": 24648 }, { "epoch": 2.5337650323774286, "grad_norm": 0.06644035130739212, "learning_rate": 0.01, "loss": 2.0433, "step": 24651 }, { "epoch": 2.534073388837496, "grad_norm": 0.0375928059220314, "learning_rate": 0.01, "loss": 2.0264, "step": 24654 }, { "epoch": 2.534381745297564, "grad_norm": 0.06041393801569939, "learning_rate": 0.01, "loss": 2.0198, "step": 24657 }, { "epoch": 2.534690101757632, "grad_norm": 0.06117352098226547, "learning_rate": 0.01, "loss": 2.0148, "step": 24660 }, { "epoch": 2.5349984582176996, "grad_norm": 0.05386986956000328, "learning_rate": 0.01, "loss": 2.0521, "step": 24663 }, { "epoch": 2.5353068146777673, "grad_norm": 0.03399750217795372, "learning_rate": 0.01, "loss": 2.0223, "step": 24666 }, { "epoch": 2.5356151711378354, "grad_norm": 0.06256785988807678, "learning_rate": 0.01, "loss": 2.0372, "step": 24669 }, { "epoch": 2.535923527597903, "grad_norm": 0.08575739711523056, "learning_rate": 0.01, "loss": 2.0214, "step": 24672 }, { "epoch": 2.536231884057971, "grad_norm": 0.0895959809422493, "learning_rate": 0.01, "loss": 2.0339, "step": 24675 }, { "epoch": 2.536540240518039, "grad_norm": 0.06579075753688812, "learning_rate": 0.01, "loss": 2.0363, "step": 24678 }, { "epoch": 2.536848596978107, "grad_norm": 0.04509506747126579, "learning_rate": 0.01, "loss": 2.0331, "step": 24681 }, { "epoch": 2.5371569534381746, "grad_norm": 0.03535350412130356, "learning_rate": 0.01, "loss": 2.0262, "step": 24684 }, { "epoch": 2.5374653098982423, "grad_norm": 0.03496406227350235, "learning_rate": 0.01, "loss": 2.0183, "step": 24687 }, { "epoch": 2.5377736663583104, "grad_norm": 0.04595872759819031, "learning_rate": 0.01, "loss": 2.0376, "step": 24690 }, { "epoch": 2.538082022818378, "grad_norm": 0.07009676098823547, "learning_rate": 0.01, "loss": 2.0236, "step": 24693 }, { "epoch": 2.5383903792784457, "grad_norm": 0.07328460365533829, "learning_rate": 0.01, "loss": 2.0163, "step": 24696 }, { "epoch": 2.538698735738514, "grad_norm": 0.09521552175283432, "learning_rate": 0.01, "loss": 2.0288, "step": 24699 }, { "epoch": 2.5390070921985815, "grad_norm": 0.09087500721216202, "learning_rate": 0.01, "loss": 2.0524, "step": 24702 }, { "epoch": 2.5393154486586496, "grad_norm": 0.05657880753278732, "learning_rate": 0.01, "loss": 2.0336, "step": 24705 }, { "epoch": 2.539623805118717, "grad_norm": 0.13524407148361206, "learning_rate": 0.01, "loss": 2.0245, "step": 24708 }, { "epoch": 2.5399321615787853, "grad_norm": 0.04498621076345444, "learning_rate": 0.01, "loss": 2.0101, "step": 24711 }, { "epoch": 2.540240518038853, "grad_norm": 0.04117140173912048, "learning_rate": 0.01, "loss": 2.0149, "step": 24714 }, { "epoch": 2.5405488744989206, "grad_norm": 0.03630746528506279, "learning_rate": 0.01, "loss": 2.0323, "step": 24717 }, { "epoch": 2.5408572309589887, "grad_norm": 0.03791969269514084, "learning_rate": 0.01, "loss": 2.0142, "step": 24720 }, { "epoch": 2.5411655874190564, "grad_norm": 0.045213595032691956, "learning_rate": 0.01, "loss": 2.0136, "step": 24723 }, { "epoch": 2.541473943879124, "grad_norm": 0.08232447504997253, "learning_rate": 0.01, "loss": 2.0101, "step": 24726 }, { "epoch": 2.541782300339192, "grad_norm": 0.0790674090385437, "learning_rate": 0.01, "loss": 2.0282, "step": 24729 }, { "epoch": 2.54209065679926, "grad_norm": 0.09643759578466415, "learning_rate": 0.01, "loss": 2.008, "step": 24732 }, { "epoch": 2.542399013259328, "grad_norm": 0.09790430217981339, "learning_rate": 0.01, "loss": 1.9987, "step": 24735 }, { "epoch": 2.5427073697193956, "grad_norm": 0.04904096946120262, "learning_rate": 0.01, "loss": 2.0366, "step": 24738 }, { "epoch": 2.5430157261794637, "grad_norm": 0.042802706360816956, "learning_rate": 0.01, "loss": 2.0548, "step": 24741 }, { "epoch": 2.5433240826395314, "grad_norm": 0.04947663098573685, "learning_rate": 0.01, "loss": 2.03, "step": 24744 }, { "epoch": 2.543632439099599, "grad_norm": 0.040841687470674515, "learning_rate": 0.01, "loss": 2.0246, "step": 24747 }, { "epoch": 2.543940795559667, "grad_norm": 0.051419809460639954, "learning_rate": 0.01, "loss": 2.0352, "step": 24750 }, { "epoch": 2.544249152019735, "grad_norm": 0.07173865288496017, "learning_rate": 0.01, "loss": 2.0396, "step": 24753 }, { "epoch": 2.5445575084798024, "grad_norm": 0.07664339989423752, "learning_rate": 0.01, "loss": 2.0209, "step": 24756 }, { "epoch": 2.5448658649398705, "grad_norm": 0.05180468037724495, "learning_rate": 0.01, "loss": 2.0375, "step": 24759 }, { "epoch": 2.545174221399938, "grad_norm": 0.03839515894651413, "learning_rate": 0.01, "loss": 2.0408, "step": 24762 }, { "epoch": 2.5454825778600063, "grad_norm": 0.08712394535541534, "learning_rate": 0.01, "loss": 2.0303, "step": 24765 }, { "epoch": 2.545790934320074, "grad_norm": 0.06906873732805252, "learning_rate": 0.01, "loss": 2.0103, "step": 24768 }, { "epoch": 2.546099290780142, "grad_norm": 0.04779994115233421, "learning_rate": 0.01, "loss": 2.0007, "step": 24771 }, { "epoch": 2.5464076472402097, "grad_norm": 0.03945513069629669, "learning_rate": 0.01, "loss": 2.0207, "step": 24774 }, { "epoch": 2.5467160037002774, "grad_norm": 0.04089882969856262, "learning_rate": 0.01, "loss": 2.0475, "step": 24777 }, { "epoch": 2.5470243601603455, "grad_norm": 0.04492718353867531, "learning_rate": 0.01, "loss": 2.0333, "step": 24780 }, { "epoch": 2.547332716620413, "grad_norm": 0.0761101022362709, "learning_rate": 0.01, "loss": 2.024, "step": 24783 }, { "epoch": 2.547641073080481, "grad_norm": 0.09586388617753983, "learning_rate": 0.01, "loss": 2.0455, "step": 24786 }, { "epoch": 2.547949429540549, "grad_norm": 0.0410308912396431, "learning_rate": 0.01, "loss": 2.0067, "step": 24789 }, { "epoch": 2.5482577860006166, "grad_norm": 0.0583110935986042, "learning_rate": 0.01, "loss": 2.0433, "step": 24792 }, { "epoch": 2.5485661424606847, "grad_norm": 0.03310194984078407, "learning_rate": 0.01, "loss": 2.0022, "step": 24795 }, { "epoch": 2.5488744989207524, "grad_norm": 0.0849560797214508, "learning_rate": 0.01, "loss": 2.0458, "step": 24798 }, { "epoch": 2.5491828553808205, "grad_norm": 0.052898190915584564, "learning_rate": 0.01, "loss": 2.0099, "step": 24801 }, { "epoch": 2.549491211840888, "grad_norm": 0.09630381315946579, "learning_rate": 0.01, "loss": 2.0312, "step": 24804 }, { "epoch": 2.5497995683009558, "grad_norm": 0.04892333596944809, "learning_rate": 0.01, "loss": 2.0151, "step": 24807 }, { "epoch": 2.550107924761024, "grad_norm": 0.09465577453374863, "learning_rate": 0.01, "loss": 2.0163, "step": 24810 }, { "epoch": 2.5504162812210915, "grad_norm": 0.0832308977842331, "learning_rate": 0.01, "loss": 2.0446, "step": 24813 }, { "epoch": 2.550724637681159, "grad_norm": 0.11276236176490784, "learning_rate": 0.01, "loss": 2.0243, "step": 24816 }, { "epoch": 2.5510329941412273, "grad_norm": 0.08327414095401764, "learning_rate": 0.01, "loss": 2.0607, "step": 24819 }, { "epoch": 2.551341350601295, "grad_norm": 0.05502014979720116, "learning_rate": 0.01, "loss": 2.0281, "step": 24822 }, { "epoch": 2.551649707061363, "grad_norm": 0.03681863471865654, "learning_rate": 0.01, "loss": 2.0101, "step": 24825 }, { "epoch": 2.5519580635214307, "grad_norm": 0.08096860349178314, "learning_rate": 0.01, "loss": 1.9904, "step": 24828 }, { "epoch": 2.552266419981499, "grad_norm": 0.05901675671339035, "learning_rate": 0.01, "loss": 2.046, "step": 24831 }, { "epoch": 2.5525747764415665, "grad_norm": 0.09850065410137177, "learning_rate": 0.01, "loss": 2.0495, "step": 24834 }, { "epoch": 2.552883132901634, "grad_norm": 0.08438712358474731, "learning_rate": 0.01, "loss": 2.0185, "step": 24837 }, { "epoch": 2.5531914893617023, "grad_norm": 0.04949135333299637, "learning_rate": 0.01, "loss": 2.0197, "step": 24840 }, { "epoch": 2.55349984582177, "grad_norm": 0.044099997729063034, "learning_rate": 0.01, "loss": 2.0275, "step": 24843 }, { "epoch": 2.5538082022818376, "grad_norm": 0.08626649528741837, "learning_rate": 0.01, "loss": 2.0246, "step": 24846 }, { "epoch": 2.5541165587419057, "grad_norm": 0.08545881509780884, "learning_rate": 0.01, "loss": 2.0221, "step": 24849 }, { "epoch": 2.5544249152019733, "grad_norm": 0.06181343272328377, "learning_rate": 0.01, "loss": 2.0379, "step": 24852 }, { "epoch": 2.5547332716620414, "grad_norm": 0.0839785784482956, "learning_rate": 0.01, "loss": 2.0309, "step": 24855 }, { "epoch": 2.555041628122109, "grad_norm": 0.055504992604255676, "learning_rate": 0.01, "loss": 2.0333, "step": 24858 }, { "epoch": 2.555349984582177, "grad_norm": 0.04236135631799698, "learning_rate": 0.01, "loss": 1.9981, "step": 24861 }, { "epoch": 2.555658341042245, "grad_norm": 0.035614918917417526, "learning_rate": 0.01, "loss": 2.006, "step": 24864 }, { "epoch": 2.5559666975023125, "grad_norm": 0.04150492325425148, "learning_rate": 0.01, "loss": 2.0414, "step": 24867 }, { "epoch": 2.5562750539623806, "grad_norm": 0.07994359731674194, "learning_rate": 0.01, "loss": 2.0302, "step": 24870 }, { "epoch": 2.5565834104224483, "grad_norm": 0.08954035490751266, "learning_rate": 0.01, "loss": 2.0208, "step": 24873 }, { "epoch": 2.556891766882516, "grad_norm": 0.1362268626689911, "learning_rate": 0.01, "loss": 2.0211, "step": 24876 }, { "epoch": 2.557200123342584, "grad_norm": 0.11425944417715073, "learning_rate": 0.01, "loss": 2.0126, "step": 24879 }, { "epoch": 2.5575084798026517, "grad_norm": 0.07083035260438919, "learning_rate": 0.01, "loss": 2.0312, "step": 24882 }, { "epoch": 2.55781683626272, "grad_norm": 0.06250528246164322, "learning_rate": 0.01, "loss": 2.0317, "step": 24885 }, { "epoch": 2.5581251927227875, "grad_norm": 0.047506481409072876, "learning_rate": 0.01, "loss": 2.0134, "step": 24888 }, { "epoch": 2.5584335491828556, "grad_norm": 0.04237549751996994, "learning_rate": 0.01, "loss": 2.0227, "step": 24891 }, { "epoch": 2.5587419056429233, "grad_norm": 0.04128411412239075, "learning_rate": 0.01, "loss": 2.0122, "step": 24894 }, { "epoch": 2.559050262102991, "grad_norm": 0.03886473551392555, "learning_rate": 0.01, "loss": 2.0385, "step": 24897 }, { "epoch": 2.559358618563059, "grad_norm": 0.1163051575422287, "learning_rate": 0.01, "loss": 2.0456, "step": 24900 }, { "epoch": 2.5596669750231267, "grad_norm": 0.04279797896742821, "learning_rate": 0.01, "loss": 2.0122, "step": 24903 }, { "epoch": 2.5599753314831943, "grad_norm": 0.08159471303224564, "learning_rate": 0.01, "loss": 2.0218, "step": 24906 }, { "epoch": 2.5602836879432624, "grad_norm": 0.06161525472998619, "learning_rate": 0.01, "loss": 2.009, "step": 24909 }, { "epoch": 2.56059204440333, "grad_norm": 0.05011424049735069, "learning_rate": 0.01, "loss": 2.0001, "step": 24912 }, { "epoch": 2.560900400863398, "grad_norm": 0.05973159521818161, "learning_rate": 0.01, "loss": 2.0289, "step": 24915 }, { "epoch": 2.561208757323466, "grad_norm": 0.07461394369602203, "learning_rate": 0.01, "loss": 2.0357, "step": 24918 }, { "epoch": 2.561517113783534, "grad_norm": 0.09631699323654175, "learning_rate": 0.01, "loss": 2.0234, "step": 24921 }, { "epoch": 2.5618254702436016, "grad_norm": 0.05727219581604004, "learning_rate": 0.01, "loss": 2.0404, "step": 24924 }, { "epoch": 2.5621338267036693, "grad_norm": 0.08594338595867157, "learning_rate": 0.01, "loss": 2.0166, "step": 24927 }, { "epoch": 2.5624421831637374, "grad_norm": 0.1109083890914917, "learning_rate": 0.01, "loss": 2.0185, "step": 24930 }, { "epoch": 2.562750539623805, "grad_norm": 0.0593339204788208, "learning_rate": 0.01, "loss": 2.0064, "step": 24933 }, { "epoch": 2.5630588960838727, "grad_norm": 0.0381302647292614, "learning_rate": 0.01, "loss": 2.0236, "step": 24936 }, { "epoch": 2.563367252543941, "grad_norm": 0.056093595921993256, "learning_rate": 0.01, "loss": 2.0269, "step": 24939 }, { "epoch": 2.5636756090040085, "grad_norm": 0.11212731897830963, "learning_rate": 0.01, "loss": 2.0488, "step": 24942 }, { "epoch": 2.5639839654640766, "grad_norm": 0.07110024988651276, "learning_rate": 0.01, "loss": 2.013, "step": 24945 }, { "epoch": 2.5642923219241442, "grad_norm": 0.05951390787959099, "learning_rate": 0.01, "loss": 2.0412, "step": 24948 }, { "epoch": 2.5646006783842124, "grad_norm": 0.07836031913757324, "learning_rate": 0.01, "loss": 2.0143, "step": 24951 }, { "epoch": 2.56490903484428, "grad_norm": 0.06882999837398529, "learning_rate": 0.01, "loss": 2.0065, "step": 24954 }, { "epoch": 2.5652173913043477, "grad_norm": 0.0868605375289917, "learning_rate": 0.01, "loss": 2.0111, "step": 24957 }, { "epoch": 2.5655257477644158, "grad_norm": 0.10812171548604965, "learning_rate": 0.01, "loss": 2.0462, "step": 24960 }, { "epoch": 2.5658341042244834, "grad_norm": 0.07124783843755722, "learning_rate": 0.01, "loss": 2.013, "step": 24963 }, { "epoch": 2.566142460684551, "grad_norm": 0.037611838430166245, "learning_rate": 0.01, "loss": 2.0277, "step": 24966 }, { "epoch": 2.566450817144619, "grad_norm": 0.03723758086562157, "learning_rate": 0.01, "loss": 2.0187, "step": 24969 }, { "epoch": 2.566759173604687, "grad_norm": 0.08805309981107712, "learning_rate": 0.01, "loss": 2.01, "step": 24972 }, { "epoch": 2.567067530064755, "grad_norm": 0.10381683707237244, "learning_rate": 0.01, "loss": 2.0253, "step": 24975 }, { "epoch": 2.5673758865248226, "grad_norm": 0.11186369508504868, "learning_rate": 0.01, "loss": 2.0271, "step": 24978 }, { "epoch": 2.5676842429848907, "grad_norm": 0.07986850291490555, "learning_rate": 0.01, "loss": 2.0252, "step": 24981 }, { "epoch": 2.5679925994449584, "grad_norm": 0.04094192385673523, "learning_rate": 0.01, "loss": 1.9982, "step": 24984 }, { "epoch": 2.568300955905026, "grad_norm": 0.049611032009124756, "learning_rate": 0.01, "loss": 2.0283, "step": 24987 }, { "epoch": 2.568609312365094, "grad_norm": 0.05613689869642258, "learning_rate": 0.01, "loss": 2.0304, "step": 24990 }, { "epoch": 2.568917668825162, "grad_norm": 0.051894500851631165, "learning_rate": 0.01, "loss": 2.0069, "step": 24993 }, { "epoch": 2.5692260252852295, "grad_norm": 0.04092536121606827, "learning_rate": 0.01, "loss": 2.0283, "step": 24996 }, { "epoch": 2.5695343817452976, "grad_norm": 0.03474249318242073, "learning_rate": 0.01, "loss": 2.0083, "step": 24999 }, { "epoch": 2.5698427382053652, "grad_norm": 0.04513520747423172, "learning_rate": 0.01, "loss": 2.0288, "step": 25002 }, { "epoch": 2.5701510946654333, "grad_norm": 0.06130135431885719, "learning_rate": 0.01, "loss": 2.0245, "step": 25005 }, { "epoch": 2.570459451125501, "grad_norm": 0.07398026436567307, "learning_rate": 0.01, "loss": 2.0199, "step": 25008 }, { "epoch": 2.570767807585569, "grad_norm": 0.06060103699564934, "learning_rate": 0.01, "loss": 1.9956, "step": 25011 }, { "epoch": 2.5710761640456368, "grad_norm": 0.051868923008441925, "learning_rate": 0.01, "loss": 2.0355, "step": 25014 }, { "epoch": 2.5713845205057044, "grad_norm": 0.09465671330690384, "learning_rate": 0.01, "loss": 2.014, "step": 25017 }, { "epoch": 2.5716928769657725, "grad_norm": 0.048888836055994034, "learning_rate": 0.01, "loss": 2.0163, "step": 25020 }, { "epoch": 2.57200123342584, "grad_norm": 0.04938677325844765, "learning_rate": 0.01, "loss": 2.0165, "step": 25023 }, { "epoch": 2.572309589885908, "grad_norm": 0.1066848635673523, "learning_rate": 0.01, "loss": 2.0354, "step": 25026 }, { "epoch": 2.572617946345976, "grad_norm": 0.044199470430612564, "learning_rate": 0.01, "loss": 2.0358, "step": 25029 }, { "epoch": 2.572926302806044, "grad_norm": 0.06313291937112808, "learning_rate": 0.01, "loss": 2.0166, "step": 25032 }, { "epoch": 2.5732346592661117, "grad_norm": 0.08843620121479034, "learning_rate": 0.01, "loss": 2.0125, "step": 25035 }, { "epoch": 2.5735430157261794, "grad_norm": 0.028659775853157043, "learning_rate": 0.01, "loss": 2.0324, "step": 25038 }, { "epoch": 2.5738513721862475, "grad_norm": 0.09034299850463867, "learning_rate": 0.01, "loss": 2.0311, "step": 25041 }, { "epoch": 2.574159728646315, "grad_norm": 0.08496701717376709, "learning_rate": 0.01, "loss": 2.0457, "step": 25044 }, { "epoch": 2.574468085106383, "grad_norm": 0.04633186012506485, "learning_rate": 0.01, "loss": 2.0204, "step": 25047 }, { "epoch": 2.574776441566451, "grad_norm": 0.05091328173875809, "learning_rate": 0.01, "loss": 2.0622, "step": 25050 }, { "epoch": 2.5750847980265186, "grad_norm": 0.03941154107451439, "learning_rate": 0.01, "loss": 2.0097, "step": 25053 }, { "epoch": 2.5753931544865862, "grad_norm": 0.07574623823165894, "learning_rate": 0.01, "loss": 2.0119, "step": 25056 }, { "epoch": 2.5757015109466543, "grad_norm": 0.07106275856494904, "learning_rate": 0.01, "loss": 2.0164, "step": 25059 }, { "epoch": 2.5760098674067224, "grad_norm": 0.06767601519823074, "learning_rate": 0.01, "loss": 2.0243, "step": 25062 }, { "epoch": 2.57631822386679, "grad_norm": 0.05537039414048195, "learning_rate": 0.01, "loss": 2.0375, "step": 25065 }, { "epoch": 2.5766265803268578, "grad_norm": 0.06547438353300095, "learning_rate": 0.01, "loss": 2.0014, "step": 25068 }, { "epoch": 2.576934936786926, "grad_norm": 0.0862760990858078, "learning_rate": 0.01, "loss": 2.004, "step": 25071 }, { "epoch": 2.5772432932469935, "grad_norm": 0.041683733463287354, "learning_rate": 0.01, "loss": 2.0484, "step": 25074 }, { "epoch": 2.577551649707061, "grad_norm": 0.049321915954351425, "learning_rate": 0.01, "loss": 2.0158, "step": 25077 }, { "epoch": 2.5778600061671293, "grad_norm": 0.09261754900217056, "learning_rate": 0.01, "loss": 2.0162, "step": 25080 }, { "epoch": 2.578168362627197, "grad_norm": 0.07979609072208405, "learning_rate": 0.01, "loss": 2.0425, "step": 25083 }, { "epoch": 2.5784767190872646, "grad_norm": 0.06629879772663116, "learning_rate": 0.01, "loss": 2.0339, "step": 25086 }, { "epoch": 2.5787850755473327, "grad_norm": 0.07896976172924042, "learning_rate": 0.01, "loss": 2.0144, "step": 25089 }, { "epoch": 2.579093432007401, "grad_norm": 0.06102503091096878, "learning_rate": 0.01, "loss": 2.0012, "step": 25092 }, { "epoch": 2.5794017884674685, "grad_norm": 0.07823985069990158, "learning_rate": 0.01, "loss": 2.0207, "step": 25095 }, { "epoch": 2.579710144927536, "grad_norm": 0.08163253217935562, "learning_rate": 0.01, "loss": 2.0583, "step": 25098 }, { "epoch": 2.5800185013876042, "grad_norm": 0.06111651286482811, "learning_rate": 0.01, "loss": 2.0434, "step": 25101 }, { "epoch": 2.580326857847672, "grad_norm": 0.03768099471926689, "learning_rate": 0.01, "loss": 2.0185, "step": 25104 }, { "epoch": 2.5806352143077396, "grad_norm": 0.0871853157877922, "learning_rate": 0.01, "loss": 2.0293, "step": 25107 }, { "epoch": 2.5809435707678077, "grad_norm": 0.05394020304083824, "learning_rate": 0.01, "loss": 2.0249, "step": 25110 }, { "epoch": 2.5812519272278753, "grad_norm": 0.07910983264446259, "learning_rate": 0.01, "loss": 2.042, "step": 25113 }, { "epoch": 2.581560283687943, "grad_norm": 0.06922271102666855, "learning_rate": 0.01, "loss": 2.0493, "step": 25116 }, { "epoch": 2.581868640148011, "grad_norm": 0.05517781525850296, "learning_rate": 0.01, "loss": 2.0161, "step": 25119 }, { "epoch": 2.582176996608079, "grad_norm": 0.05166595056653023, "learning_rate": 0.01, "loss": 2.0402, "step": 25122 }, { "epoch": 2.582485353068147, "grad_norm": 0.045153357088565826, "learning_rate": 0.01, "loss": 2.0366, "step": 25125 }, { "epoch": 2.5827937095282145, "grad_norm": 0.07232387363910675, "learning_rate": 0.01, "loss": 2.0345, "step": 25128 }, { "epoch": 2.5831020659882826, "grad_norm": 0.035037536174058914, "learning_rate": 0.01, "loss": 2.0195, "step": 25131 }, { "epoch": 2.5834104224483503, "grad_norm": 0.039313822984695435, "learning_rate": 0.01, "loss": 2.0196, "step": 25134 }, { "epoch": 2.583718778908418, "grad_norm": 0.0632469579577446, "learning_rate": 0.01, "loss": 2.0454, "step": 25137 }, { "epoch": 2.584027135368486, "grad_norm": 0.10993051528930664, "learning_rate": 0.01, "loss": 2.0289, "step": 25140 }, { "epoch": 2.5843354918285537, "grad_norm": 0.0852990597486496, "learning_rate": 0.01, "loss": 2.053, "step": 25143 }, { "epoch": 2.5846438482886214, "grad_norm": 0.0442403107881546, "learning_rate": 0.01, "loss": 2.0089, "step": 25146 }, { "epoch": 2.5849522047486895, "grad_norm": 0.03534874692559242, "learning_rate": 0.01, "loss": 2.0297, "step": 25149 }, { "epoch": 2.5852605612087576, "grad_norm": 0.031708016991615295, "learning_rate": 0.01, "loss": 2.0064, "step": 25152 }, { "epoch": 2.5855689176688252, "grad_norm": 0.056695304811000824, "learning_rate": 0.01, "loss": 2.0265, "step": 25155 }, { "epoch": 2.585877274128893, "grad_norm": 0.12697716057300568, "learning_rate": 0.01, "loss": 2.0415, "step": 25158 }, { "epoch": 2.586185630588961, "grad_norm": 0.07686912268400192, "learning_rate": 0.01, "loss": 2.0098, "step": 25161 }, { "epoch": 2.5864939870490287, "grad_norm": 0.10015466809272766, "learning_rate": 0.01, "loss": 2.0229, "step": 25164 }, { "epoch": 2.5868023435090963, "grad_norm": 0.05786514654755592, "learning_rate": 0.01, "loss": 2.0025, "step": 25167 }, { "epoch": 2.5871106999691644, "grad_norm": 0.05359407886862755, "learning_rate": 0.01, "loss": 2.0075, "step": 25170 }, { "epoch": 2.587419056429232, "grad_norm": 0.10763208568096161, "learning_rate": 0.01, "loss": 2.0611, "step": 25173 }, { "epoch": 2.5877274128893, "grad_norm": 0.06255360692739487, "learning_rate": 0.01, "loss": 2.0173, "step": 25176 }, { "epoch": 2.588035769349368, "grad_norm": 0.0519418902695179, "learning_rate": 0.01, "loss": 2.0095, "step": 25179 }, { "epoch": 2.588344125809436, "grad_norm": 0.09810636937618256, "learning_rate": 0.01, "loss": 2.0012, "step": 25182 }, { "epoch": 2.5886524822695036, "grad_norm": 0.05091201886534691, "learning_rate": 0.01, "loss": 2.0221, "step": 25185 }, { "epoch": 2.5889608387295713, "grad_norm": 0.046215660870075226, "learning_rate": 0.01, "loss": 2.0235, "step": 25188 }, { "epoch": 2.5892691951896394, "grad_norm": 0.06873856484889984, "learning_rate": 0.01, "loss": 2.0234, "step": 25191 }, { "epoch": 2.589577551649707, "grad_norm": 0.08075796812772751, "learning_rate": 0.01, "loss": 2.0399, "step": 25194 }, { "epoch": 2.5898859081097747, "grad_norm": 0.10317845642566681, "learning_rate": 0.01, "loss": 2.0087, "step": 25197 }, { "epoch": 2.590194264569843, "grad_norm": 0.07780349254608154, "learning_rate": 0.01, "loss": 2.0052, "step": 25200 }, { "epoch": 2.5905026210299105, "grad_norm": 0.0646161437034607, "learning_rate": 0.01, "loss": 2.0077, "step": 25203 }, { "epoch": 2.5908109774899786, "grad_norm": 0.10224328190088272, "learning_rate": 0.01, "loss": 2.0091, "step": 25206 }, { "epoch": 2.5911193339500462, "grad_norm": 0.0714394599199295, "learning_rate": 0.01, "loss": 2.0246, "step": 25209 }, { "epoch": 2.5914276904101143, "grad_norm": 0.06261731684207916, "learning_rate": 0.01, "loss": 1.9908, "step": 25212 }, { "epoch": 2.591736046870182, "grad_norm": 0.07271763682365417, "learning_rate": 0.01, "loss": 2.0489, "step": 25215 }, { "epoch": 2.5920444033302497, "grad_norm": 0.07044383883476257, "learning_rate": 0.01, "loss": 2.0461, "step": 25218 }, { "epoch": 2.5923527597903178, "grad_norm": 0.10808674246072769, "learning_rate": 0.01, "loss": 2.0229, "step": 25221 }, { "epoch": 2.5926611162503854, "grad_norm": 0.049697790294885635, "learning_rate": 0.01, "loss": 2.0139, "step": 25224 }, { "epoch": 2.592969472710453, "grad_norm": 0.08951854705810547, "learning_rate": 0.01, "loss": 2.0228, "step": 25227 }, { "epoch": 2.593277829170521, "grad_norm": 0.06834417581558228, "learning_rate": 0.01, "loss": 2.0362, "step": 25230 }, { "epoch": 2.593586185630589, "grad_norm": 0.037199974060058594, "learning_rate": 0.01, "loss": 1.9987, "step": 25233 }, { "epoch": 2.593894542090657, "grad_norm": 0.056284189224243164, "learning_rate": 0.01, "loss": 2.0313, "step": 25236 }, { "epoch": 2.5942028985507246, "grad_norm": 0.07686278969049454, "learning_rate": 0.01, "loss": 2.0376, "step": 25239 }, { "epoch": 2.5945112550107927, "grad_norm": 0.043646443635225296, "learning_rate": 0.01, "loss": 2.0145, "step": 25242 }, { "epoch": 2.5948196114708604, "grad_norm": 0.0594371035695076, "learning_rate": 0.01, "loss": 2.0147, "step": 25245 }, { "epoch": 2.595127967930928, "grad_norm": 0.08617819100618362, "learning_rate": 0.01, "loss": 2.0307, "step": 25248 }, { "epoch": 2.595436324390996, "grad_norm": 0.13672196865081787, "learning_rate": 0.01, "loss": 2.0515, "step": 25251 }, { "epoch": 2.595744680851064, "grad_norm": 0.062405068427324295, "learning_rate": 0.01, "loss": 2.0205, "step": 25254 }, { "epoch": 2.5960530373111315, "grad_norm": 0.042263686656951904, "learning_rate": 0.01, "loss": 2.0336, "step": 25257 }, { "epoch": 2.5963613937711996, "grad_norm": 0.04821668192744255, "learning_rate": 0.01, "loss": 2.0231, "step": 25260 }, { "epoch": 2.5966697502312672, "grad_norm": 0.048817023634910583, "learning_rate": 0.01, "loss": 2.0178, "step": 25263 }, { "epoch": 2.5969781066913353, "grad_norm": 0.05794850364327431, "learning_rate": 0.01, "loss": 2.023, "step": 25266 }, { "epoch": 2.597286463151403, "grad_norm": 0.05057196319103241, "learning_rate": 0.01, "loss": 2.0075, "step": 25269 }, { "epoch": 2.597594819611471, "grad_norm": 0.05443112552165985, "learning_rate": 0.01, "loss": 2.0381, "step": 25272 }, { "epoch": 2.5979031760715388, "grad_norm": 0.0533830001950264, "learning_rate": 0.01, "loss": 2.0201, "step": 25275 }, { "epoch": 2.5982115325316064, "grad_norm": 0.040888138115406036, "learning_rate": 0.01, "loss": 1.9956, "step": 25278 }, { "epoch": 2.5985198889916745, "grad_norm": 0.07154986262321472, "learning_rate": 0.01, "loss": 2.0285, "step": 25281 }, { "epoch": 2.598828245451742, "grad_norm": 0.17314322292804718, "learning_rate": 0.01, "loss": 2.0288, "step": 25284 }, { "epoch": 2.59913660191181, "grad_norm": 0.051602523773908615, "learning_rate": 0.01, "loss": 2.0351, "step": 25287 }, { "epoch": 2.599444958371878, "grad_norm": 0.047731827944517136, "learning_rate": 0.01, "loss": 2.0435, "step": 25290 }, { "epoch": 2.5997533148319456, "grad_norm": 0.0334257036447525, "learning_rate": 0.01, "loss": 2.0074, "step": 25293 }, { "epoch": 2.6000616712920137, "grad_norm": 0.03708617389202118, "learning_rate": 0.01, "loss": 1.9933, "step": 25296 }, { "epoch": 2.6003700277520814, "grad_norm": 0.08540193736553192, "learning_rate": 0.01, "loss": 2.0085, "step": 25299 }, { "epoch": 2.6006783842121495, "grad_norm": 0.1036924496293068, "learning_rate": 0.01, "loss": 2.0238, "step": 25302 }, { "epoch": 2.600986740672217, "grad_norm": 0.056603506207466125, "learning_rate": 0.01, "loss": 2.0161, "step": 25305 }, { "epoch": 2.601295097132285, "grad_norm": 0.1030723974108696, "learning_rate": 0.01, "loss": 2.0414, "step": 25308 }, { "epoch": 2.601603453592353, "grad_norm": 0.060525115579366684, "learning_rate": 0.01, "loss": 2.0083, "step": 25311 }, { "epoch": 2.6019118100524206, "grad_norm": 0.061082128435373306, "learning_rate": 0.01, "loss": 1.987, "step": 25314 }, { "epoch": 2.602220166512488, "grad_norm": 0.045477550476789474, "learning_rate": 0.01, "loss": 2.0251, "step": 25317 }, { "epoch": 2.6025285229725563, "grad_norm": 0.03306104615330696, "learning_rate": 0.01, "loss": 2.0359, "step": 25320 }, { "epoch": 2.602836879432624, "grad_norm": 0.052543554455041885, "learning_rate": 0.01, "loss": 2.0454, "step": 25323 }, { "epoch": 2.603145235892692, "grad_norm": 0.04408182203769684, "learning_rate": 0.01, "loss": 2.0375, "step": 25326 }, { "epoch": 2.6034535923527597, "grad_norm": 0.05216488614678383, "learning_rate": 0.01, "loss": 2.0331, "step": 25329 }, { "epoch": 2.603761948812828, "grad_norm": 0.12084914743900299, "learning_rate": 0.01, "loss": 2.0052, "step": 25332 }, { "epoch": 2.6040703052728955, "grad_norm": 0.09642963856458664, "learning_rate": 0.01, "loss": 2.0161, "step": 25335 }, { "epoch": 2.604378661732963, "grad_norm": 0.06409110128879547, "learning_rate": 0.01, "loss": 2.0052, "step": 25338 }, { "epoch": 2.6046870181930313, "grad_norm": 0.07277770340442657, "learning_rate": 0.01, "loss": 2.0241, "step": 25341 }, { "epoch": 2.604995374653099, "grad_norm": 0.049252595752477646, "learning_rate": 0.01, "loss": 2.0172, "step": 25344 }, { "epoch": 2.6053037311131666, "grad_norm": 0.0495469830930233, "learning_rate": 0.01, "loss": 2.0195, "step": 25347 }, { "epoch": 2.6056120875732347, "grad_norm": 0.06318475306034088, "learning_rate": 0.01, "loss": 1.9937, "step": 25350 }, { "epoch": 2.6059204440333024, "grad_norm": 0.07843147218227386, "learning_rate": 0.01, "loss": 2.0333, "step": 25353 }, { "epoch": 2.6062288004933705, "grad_norm": 0.055239688605070114, "learning_rate": 0.01, "loss": 2.0308, "step": 25356 }, { "epoch": 2.606537156953438, "grad_norm": 0.03876148536801338, "learning_rate": 0.01, "loss": 2.0227, "step": 25359 }, { "epoch": 2.6068455134135062, "grad_norm": 0.12309783697128296, "learning_rate": 0.01, "loss": 2.0233, "step": 25362 }, { "epoch": 2.607153869873574, "grad_norm": 0.09926038980484009, "learning_rate": 0.01, "loss": 2.0305, "step": 25365 }, { "epoch": 2.6074622263336416, "grad_norm": 0.07237336784601212, "learning_rate": 0.01, "loss": 1.9929, "step": 25368 }, { "epoch": 2.6077705827937097, "grad_norm": 0.09653117507696152, "learning_rate": 0.01, "loss": 2.0547, "step": 25371 }, { "epoch": 2.6080789392537773, "grad_norm": 0.0454515665769577, "learning_rate": 0.01, "loss": 2.0324, "step": 25374 }, { "epoch": 2.608387295713845, "grad_norm": 0.04541772976517677, "learning_rate": 0.01, "loss": 2.0301, "step": 25377 }, { "epoch": 2.608695652173913, "grad_norm": 0.05721856653690338, "learning_rate": 0.01, "loss": 2.011, "step": 25380 }, { "epoch": 2.6090040086339807, "grad_norm": 0.0526764839887619, "learning_rate": 0.01, "loss": 2.0176, "step": 25383 }, { "epoch": 2.609312365094049, "grad_norm": 0.08415975421667099, "learning_rate": 0.01, "loss": 1.9948, "step": 25386 }, { "epoch": 2.6096207215541165, "grad_norm": 0.07950541377067566, "learning_rate": 0.01, "loss": 2.0285, "step": 25389 }, { "epoch": 2.6099290780141846, "grad_norm": 0.07684530317783356, "learning_rate": 0.01, "loss": 2.0283, "step": 25392 }, { "epoch": 2.6102374344742523, "grad_norm": 0.0458965003490448, "learning_rate": 0.01, "loss": 2.0335, "step": 25395 }, { "epoch": 2.61054579093432, "grad_norm": 0.11776190251111984, "learning_rate": 0.01, "loss": 2.0117, "step": 25398 }, { "epoch": 2.610854147394388, "grad_norm": 0.03954809904098511, "learning_rate": 0.01, "loss": 2.0352, "step": 25401 }, { "epoch": 2.6111625038544557, "grad_norm": 0.08056820929050446, "learning_rate": 0.01, "loss": 2.0338, "step": 25404 }, { "epoch": 2.6114708603145234, "grad_norm": 0.03288201987743378, "learning_rate": 0.01, "loss": 2.0067, "step": 25407 }, { "epoch": 2.6117792167745915, "grad_norm": 0.06156465783715248, "learning_rate": 0.01, "loss": 2.0455, "step": 25410 }, { "epoch": 2.612087573234659, "grad_norm": 0.04141581431031227, "learning_rate": 0.01, "loss": 1.9822, "step": 25413 }, { "epoch": 2.6123959296947272, "grad_norm": 0.06731928139925003, "learning_rate": 0.01, "loss": 2.028, "step": 25416 }, { "epoch": 2.612704286154795, "grad_norm": 0.07682816684246063, "learning_rate": 0.01, "loss": 2.0155, "step": 25419 }, { "epoch": 2.613012642614863, "grad_norm": 0.10766996443271637, "learning_rate": 0.01, "loss": 1.9897, "step": 25422 }, { "epoch": 2.6133209990749307, "grad_norm": 0.11409672349691391, "learning_rate": 0.01, "loss": 1.9823, "step": 25425 }, { "epoch": 2.6136293555349983, "grad_norm": 0.07693130522966385, "learning_rate": 0.01, "loss": 2.02, "step": 25428 }, { "epoch": 2.6139377119950664, "grad_norm": 0.034606434404850006, "learning_rate": 0.01, "loss": 2.043, "step": 25431 }, { "epoch": 2.614246068455134, "grad_norm": 0.0957694724202156, "learning_rate": 0.01, "loss": 2.0527, "step": 25434 }, { "epoch": 2.6145544249152017, "grad_norm": 0.05739649757742882, "learning_rate": 0.01, "loss": 2.0209, "step": 25437 }, { "epoch": 2.61486278137527, "grad_norm": 0.05702357366681099, "learning_rate": 0.01, "loss": 2.0321, "step": 25440 }, { "epoch": 2.6151711378353375, "grad_norm": 0.10596863180398941, "learning_rate": 0.01, "loss": 2.023, "step": 25443 }, { "epoch": 2.6154794942954056, "grad_norm": 0.07135487347841263, "learning_rate": 0.01, "loss": 2.0192, "step": 25446 }, { "epoch": 2.6157878507554733, "grad_norm": 0.04034152254462242, "learning_rate": 0.01, "loss": 2.034, "step": 25449 }, { "epoch": 2.6160962072155414, "grad_norm": 0.05510259047150612, "learning_rate": 0.01, "loss": 2.0201, "step": 25452 }, { "epoch": 2.616404563675609, "grad_norm": 0.03920494019985199, "learning_rate": 0.01, "loss": 1.9831, "step": 25455 }, { "epoch": 2.6167129201356767, "grad_norm": 0.06935703754425049, "learning_rate": 0.01, "loss": 2.0242, "step": 25458 }, { "epoch": 2.617021276595745, "grad_norm": 0.04524112120270729, "learning_rate": 0.01, "loss": 2.0415, "step": 25461 }, { "epoch": 2.6173296330558125, "grad_norm": 0.03639009967446327, "learning_rate": 0.01, "loss": 2.0086, "step": 25464 }, { "epoch": 2.61763798951588, "grad_norm": 0.0551072359085083, "learning_rate": 0.01, "loss": 2.0279, "step": 25467 }, { "epoch": 2.617946345975948, "grad_norm": 0.03943365439772606, "learning_rate": 0.01, "loss": 2.0229, "step": 25470 }, { "epoch": 2.618254702436016, "grad_norm": 0.05363466218113899, "learning_rate": 0.01, "loss": 2.0285, "step": 25473 }, { "epoch": 2.618563058896084, "grad_norm": 0.10065023601055145, "learning_rate": 0.01, "loss": 2.0307, "step": 25476 }, { "epoch": 2.6188714153561516, "grad_norm": 0.06447052210569382, "learning_rate": 0.01, "loss": 2.0375, "step": 25479 }, { "epoch": 2.6191797718162197, "grad_norm": 0.03966406360268593, "learning_rate": 0.01, "loss": 1.9923, "step": 25482 }, { "epoch": 2.6194881282762874, "grad_norm": 0.05280005559325218, "learning_rate": 0.01, "loss": 2.0184, "step": 25485 }, { "epoch": 2.619796484736355, "grad_norm": 0.1111968457698822, "learning_rate": 0.01, "loss": 1.9933, "step": 25488 }, { "epoch": 2.620104841196423, "grad_norm": 0.11483361572027206, "learning_rate": 0.01, "loss": 2.0007, "step": 25491 }, { "epoch": 2.620413197656491, "grad_norm": 0.039019446820020676, "learning_rate": 0.01, "loss": 2.0435, "step": 25494 }, { "epoch": 2.6207215541165585, "grad_norm": 0.05683600530028343, "learning_rate": 0.01, "loss": 2.0323, "step": 25497 }, { "epoch": 2.6210299105766266, "grad_norm": 0.042798321694135666, "learning_rate": 0.01, "loss": 2.0255, "step": 25500 }, { "epoch": 2.6213382670366943, "grad_norm": 0.040838126093149185, "learning_rate": 0.01, "loss": 2.0164, "step": 25503 }, { "epoch": 2.6216466234967624, "grad_norm": 0.07249750196933746, "learning_rate": 0.01, "loss": 2.0395, "step": 25506 }, { "epoch": 2.62195497995683, "grad_norm": 0.08197704702615738, "learning_rate": 0.01, "loss": 1.9932, "step": 25509 }, { "epoch": 2.622263336416898, "grad_norm": 0.11885122954845428, "learning_rate": 0.01, "loss": 2.0203, "step": 25512 }, { "epoch": 2.622571692876966, "grad_norm": 0.07574759423732758, "learning_rate": 0.01, "loss": 2.0443, "step": 25515 }, { "epoch": 2.6228800493370334, "grad_norm": 0.05106687173247337, "learning_rate": 0.01, "loss": 2.0222, "step": 25518 }, { "epoch": 2.6231884057971016, "grad_norm": 0.03381403908133507, "learning_rate": 0.01, "loss": 2.0167, "step": 25521 }, { "epoch": 2.623496762257169, "grad_norm": 0.04259166494011879, "learning_rate": 0.01, "loss": 1.9915, "step": 25524 }, { "epoch": 2.623805118717237, "grad_norm": 0.06276170909404755, "learning_rate": 0.01, "loss": 2.0116, "step": 25527 }, { "epoch": 2.624113475177305, "grad_norm": 0.05478539317846298, "learning_rate": 0.01, "loss": 2.0299, "step": 25530 }, { "epoch": 2.6244218316373726, "grad_norm": 0.05915123224258423, "learning_rate": 0.01, "loss": 2.0211, "step": 25533 }, { "epoch": 2.6247301880974407, "grad_norm": 0.09082819521427155, "learning_rate": 0.01, "loss": 2.038, "step": 25536 }, { "epoch": 2.6250385445575084, "grad_norm": 0.04604744166135788, "learning_rate": 0.01, "loss": 2.0395, "step": 25539 }, { "epoch": 2.6253469010175765, "grad_norm": 0.04548676684498787, "learning_rate": 0.01, "loss": 2.0144, "step": 25542 }, { "epoch": 2.625655257477644, "grad_norm": 0.03629077225923538, "learning_rate": 0.01, "loss": 2.0237, "step": 25545 }, { "epoch": 2.625963613937712, "grad_norm": 0.060490988194942474, "learning_rate": 0.01, "loss": 2.0049, "step": 25548 }, { "epoch": 2.62627197039778, "grad_norm": 0.12501440942287445, "learning_rate": 0.01, "loss": 2.0289, "step": 25551 }, { "epoch": 2.6265803268578476, "grad_norm": 0.052221618592739105, "learning_rate": 0.01, "loss": 2.0092, "step": 25554 }, { "epoch": 2.6268886833179153, "grad_norm": 0.08127149194478989, "learning_rate": 0.01, "loss": 2.0133, "step": 25557 }, { "epoch": 2.6271970397779834, "grad_norm": 0.13186825811862946, "learning_rate": 0.01, "loss": 2.0281, "step": 25560 }, { "epoch": 2.6275053962380515, "grad_norm": 0.10297831892967224, "learning_rate": 0.01, "loss": 2.0137, "step": 25563 }, { "epoch": 2.627813752698119, "grad_norm": 0.04737088829278946, "learning_rate": 0.01, "loss": 2.0334, "step": 25566 }, { "epoch": 2.628122109158187, "grad_norm": 0.055748652666807175, "learning_rate": 0.01, "loss": 2.0085, "step": 25569 }, { "epoch": 2.628430465618255, "grad_norm": 0.08968318998813629, "learning_rate": 0.01, "loss": 2.023, "step": 25572 }, { "epoch": 2.6287388220783225, "grad_norm": 0.04962621256709099, "learning_rate": 0.01, "loss": 2.0111, "step": 25575 }, { "epoch": 2.62904717853839, "grad_norm": 0.033645693212747574, "learning_rate": 0.01, "loss": 2.0332, "step": 25578 }, { "epoch": 2.6293555349984583, "grad_norm": 0.05864016339182854, "learning_rate": 0.01, "loss": 2.0159, "step": 25581 }, { "epoch": 2.629663891458526, "grad_norm": 0.12207403779029846, "learning_rate": 0.01, "loss": 2.0282, "step": 25584 }, { "epoch": 2.6299722479185936, "grad_norm": 0.049948643893003464, "learning_rate": 0.01, "loss": 2.0534, "step": 25587 }, { "epoch": 2.6302806043786617, "grad_norm": 0.08774693310260773, "learning_rate": 0.01, "loss": 2.0214, "step": 25590 }, { "epoch": 2.63058896083873, "grad_norm": 0.053613871335983276, "learning_rate": 0.01, "loss": 2.0171, "step": 25593 }, { "epoch": 2.6308973172987975, "grad_norm": 0.06298764050006866, "learning_rate": 0.01, "loss": 2.0197, "step": 25596 }, { "epoch": 2.631205673758865, "grad_norm": 0.03511015325784683, "learning_rate": 0.01, "loss": 2.0239, "step": 25599 }, { "epoch": 2.6315140302189333, "grad_norm": 0.09345996379852295, "learning_rate": 0.01, "loss": 2.0239, "step": 25602 }, { "epoch": 2.631822386679001, "grad_norm": 0.06202877685427666, "learning_rate": 0.01, "loss": 2.0286, "step": 25605 }, { "epoch": 2.6321307431390686, "grad_norm": 0.10231085866689682, "learning_rate": 0.01, "loss": 2.0324, "step": 25608 }, { "epoch": 2.6324390995991367, "grad_norm": 0.12403954565525055, "learning_rate": 0.01, "loss": 2.0074, "step": 25611 }, { "epoch": 2.6327474560592043, "grad_norm": 0.059275902807712555, "learning_rate": 0.01, "loss": 2.0093, "step": 25614 }, { "epoch": 2.633055812519272, "grad_norm": 0.04833563044667244, "learning_rate": 0.01, "loss": 2.0218, "step": 25617 }, { "epoch": 2.63336416897934, "grad_norm": 0.04218841344118118, "learning_rate": 0.01, "loss": 2.0132, "step": 25620 }, { "epoch": 2.633672525439408, "grad_norm": 0.1189088523387909, "learning_rate": 0.01, "loss": 2.0407, "step": 25623 }, { "epoch": 2.633980881899476, "grad_norm": 0.11460559070110321, "learning_rate": 0.01, "loss": 2.0083, "step": 25626 }, { "epoch": 2.6342892383595435, "grad_norm": 0.13970693945884705, "learning_rate": 0.01, "loss": 2.0195, "step": 25629 }, { "epoch": 2.6345975948196116, "grad_norm": 0.06081441789865494, "learning_rate": 0.01, "loss": 2.0285, "step": 25632 }, { "epoch": 2.6349059512796793, "grad_norm": 0.055472832173109055, "learning_rate": 0.01, "loss": 2.0019, "step": 25635 }, { "epoch": 2.635214307739747, "grad_norm": 0.06767648458480835, "learning_rate": 0.01, "loss": 2.0193, "step": 25638 }, { "epoch": 2.635522664199815, "grad_norm": 0.060980260372161865, "learning_rate": 0.01, "loss": 2.0137, "step": 25641 }, { "epoch": 2.6358310206598827, "grad_norm": 0.04839742183685303, "learning_rate": 0.01, "loss": 1.9982, "step": 25644 }, { "epoch": 2.6361393771199504, "grad_norm": 0.04725825786590576, "learning_rate": 0.01, "loss": 2.0176, "step": 25647 }, { "epoch": 2.6364477335800185, "grad_norm": 0.04045959562063217, "learning_rate": 0.01, "loss": 2.0455, "step": 25650 }, { "epoch": 2.6367560900400866, "grad_norm": 0.07219504565000534, "learning_rate": 0.01, "loss": 2.0391, "step": 25653 }, { "epoch": 2.6370644465001543, "grad_norm": 0.0438094437122345, "learning_rate": 0.01, "loss": 2.029, "step": 25656 }, { "epoch": 2.637372802960222, "grad_norm": 0.06555571407079697, "learning_rate": 0.01, "loss": 2.033, "step": 25659 }, { "epoch": 2.63768115942029, "grad_norm": 0.07731533795595169, "learning_rate": 0.01, "loss": 2.0432, "step": 25662 }, { "epoch": 2.6379895158803577, "grad_norm": 0.045945990830659866, "learning_rate": 0.01, "loss": 2.0308, "step": 25665 }, { "epoch": 2.6382978723404253, "grad_norm": 0.07175582647323608, "learning_rate": 0.01, "loss": 2.019, "step": 25668 }, { "epoch": 2.6386062288004934, "grad_norm": 0.07860400527715683, "learning_rate": 0.01, "loss": 2.0291, "step": 25671 }, { "epoch": 2.638914585260561, "grad_norm": 0.05635571479797363, "learning_rate": 0.01, "loss": 2.0519, "step": 25674 }, { "epoch": 2.639222941720629, "grad_norm": 0.07352261245250702, "learning_rate": 0.01, "loss": 2.0235, "step": 25677 }, { "epoch": 2.639531298180697, "grad_norm": 0.07502644509077072, "learning_rate": 0.01, "loss": 1.9877, "step": 25680 }, { "epoch": 2.639839654640765, "grad_norm": 0.05903060361742973, "learning_rate": 0.01, "loss": 2.012, "step": 25683 }, { "epoch": 2.6401480111008326, "grad_norm": 0.07454460859298706, "learning_rate": 0.01, "loss": 1.9988, "step": 25686 }, { "epoch": 2.6404563675609003, "grad_norm": 0.06615950912237167, "learning_rate": 0.01, "loss": 2.0379, "step": 25689 }, { "epoch": 2.6407647240209684, "grad_norm": 0.07897205650806427, "learning_rate": 0.01, "loss": 2.0366, "step": 25692 }, { "epoch": 2.641073080481036, "grad_norm": 0.12898573279380798, "learning_rate": 0.01, "loss": 2.0252, "step": 25695 }, { "epoch": 2.6413814369411037, "grad_norm": 0.10731782764196396, "learning_rate": 0.01, "loss": 2.0508, "step": 25698 }, { "epoch": 2.641689793401172, "grad_norm": 0.07936359196901321, "learning_rate": 0.01, "loss": 1.9993, "step": 25701 }, { "epoch": 2.6419981498612395, "grad_norm": 0.07443369179964066, "learning_rate": 0.01, "loss": 2.0586, "step": 25704 }, { "epoch": 2.6423065063213076, "grad_norm": 0.07027627527713776, "learning_rate": 0.01, "loss": 2.0375, "step": 25707 }, { "epoch": 2.6426148627813753, "grad_norm": 0.04909298196434975, "learning_rate": 0.01, "loss": 2.0234, "step": 25710 }, { "epoch": 2.6429232192414434, "grad_norm": 0.09595979005098343, "learning_rate": 0.01, "loss": 2.0072, "step": 25713 }, { "epoch": 2.643231575701511, "grad_norm": 0.07217621803283691, "learning_rate": 0.01, "loss": 2.0187, "step": 25716 }, { "epoch": 2.6435399321615787, "grad_norm": 0.10547403246164322, "learning_rate": 0.01, "loss": 2.0351, "step": 25719 }, { "epoch": 2.643848288621647, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 1.9996, "step": 25722 }, { "epoch": 2.6441566450817144, "grad_norm": 0.09399348497390747, "learning_rate": 0.01, "loss": 2.0112, "step": 25725 }, { "epoch": 2.644465001541782, "grad_norm": 0.05369775742292404, "learning_rate": 0.01, "loss": 2.0483, "step": 25728 }, { "epoch": 2.64477335800185, "grad_norm": 0.08048120886087418, "learning_rate": 0.01, "loss": 2.0279, "step": 25731 }, { "epoch": 2.645081714461918, "grad_norm": 0.07802169024944305, "learning_rate": 0.01, "loss": 2.0178, "step": 25734 }, { "epoch": 2.645390070921986, "grad_norm": 0.06137097254395485, "learning_rate": 0.01, "loss": 2.0382, "step": 25737 }, { "epoch": 2.6456984273820536, "grad_norm": 0.05498100444674492, "learning_rate": 0.01, "loss": 2.0252, "step": 25740 }, { "epoch": 2.6460067838421217, "grad_norm": 0.057225704193115234, "learning_rate": 0.01, "loss": 2.0243, "step": 25743 }, { "epoch": 2.6463151403021894, "grad_norm": 0.06727226078510284, "learning_rate": 0.01, "loss": 2.0316, "step": 25746 }, { "epoch": 2.646623496762257, "grad_norm": 0.04740385711193085, "learning_rate": 0.01, "loss": 2.0263, "step": 25749 }, { "epoch": 2.646931853222325, "grad_norm": 0.04808951914310455, "learning_rate": 0.01, "loss": 2.0063, "step": 25752 }, { "epoch": 2.647240209682393, "grad_norm": 0.04689471423625946, "learning_rate": 0.01, "loss": 2.0209, "step": 25755 }, { "epoch": 2.6475485661424605, "grad_norm": 0.10928355902433395, "learning_rate": 0.01, "loss": 1.9951, "step": 25758 }, { "epoch": 2.6478569226025286, "grad_norm": 0.054571595042943954, "learning_rate": 0.01, "loss": 2.0216, "step": 25761 }, { "epoch": 2.6481652790625962, "grad_norm": 0.0809570774435997, "learning_rate": 0.01, "loss": 2.0451, "step": 25764 }, { "epoch": 2.6484736355226643, "grad_norm": 0.10407903790473938, "learning_rate": 0.01, "loss": 2.0205, "step": 25767 }, { "epoch": 2.648781991982732, "grad_norm": 0.045180514454841614, "learning_rate": 0.01, "loss": 2.0137, "step": 25770 }, { "epoch": 2.6490903484428, "grad_norm": 0.07642797380685806, "learning_rate": 0.01, "loss": 1.9826, "step": 25773 }, { "epoch": 2.6493987049028678, "grad_norm": 0.06943316757678986, "learning_rate": 0.01, "loss": 2.0186, "step": 25776 }, { "epoch": 2.6497070613629354, "grad_norm": 0.06536037474870682, "learning_rate": 0.01, "loss": 2.008, "step": 25779 }, { "epoch": 2.6500154178230035, "grad_norm": 0.04698020964860916, "learning_rate": 0.01, "loss": 2.0149, "step": 25782 }, { "epoch": 2.650323774283071, "grad_norm": 0.05523454770445824, "learning_rate": 0.01, "loss": 2.0191, "step": 25785 }, { "epoch": 2.650632130743139, "grad_norm": 0.043647054582834244, "learning_rate": 0.01, "loss": 1.9993, "step": 25788 }, { "epoch": 2.650940487203207, "grad_norm": 0.05682402104139328, "learning_rate": 0.01, "loss": 2.0315, "step": 25791 }, { "epoch": 2.6512488436632746, "grad_norm": 0.10919704288244247, "learning_rate": 0.01, "loss": 1.9967, "step": 25794 }, { "epoch": 2.6515572001233427, "grad_norm": 0.0895003080368042, "learning_rate": 0.01, "loss": 2.0361, "step": 25797 }, { "epoch": 2.6518655565834104, "grad_norm": 0.07750852406024933, "learning_rate": 0.01, "loss": 2.0079, "step": 25800 }, { "epoch": 2.6521739130434785, "grad_norm": 0.07465649396181107, "learning_rate": 0.01, "loss": 2.0272, "step": 25803 }, { "epoch": 2.652482269503546, "grad_norm": 0.042654186487197876, "learning_rate": 0.01, "loss": 2.0386, "step": 25806 }, { "epoch": 2.652790625963614, "grad_norm": 0.04910847172141075, "learning_rate": 0.01, "loss": 2.0049, "step": 25809 }, { "epoch": 2.653098982423682, "grad_norm": 0.04584549367427826, "learning_rate": 0.01, "loss": 2.0232, "step": 25812 }, { "epoch": 2.6534073388837496, "grad_norm": 0.03400958329439163, "learning_rate": 0.01, "loss": 1.9994, "step": 25815 }, { "epoch": 2.6537156953438172, "grad_norm": 0.11867182701826096, "learning_rate": 0.01, "loss": 2.0275, "step": 25818 }, { "epoch": 2.6540240518038853, "grad_norm": 0.09303310513496399, "learning_rate": 0.01, "loss": 2.0533, "step": 25821 }, { "epoch": 2.654332408263953, "grad_norm": 0.04951944947242737, "learning_rate": 0.01, "loss": 2.0391, "step": 25824 }, { "epoch": 2.654640764724021, "grad_norm": 0.05893867462873459, "learning_rate": 0.01, "loss": 2.0109, "step": 25827 }, { "epoch": 2.6549491211840888, "grad_norm": 0.034138891845941544, "learning_rate": 0.01, "loss": 2.0101, "step": 25830 }, { "epoch": 2.655257477644157, "grad_norm": 0.039834585040807724, "learning_rate": 0.01, "loss": 2.0008, "step": 25833 }, { "epoch": 2.6555658341042245, "grad_norm": 0.10098033398389816, "learning_rate": 0.01, "loss": 2.0156, "step": 25836 }, { "epoch": 2.655874190564292, "grad_norm": 0.07205129414796829, "learning_rate": 0.01, "loss": 2.0256, "step": 25839 }, { "epoch": 2.6561825470243603, "grad_norm": 0.04370702803134918, "learning_rate": 0.01, "loss": 2.0031, "step": 25842 }, { "epoch": 2.656490903484428, "grad_norm": 0.10662799328565598, "learning_rate": 0.01, "loss": 2.0311, "step": 25845 }, { "epoch": 2.6567992599444956, "grad_norm": 0.1833692342042923, "learning_rate": 0.01, "loss": 2.0585, "step": 25848 }, { "epoch": 2.6571076164045637, "grad_norm": 0.12229418754577637, "learning_rate": 0.01, "loss": 2.0219, "step": 25851 }, { "epoch": 2.6574159728646314, "grad_norm": 0.05533694103360176, "learning_rate": 0.01, "loss": 2.026, "step": 25854 }, { "epoch": 2.6577243293246995, "grad_norm": 0.042578935623168945, "learning_rate": 0.01, "loss": 2.0247, "step": 25857 }, { "epoch": 2.658032685784767, "grad_norm": 0.034579649567604065, "learning_rate": 0.01, "loss": 2.0286, "step": 25860 }, { "epoch": 2.6583410422448353, "grad_norm": 0.052627481520175934, "learning_rate": 0.01, "loss": 1.986, "step": 25863 }, { "epoch": 2.658649398704903, "grad_norm": 0.04877294600009918, "learning_rate": 0.01, "loss": 2.0157, "step": 25866 }, { "epoch": 2.6589577551649706, "grad_norm": 0.05192401632666588, "learning_rate": 0.01, "loss": 2.0321, "step": 25869 }, { "epoch": 2.6592661116250387, "grad_norm": 0.08188097178936005, "learning_rate": 0.01, "loss": 2.0074, "step": 25872 }, { "epoch": 2.6595744680851063, "grad_norm": 0.06215586140751839, "learning_rate": 0.01, "loss": 1.9925, "step": 25875 }, { "epoch": 2.659882824545174, "grad_norm": 0.10561207681894302, "learning_rate": 0.01, "loss": 2.0233, "step": 25878 }, { "epoch": 2.660191181005242, "grad_norm": 0.08467070758342743, "learning_rate": 0.01, "loss": 2.0503, "step": 25881 }, { "epoch": 2.6604995374653098, "grad_norm": 0.05138308182358742, "learning_rate": 0.01, "loss": 2.0492, "step": 25884 }, { "epoch": 2.660807893925378, "grad_norm": 0.07387588173151016, "learning_rate": 0.01, "loss": 2.0134, "step": 25887 }, { "epoch": 2.6611162503854455, "grad_norm": 0.06682645529508591, "learning_rate": 0.01, "loss": 2.0508, "step": 25890 }, { "epoch": 2.6614246068455136, "grad_norm": 0.056953735649585724, "learning_rate": 0.01, "loss": 2.0312, "step": 25893 }, { "epoch": 2.6617329633055813, "grad_norm": 0.03709590435028076, "learning_rate": 0.01, "loss": 2.0205, "step": 25896 }, { "epoch": 2.662041319765649, "grad_norm": 0.05895649641752243, "learning_rate": 0.01, "loss": 2.028, "step": 25899 }, { "epoch": 2.662349676225717, "grad_norm": 0.08740679174661636, "learning_rate": 0.01, "loss": 2.0156, "step": 25902 }, { "epoch": 2.6626580326857847, "grad_norm": 0.04745563119649887, "learning_rate": 0.01, "loss": 2.0338, "step": 25905 }, { "epoch": 2.6629663891458524, "grad_norm": 0.08748458325862885, "learning_rate": 0.01, "loss": 2.0392, "step": 25908 }, { "epoch": 2.6632747456059205, "grad_norm": 0.04833563417196274, "learning_rate": 0.01, "loss": 2.0128, "step": 25911 }, { "epoch": 2.663583102065988, "grad_norm": 0.03873222693800926, "learning_rate": 0.01, "loss": 2.0028, "step": 25914 }, { "epoch": 2.6638914585260562, "grad_norm": 0.07129956781864166, "learning_rate": 0.01, "loss": 2.0092, "step": 25917 }, { "epoch": 2.664199814986124, "grad_norm": 0.08984299004077911, "learning_rate": 0.01, "loss": 2.0206, "step": 25920 }, { "epoch": 2.664508171446192, "grad_norm": 0.03720667213201523, "learning_rate": 0.01, "loss": 2.0018, "step": 25923 }, { "epoch": 2.6648165279062597, "grad_norm": 0.09795575588941574, "learning_rate": 0.01, "loss": 2.0373, "step": 25926 }, { "epoch": 2.6651248843663273, "grad_norm": 0.1043957769870758, "learning_rate": 0.01, "loss": 2.0313, "step": 25929 }, { "epoch": 2.6654332408263954, "grad_norm": 0.04737646505236626, "learning_rate": 0.01, "loss": 2.0313, "step": 25932 }, { "epoch": 2.665741597286463, "grad_norm": 0.04864402487874031, "learning_rate": 0.01, "loss": 2.0068, "step": 25935 }, { "epoch": 2.6660499537465308, "grad_norm": 0.0386229082942009, "learning_rate": 0.01, "loss": 2.0259, "step": 25938 }, { "epoch": 2.666358310206599, "grad_norm": 0.04127798601984978, "learning_rate": 0.01, "loss": 2.0463, "step": 25941 }, { "epoch": 2.6666666666666665, "grad_norm": 0.07537275552749634, "learning_rate": 0.01, "loss": 2.0254, "step": 25944 }, { "epoch": 2.6669750231267346, "grad_norm": 0.041533395648002625, "learning_rate": 0.01, "loss": 2.0228, "step": 25947 }, { "epoch": 2.6672833795868023, "grad_norm": 0.045062318444252014, "learning_rate": 0.01, "loss": 2.0428, "step": 25950 }, { "epoch": 2.6675917360468704, "grad_norm": 0.08328119665384293, "learning_rate": 0.01, "loss": 2.0233, "step": 25953 }, { "epoch": 2.667900092506938, "grad_norm": 0.08402698487043381, "learning_rate": 0.01, "loss": 2.0303, "step": 25956 }, { "epoch": 2.6682084489670057, "grad_norm": 0.10571663826704025, "learning_rate": 0.01, "loss": 2.0292, "step": 25959 }, { "epoch": 2.668516805427074, "grad_norm": 0.05094289034605026, "learning_rate": 0.01, "loss": 2.0278, "step": 25962 }, { "epoch": 2.6688251618871415, "grad_norm": 0.034051552414894104, "learning_rate": 0.01, "loss": 2.0265, "step": 25965 }, { "epoch": 2.669133518347209, "grad_norm": 0.05703623592853546, "learning_rate": 0.01, "loss": 2.0272, "step": 25968 }, { "epoch": 2.6694418748072772, "grad_norm": 0.10606861114501953, "learning_rate": 0.01, "loss": 2.0179, "step": 25971 }, { "epoch": 2.669750231267345, "grad_norm": 0.11793088167905807, "learning_rate": 0.01, "loss": 2.0492, "step": 25974 }, { "epoch": 2.670058587727413, "grad_norm": 0.09058693051338196, "learning_rate": 0.01, "loss": 2.0213, "step": 25977 }, { "epoch": 2.6703669441874807, "grad_norm": 0.0677378848195076, "learning_rate": 0.01, "loss": 2.0036, "step": 25980 }, { "epoch": 2.6706753006475488, "grad_norm": 0.05636313930153847, "learning_rate": 0.01, "loss": 2.0318, "step": 25983 }, { "epoch": 2.6709836571076164, "grad_norm": 0.04547611251473427, "learning_rate": 0.01, "loss": 2.0236, "step": 25986 }, { "epoch": 2.671292013567684, "grad_norm": 0.04852728173136711, "learning_rate": 0.01, "loss": 2.0125, "step": 25989 }, { "epoch": 2.671600370027752, "grad_norm": 0.04465510696172714, "learning_rate": 0.01, "loss": 1.9907, "step": 25992 }, { "epoch": 2.67190872648782, "grad_norm": 0.043789103627204895, "learning_rate": 0.01, "loss": 2.0213, "step": 25995 }, { "epoch": 2.6722170829478875, "grad_norm": 0.12845320999622345, "learning_rate": 0.01, "loss": 2.0429, "step": 25998 }, { "epoch": 2.6725254394079556, "grad_norm": 0.054881785064935684, "learning_rate": 0.01, "loss": 2.0252, "step": 26001 }, { "epoch": 2.6728337958680233, "grad_norm": 0.0845002606511116, "learning_rate": 0.01, "loss": 2.0475, "step": 26004 }, { "epoch": 2.6731421523280914, "grad_norm": 0.04782318323850632, "learning_rate": 0.01, "loss": 2.0411, "step": 26007 }, { "epoch": 2.673450508788159, "grad_norm": 0.04288490489125252, "learning_rate": 0.01, "loss": 2.0376, "step": 26010 }, { "epoch": 2.673758865248227, "grad_norm": 0.10904238373041153, "learning_rate": 0.01, "loss": 2.0316, "step": 26013 }, { "epoch": 2.674067221708295, "grad_norm": 0.08276703208684921, "learning_rate": 0.01, "loss": 2.032, "step": 26016 }, { "epoch": 2.6743755781683625, "grad_norm": 0.06076609715819359, "learning_rate": 0.01, "loss": 2.0507, "step": 26019 }, { "epoch": 2.6746839346284306, "grad_norm": 0.043946780264377594, "learning_rate": 0.01, "loss": 2.0188, "step": 26022 }, { "epoch": 2.6749922910884982, "grad_norm": 0.03716238588094711, "learning_rate": 0.01, "loss": 2.0183, "step": 26025 }, { "epoch": 2.675300647548566, "grad_norm": 0.04982864856719971, "learning_rate": 0.01, "loss": 2.0114, "step": 26028 }, { "epoch": 2.675609004008634, "grad_norm": 0.05211315676569939, "learning_rate": 0.01, "loss": 2.0305, "step": 26031 }, { "epoch": 2.6759173604687017, "grad_norm": 0.10215940326452255, "learning_rate": 0.01, "loss": 2.0365, "step": 26034 }, { "epoch": 2.6762257169287698, "grad_norm": 0.07742898166179657, "learning_rate": 0.01, "loss": 2.0173, "step": 26037 }, { "epoch": 2.6765340733888374, "grad_norm": 0.047653328627347946, "learning_rate": 0.01, "loss": 2.0198, "step": 26040 }, { "epoch": 2.6768424298489055, "grad_norm": 0.07179111242294312, "learning_rate": 0.01, "loss": 2.0283, "step": 26043 }, { "epoch": 2.677150786308973, "grad_norm": 0.044947609305381775, "learning_rate": 0.01, "loss": 2.0044, "step": 26046 }, { "epoch": 2.677459142769041, "grad_norm": 0.03128395974636078, "learning_rate": 0.01, "loss": 2.0097, "step": 26049 }, { "epoch": 2.677767499229109, "grad_norm": 0.037758342921733856, "learning_rate": 0.01, "loss": 2.0073, "step": 26052 }, { "epoch": 2.6780758556891766, "grad_norm": 0.059724219143390656, "learning_rate": 0.01, "loss": 2.0304, "step": 26055 }, { "epoch": 2.6783842121492443, "grad_norm": 0.09603296220302582, "learning_rate": 0.01, "loss": 2.0106, "step": 26058 }, { "epoch": 2.6786925686093124, "grad_norm": 0.059488292783498764, "learning_rate": 0.01, "loss": 2.0114, "step": 26061 }, { "epoch": 2.6790009250693805, "grad_norm": 0.07343467324972153, "learning_rate": 0.01, "loss": 2.0494, "step": 26064 }, { "epoch": 2.679309281529448, "grad_norm": 0.054782915860414505, "learning_rate": 0.01, "loss": 2.023, "step": 26067 }, { "epoch": 2.679617637989516, "grad_norm": 0.042636722326278687, "learning_rate": 0.01, "loss": 2.0376, "step": 26070 }, { "epoch": 2.679925994449584, "grad_norm": 0.06803017109632492, "learning_rate": 0.01, "loss": 1.9864, "step": 26073 }, { "epoch": 2.6802343509096516, "grad_norm": 0.10990459471940994, "learning_rate": 0.01, "loss": 2.0343, "step": 26076 }, { "epoch": 2.6805427073697192, "grad_norm": 0.035727545619010925, "learning_rate": 0.01, "loss": 2.0195, "step": 26079 }, { "epoch": 2.6808510638297873, "grad_norm": 0.11241263896226883, "learning_rate": 0.01, "loss": 2.038, "step": 26082 }, { "epoch": 2.681159420289855, "grad_norm": 0.06156554073095322, "learning_rate": 0.01, "loss": 2.0191, "step": 26085 }, { "epoch": 2.6814677767499226, "grad_norm": 0.0609101839363575, "learning_rate": 0.01, "loss": 1.994, "step": 26088 }, { "epoch": 2.6817761332099908, "grad_norm": 0.046859260648489, "learning_rate": 0.01, "loss": 2.0226, "step": 26091 }, { "epoch": 2.682084489670059, "grad_norm": 0.05803006887435913, "learning_rate": 0.01, "loss": 2.0347, "step": 26094 }, { "epoch": 2.6823928461301265, "grad_norm": 0.09425931423902512, "learning_rate": 0.01, "loss": 2.0073, "step": 26097 }, { "epoch": 2.682701202590194, "grad_norm": 0.07893898338079453, "learning_rate": 0.01, "loss": 2.0248, "step": 26100 }, { "epoch": 2.6830095590502623, "grad_norm": 0.044163595885038376, "learning_rate": 0.01, "loss": 2.0217, "step": 26103 }, { "epoch": 2.68331791551033, "grad_norm": 0.061135292053222656, "learning_rate": 0.01, "loss": 2.0006, "step": 26106 }, { "epoch": 2.6836262719703976, "grad_norm": 0.03806147351861, "learning_rate": 0.01, "loss": 2.0226, "step": 26109 }, { "epoch": 2.6839346284304657, "grad_norm": 0.050713907927274704, "learning_rate": 0.01, "loss": 2.0088, "step": 26112 }, { "epoch": 2.6842429848905334, "grad_norm": 0.05512484535574913, "learning_rate": 0.01, "loss": 2.038, "step": 26115 }, { "epoch": 2.684551341350601, "grad_norm": 0.048581019043922424, "learning_rate": 0.01, "loss": 2.0388, "step": 26118 }, { "epoch": 2.684859697810669, "grad_norm": 0.04121122509241104, "learning_rate": 0.01, "loss": 2.0176, "step": 26121 }, { "epoch": 2.6851680542707372, "grad_norm": 0.036977533251047134, "learning_rate": 0.01, "loss": 2.0341, "step": 26124 }, { "epoch": 2.685476410730805, "grad_norm": 0.10436423867940903, "learning_rate": 0.01, "loss": 2.0122, "step": 26127 }, { "epoch": 2.6857847671908726, "grad_norm": 0.05357207730412483, "learning_rate": 0.01, "loss": 2.0076, "step": 26130 }, { "epoch": 2.6860931236509407, "grad_norm": 0.05386963114142418, "learning_rate": 0.01, "loss": 2.0012, "step": 26133 }, { "epoch": 2.6864014801110083, "grad_norm": 0.03591128811240196, "learning_rate": 0.01, "loss": 2.0291, "step": 26136 }, { "epoch": 2.686709836571076, "grad_norm": 0.036740854382514954, "learning_rate": 0.01, "loss": 2.0142, "step": 26139 }, { "epoch": 2.687018193031144, "grad_norm": 0.05966171249747276, "learning_rate": 0.01, "loss": 2.0145, "step": 26142 }, { "epoch": 2.6873265494912117, "grad_norm": 0.10529398918151855, "learning_rate": 0.01, "loss": 2.0091, "step": 26145 }, { "epoch": 2.6876349059512794, "grad_norm": 0.07834989577531815, "learning_rate": 0.01, "loss": 2.0211, "step": 26148 }, { "epoch": 2.6879432624113475, "grad_norm": 0.10702015459537506, "learning_rate": 0.01, "loss": 2.0368, "step": 26151 }, { "epoch": 2.6882516188714156, "grad_norm": 0.04647925868630409, "learning_rate": 0.01, "loss": 2.032, "step": 26154 }, { "epoch": 2.6885599753314833, "grad_norm": 0.04523247852921486, "learning_rate": 0.01, "loss": 2.0129, "step": 26157 }, { "epoch": 2.688868331791551, "grad_norm": 0.04293215274810791, "learning_rate": 0.01, "loss": 1.9982, "step": 26160 }, { "epoch": 2.689176688251619, "grad_norm": 0.06344863772392273, "learning_rate": 0.01, "loss": 2.028, "step": 26163 }, { "epoch": 2.6894850447116867, "grad_norm": 0.047177109867334366, "learning_rate": 0.01, "loss": 2.0154, "step": 26166 }, { "epoch": 2.6897934011717544, "grad_norm": 0.09941892325878143, "learning_rate": 0.01, "loss": 2.0204, "step": 26169 }, { "epoch": 2.6901017576318225, "grad_norm": 0.04238753393292427, "learning_rate": 0.01, "loss": 2.0164, "step": 26172 }, { "epoch": 2.69041011409189, "grad_norm": 0.0550382174551487, "learning_rate": 0.01, "loss": 2.0283, "step": 26175 }, { "epoch": 2.690718470551958, "grad_norm": 0.09788551181554794, "learning_rate": 0.01, "loss": 2.0071, "step": 26178 }, { "epoch": 2.691026827012026, "grad_norm": 0.044239919632673264, "learning_rate": 0.01, "loss": 2.0163, "step": 26181 }, { "epoch": 2.691335183472094, "grad_norm": 0.10677097737789154, "learning_rate": 0.01, "loss": 2.0041, "step": 26184 }, { "epoch": 2.6916435399321617, "grad_norm": 0.04113283380866051, "learning_rate": 0.01, "loss": 2.0061, "step": 26187 }, { "epoch": 2.6919518963922293, "grad_norm": 0.05748388543725014, "learning_rate": 0.01, "loss": 2.0144, "step": 26190 }, { "epoch": 2.6922602528522974, "grad_norm": 0.04931147024035454, "learning_rate": 0.01, "loss": 1.9967, "step": 26193 }, { "epoch": 2.692568609312365, "grad_norm": 0.051582470536231995, "learning_rate": 0.01, "loss": 2.0358, "step": 26196 }, { "epoch": 2.6928769657724327, "grad_norm": 0.1054171696305275, "learning_rate": 0.01, "loss": 2.0273, "step": 26199 }, { "epoch": 2.693185322232501, "grad_norm": 0.09005532413721085, "learning_rate": 0.01, "loss": 2.0256, "step": 26202 }, { "epoch": 2.6934936786925685, "grad_norm": 0.08124802261590958, "learning_rate": 0.01, "loss": 2.0252, "step": 26205 }, { "epoch": 2.6938020351526366, "grad_norm": 0.06238120049238205, "learning_rate": 0.01, "loss": 1.9943, "step": 26208 }, { "epoch": 2.6941103916127043, "grad_norm": 0.0375673770904541, "learning_rate": 0.01, "loss": 2.0124, "step": 26211 }, { "epoch": 2.6944187480727724, "grad_norm": 0.04710889235138893, "learning_rate": 0.01, "loss": 2.0343, "step": 26214 }, { "epoch": 2.69472710453284, "grad_norm": 0.0509529784321785, "learning_rate": 0.01, "loss": 2.0015, "step": 26217 }, { "epoch": 2.6950354609929077, "grad_norm": 0.043452613055706024, "learning_rate": 0.01, "loss": 2.0079, "step": 26220 }, { "epoch": 2.695343817452976, "grad_norm": 0.07350075244903564, "learning_rate": 0.01, "loss": 2.0141, "step": 26223 }, { "epoch": 2.6956521739130435, "grad_norm": 0.07672197371721268, "learning_rate": 0.01, "loss": 2.0002, "step": 26226 }, { "epoch": 2.695960530373111, "grad_norm": 0.1255597323179245, "learning_rate": 0.01, "loss": 2.0403, "step": 26229 }, { "epoch": 2.6962688868331792, "grad_norm": 0.0636356994509697, "learning_rate": 0.01, "loss": 2.0438, "step": 26232 }, { "epoch": 2.696577243293247, "grad_norm": 0.06334753334522247, "learning_rate": 0.01, "loss": 2.0339, "step": 26235 }, { "epoch": 2.696885599753315, "grad_norm": 0.0592232346534729, "learning_rate": 0.01, "loss": 2.0223, "step": 26238 }, { "epoch": 2.6971939562133826, "grad_norm": 0.06983037292957306, "learning_rate": 0.01, "loss": 2.0418, "step": 26241 }, { "epoch": 2.6975023126734508, "grad_norm": 0.042423997074365616, "learning_rate": 0.01, "loss": 2.0219, "step": 26244 }, { "epoch": 2.6978106691335184, "grad_norm": 0.12152963131666183, "learning_rate": 0.01, "loss": 2.026, "step": 26247 }, { "epoch": 2.698119025593586, "grad_norm": 0.03922561556100845, "learning_rate": 0.01, "loss": 1.9994, "step": 26250 }, { "epoch": 2.698427382053654, "grad_norm": 0.04790705442428589, "learning_rate": 0.01, "loss": 2.0155, "step": 26253 }, { "epoch": 2.698735738513722, "grad_norm": 0.0596231147646904, "learning_rate": 0.01, "loss": 2.0317, "step": 26256 }, { "epoch": 2.6990440949737895, "grad_norm": 0.06809087842702866, "learning_rate": 0.01, "loss": 2.001, "step": 26259 }, { "epoch": 2.6993524514338576, "grad_norm": 0.08235184103250504, "learning_rate": 0.01, "loss": 2.0284, "step": 26262 }, { "epoch": 2.6996608078939253, "grad_norm": 0.07293444871902466, "learning_rate": 0.01, "loss": 2.0309, "step": 26265 }, { "epoch": 2.6999691643539934, "grad_norm": 0.04277535527944565, "learning_rate": 0.01, "loss": 2.0236, "step": 26268 }, { "epoch": 2.700277520814061, "grad_norm": 0.10841301828622818, "learning_rate": 0.01, "loss": 2.0275, "step": 26271 }, { "epoch": 2.700585877274129, "grad_norm": 0.04575572907924652, "learning_rate": 0.01, "loss": 2.0331, "step": 26274 }, { "epoch": 2.700894233734197, "grad_norm": 0.0422062948346138, "learning_rate": 0.01, "loss": 2.0441, "step": 26277 }, { "epoch": 2.7012025901942645, "grad_norm": 0.034286245703697205, "learning_rate": 0.01, "loss": 2.0178, "step": 26280 }, { "epoch": 2.7015109466543326, "grad_norm": 0.040598925203084946, "learning_rate": 0.01, "loss": 2.0192, "step": 26283 }, { "epoch": 2.7018193031144, "grad_norm": 0.20375369489192963, "learning_rate": 0.01, "loss": 1.9977, "step": 26286 }, { "epoch": 2.702127659574468, "grad_norm": 0.1908276230096817, "learning_rate": 0.01, "loss": 2.0169, "step": 26289 }, { "epoch": 2.702436016034536, "grad_norm": 0.06826306879520416, "learning_rate": 0.01, "loss": 2.0231, "step": 26292 }, { "epoch": 2.7027443724946036, "grad_norm": 0.03917758911848068, "learning_rate": 0.01, "loss": 1.9938, "step": 26295 }, { "epoch": 2.7030527289546717, "grad_norm": 0.041581716388463974, "learning_rate": 0.01, "loss": 2.0375, "step": 26298 }, { "epoch": 2.7033610854147394, "grad_norm": 0.03759411349892616, "learning_rate": 0.01, "loss": 2.0156, "step": 26301 }, { "epoch": 2.7036694418748075, "grad_norm": 0.043274398893117905, "learning_rate": 0.01, "loss": 2.025, "step": 26304 }, { "epoch": 2.703977798334875, "grad_norm": 0.03156547620892525, "learning_rate": 0.01, "loss": 2.0151, "step": 26307 }, { "epoch": 2.704286154794943, "grad_norm": 0.042134515941143036, "learning_rate": 0.01, "loss": 2.0363, "step": 26310 }, { "epoch": 2.704594511255011, "grad_norm": 0.15473207831382751, "learning_rate": 0.01, "loss": 2.0455, "step": 26313 }, { "epoch": 2.7049028677150786, "grad_norm": 0.16576655209064484, "learning_rate": 0.01, "loss": 1.9929, "step": 26316 }, { "epoch": 2.7052112241751463, "grad_norm": 0.2106962651014328, "learning_rate": 0.01, "loss": 2.0215, "step": 26319 }, { "epoch": 2.7055195806352144, "grad_norm": 0.06543057411909103, "learning_rate": 0.01, "loss": 2.0368, "step": 26322 }, { "epoch": 2.705827937095282, "grad_norm": 0.05747131630778313, "learning_rate": 0.01, "loss": 2.0092, "step": 26325 }, { "epoch": 2.70613629355535, "grad_norm": 0.03801654651761055, "learning_rate": 0.01, "loss": 1.9988, "step": 26328 }, { "epoch": 2.706444650015418, "grad_norm": 0.047034505754709244, "learning_rate": 0.01, "loss": 2.0524, "step": 26331 }, { "epoch": 2.706753006475486, "grad_norm": 0.03980104252696037, "learning_rate": 0.01, "loss": 1.9928, "step": 26334 }, { "epoch": 2.7070613629355536, "grad_norm": 0.04470902681350708, "learning_rate": 0.01, "loss": 2.0276, "step": 26337 }, { "epoch": 2.707369719395621, "grad_norm": 0.03287111967802048, "learning_rate": 0.01, "loss": 2.0186, "step": 26340 }, { "epoch": 2.7076780758556893, "grad_norm": 0.04395715892314911, "learning_rate": 0.01, "loss": 2.0066, "step": 26343 }, { "epoch": 2.707986432315757, "grad_norm": 0.06358876079320908, "learning_rate": 0.01, "loss": 2.0505, "step": 26346 }, { "epoch": 2.7082947887758246, "grad_norm": 0.03925269469618797, "learning_rate": 0.01, "loss": 2.0149, "step": 26349 }, { "epoch": 2.7086031452358927, "grad_norm": 0.16810861229896545, "learning_rate": 0.01, "loss": 2.0433, "step": 26352 }, { "epoch": 2.7089115016959604, "grad_norm": 0.09612049907445908, "learning_rate": 0.01, "loss": 2.0014, "step": 26355 }, { "epoch": 2.7092198581560285, "grad_norm": 0.048840370029211044, "learning_rate": 0.01, "loss": 2.0062, "step": 26358 }, { "epoch": 2.709528214616096, "grad_norm": 0.03775126487016678, "learning_rate": 0.01, "loss": 2.0149, "step": 26361 }, { "epoch": 2.7098365710761643, "grad_norm": 0.04588973522186279, "learning_rate": 0.01, "loss": 2.031, "step": 26364 }, { "epoch": 2.710144927536232, "grad_norm": 0.061829451471567154, "learning_rate": 0.01, "loss": 2.0076, "step": 26367 }, { "epoch": 2.7104532839962996, "grad_norm": 0.053572434931993484, "learning_rate": 0.01, "loss": 2.0167, "step": 26370 }, { "epoch": 2.7107616404563677, "grad_norm": 0.0460902638733387, "learning_rate": 0.01, "loss": 2.0515, "step": 26373 }, { "epoch": 2.7110699969164354, "grad_norm": 0.07622378319501877, "learning_rate": 0.01, "loss": 2.0229, "step": 26376 }, { "epoch": 2.711378353376503, "grad_norm": 0.04137422889471054, "learning_rate": 0.01, "loss": 1.9985, "step": 26379 }, { "epoch": 2.711686709836571, "grad_norm": 0.052519541233778, "learning_rate": 0.01, "loss": 2.0166, "step": 26382 }, { "epoch": 2.711995066296639, "grad_norm": 0.13548482954502106, "learning_rate": 0.01, "loss": 2.0221, "step": 26385 }, { "epoch": 2.712303422756707, "grad_norm": 0.05124654993414879, "learning_rate": 0.01, "loss": 2.02, "step": 26388 }, { "epoch": 2.7126117792167745, "grad_norm": 0.04233216866850853, "learning_rate": 0.01, "loss": 2.0131, "step": 26391 }, { "epoch": 2.7129201356768426, "grad_norm": 0.04341414198279381, "learning_rate": 0.01, "loss": 2.0088, "step": 26394 }, { "epoch": 2.7132284921369103, "grad_norm": 0.078862264752388, "learning_rate": 0.01, "loss": 2.0098, "step": 26397 }, { "epoch": 2.713536848596978, "grad_norm": 0.08351139724254608, "learning_rate": 0.01, "loss": 2.0078, "step": 26400 }, { "epoch": 2.713845205057046, "grad_norm": 0.10899659991264343, "learning_rate": 0.01, "loss": 2.0354, "step": 26403 }, { "epoch": 2.7141535615171137, "grad_norm": 0.11437533795833588, "learning_rate": 0.01, "loss": 2.0145, "step": 26406 }, { "epoch": 2.7144619179771814, "grad_norm": 0.04197553172707558, "learning_rate": 0.01, "loss": 2.01, "step": 26409 }, { "epoch": 2.7147702744372495, "grad_norm": 0.054637305438518524, "learning_rate": 0.01, "loss": 2.0087, "step": 26412 }, { "epoch": 2.715078630897317, "grad_norm": 0.058579228818416595, "learning_rate": 0.01, "loss": 2.0132, "step": 26415 }, { "epoch": 2.7153869873573853, "grad_norm": 0.03501029685139656, "learning_rate": 0.01, "loss": 2.0404, "step": 26418 }, { "epoch": 2.715695343817453, "grad_norm": 0.04865582287311554, "learning_rate": 0.01, "loss": 2.0221, "step": 26421 }, { "epoch": 2.716003700277521, "grad_norm": 0.08878735452890396, "learning_rate": 0.01, "loss": 2.0376, "step": 26424 }, { "epoch": 2.7163120567375887, "grad_norm": 0.06919421255588531, "learning_rate": 0.01, "loss": 2.0125, "step": 26427 }, { "epoch": 2.7166204131976563, "grad_norm": 0.05429752171039581, "learning_rate": 0.01, "loss": 2.0217, "step": 26430 }, { "epoch": 2.7169287696577245, "grad_norm": 0.10294333100318909, "learning_rate": 0.01, "loss": 2.0036, "step": 26433 }, { "epoch": 2.717237126117792, "grad_norm": 0.0625937432050705, "learning_rate": 0.01, "loss": 2.0137, "step": 26436 }, { "epoch": 2.7175454825778598, "grad_norm": 0.048780061304569244, "learning_rate": 0.01, "loss": 2.0324, "step": 26439 }, { "epoch": 2.717853839037928, "grad_norm": 0.03840133175253868, "learning_rate": 0.01, "loss": 2.0035, "step": 26442 }, { "epoch": 2.7181621954979955, "grad_norm": 0.04021105915307999, "learning_rate": 0.01, "loss": 2.0172, "step": 26445 }, { "epoch": 2.7184705519580636, "grad_norm": 0.04241623356938362, "learning_rate": 0.01, "loss": 2.0192, "step": 26448 }, { "epoch": 2.7187789084181313, "grad_norm": 0.04488721489906311, "learning_rate": 0.01, "loss": 2.0275, "step": 26451 }, { "epoch": 2.7190872648781994, "grad_norm": 0.09730737656354904, "learning_rate": 0.01, "loss": 2.0064, "step": 26454 }, { "epoch": 2.719395621338267, "grad_norm": 0.05743822455406189, "learning_rate": 0.01, "loss": 2.0294, "step": 26457 }, { "epoch": 2.7197039777983347, "grad_norm": 0.042568083852529526, "learning_rate": 0.01, "loss": 2.0172, "step": 26460 }, { "epoch": 2.720012334258403, "grad_norm": 0.050452932715415955, "learning_rate": 0.01, "loss": 2.0354, "step": 26463 }, { "epoch": 2.7203206907184705, "grad_norm": 0.04751294478774071, "learning_rate": 0.01, "loss": 2.0242, "step": 26466 }, { "epoch": 2.720629047178538, "grad_norm": 0.12711678445339203, "learning_rate": 0.01, "loss": 2.0163, "step": 26469 }, { "epoch": 2.7209374036386063, "grad_norm": 0.043656837195158005, "learning_rate": 0.01, "loss": 2.0234, "step": 26472 }, { "epoch": 2.721245760098674, "grad_norm": 0.061641763895750046, "learning_rate": 0.01, "loss": 2.0436, "step": 26475 }, { "epoch": 2.721554116558742, "grad_norm": 0.04843028262257576, "learning_rate": 0.01, "loss": 2.0422, "step": 26478 }, { "epoch": 2.7218624730188097, "grad_norm": 0.05009736493229866, "learning_rate": 0.01, "loss": 2.025, "step": 26481 }, { "epoch": 2.722170829478878, "grad_norm": 0.0330355204641819, "learning_rate": 0.01, "loss": 2.038, "step": 26484 }, { "epoch": 2.7224791859389454, "grad_norm": 0.05958884581923485, "learning_rate": 0.01, "loss": 1.9874, "step": 26487 }, { "epoch": 2.722787542399013, "grad_norm": 0.10207222402095795, "learning_rate": 0.01, "loss": 2.0332, "step": 26490 }, { "epoch": 2.723095898859081, "grad_norm": 0.06909897923469543, "learning_rate": 0.01, "loss": 2.0262, "step": 26493 }, { "epoch": 2.723404255319149, "grad_norm": 0.09485501050949097, "learning_rate": 0.01, "loss": 2.0064, "step": 26496 }, { "epoch": 2.7237126117792165, "grad_norm": 0.0501030869781971, "learning_rate": 0.01, "loss": 2.03, "step": 26499 }, { "epoch": 2.7240209682392846, "grad_norm": 0.07009368389844894, "learning_rate": 0.01, "loss": 2.0152, "step": 26502 }, { "epoch": 2.7243293246993523, "grad_norm": 0.0766390711069107, "learning_rate": 0.01, "loss": 2.0315, "step": 26505 }, { "epoch": 2.7246376811594204, "grad_norm": 0.050900235772132874, "learning_rate": 0.01, "loss": 1.9763, "step": 26508 }, { "epoch": 2.724946037619488, "grad_norm": 0.043463658541440964, "learning_rate": 0.01, "loss": 1.9971, "step": 26511 }, { "epoch": 2.725254394079556, "grad_norm": 0.07331634312868118, "learning_rate": 0.01, "loss": 2.0331, "step": 26514 }, { "epoch": 2.725562750539624, "grad_norm": 0.06863999366760254, "learning_rate": 0.01, "loss": 2.0025, "step": 26517 }, { "epoch": 2.7258711069996915, "grad_norm": 0.12374615669250488, "learning_rate": 0.01, "loss": 2.019, "step": 26520 }, { "epoch": 2.7261794634597596, "grad_norm": 0.05284014344215393, "learning_rate": 0.01, "loss": 2.0116, "step": 26523 }, { "epoch": 2.7264878199198272, "grad_norm": 0.07776612043380737, "learning_rate": 0.01, "loss": 2.0139, "step": 26526 }, { "epoch": 2.726796176379895, "grad_norm": 0.0325651615858078, "learning_rate": 0.01, "loss": 2.0118, "step": 26529 }, { "epoch": 2.727104532839963, "grad_norm": 0.03690047189593315, "learning_rate": 0.01, "loss": 2.029, "step": 26532 }, { "epoch": 2.7274128893000307, "grad_norm": 0.12211088836193085, "learning_rate": 0.01, "loss": 2.0267, "step": 26535 }, { "epoch": 2.727721245760099, "grad_norm": 0.05561113730072975, "learning_rate": 0.01, "loss": 2.0318, "step": 26538 }, { "epoch": 2.7280296022201664, "grad_norm": 0.05541878193616867, "learning_rate": 0.01, "loss": 2.0095, "step": 26541 }, { "epoch": 2.7283379586802345, "grad_norm": 0.037987880408763885, "learning_rate": 0.01, "loss": 2.0295, "step": 26544 }, { "epoch": 2.728646315140302, "grad_norm": 0.0748986229300499, "learning_rate": 0.01, "loss": 2.039, "step": 26547 }, { "epoch": 2.72895467160037, "grad_norm": 0.05377615988254547, "learning_rate": 0.01, "loss": 2.0223, "step": 26550 }, { "epoch": 2.729263028060438, "grad_norm": 0.11756724119186401, "learning_rate": 0.01, "loss": 2.0119, "step": 26553 }, { "epoch": 2.7295713845205056, "grad_norm": 0.046288661658763885, "learning_rate": 0.01, "loss": 2.0479, "step": 26556 }, { "epoch": 2.7298797409805733, "grad_norm": 0.041496362537145615, "learning_rate": 0.01, "loss": 2.0247, "step": 26559 }, { "epoch": 2.7301880974406414, "grad_norm": 0.036013491451740265, "learning_rate": 0.01, "loss": 2.0143, "step": 26562 }, { "epoch": 2.7304964539007095, "grad_norm": 0.044654857367277145, "learning_rate": 0.01, "loss": 2.0032, "step": 26565 }, { "epoch": 2.730804810360777, "grad_norm": 0.10827535390853882, "learning_rate": 0.01, "loss": 2.037, "step": 26568 }, { "epoch": 2.731113166820845, "grad_norm": 0.053766943514347076, "learning_rate": 0.01, "loss": 2.0142, "step": 26571 }, { "epoch": 2.731421523280913, "grad_norm": 0.10854397714138031, "learning_rate": 0.01, "loss": 2.0367, "step": 26574 }, { "epoch": 2.7317298797409806, "grad_norm": 0.10521306842565536, "learning_rate": 0.01, "loss": 2.0242, "step": 26577 }, { "epoch": 2.7320382362010482, "grad_norm": 0.06532178819179535, "learning_rate": 0.01, "loss": 2.0305, "step": 26580 }, { "epoch": 2.7323465926611163, "grad_norm": 0.06266357749700546, "learning_rate": 0.01, "loss": 2.0257, "step": 26583 }, { "epoch": 2.732654949121184, "grad_norm": 0.04027451574802399, "learning_rate": 0.01, "loss": 2.0301, "step": 26586 }, { "epoch": 2.7329633055812517, "grad_norm": 0.06135137006640434, "learning_rate": 0.01, "loss": 2.0116, "step": 26589 }, { "epoch": 2.7332716620413198, "grad_norm": 0.08157611638307571, "learning_rate": 0.01, "loss": 2.041, "step": 26592 }, { "epoch": 2.733580018501388, "grad_norm": 0.060507632791996, "learning_rate": 0.01, "loss": 2.0404, "step": 26595 }, { "epoch": 2.7338883749614555, "grad_norm": 0.07385814189910889, "learning_rate": 0.01, "loss": 2.0496, "step": 26598 }, { "epoch": 2.734196731421523, "grad_norm": 0.12423606961965561, "learning_rate": 0.01, "loss": 2.0281, "step": 26601 }, { "epoch": 2.7345050878815913, "grad_norm": 0.09299054741859436, "learning_rate": 0.01, "loss": 1.9909, "step": 26604 }, { "epoch": 2.734813444341659, "grad_norm": 0.07102067768573761, "learning_rate": 0.01, "loss": 2.0175, "step": 26607 }, { "epoch": 2.7351218008017266, "grad_norm": 0.036449629813432693, "learning_rate": 0.01, "loss": 2.0253, "step": 26610 }, { "epoch": 2.7354301572617947, "grad_norm": 0.11397985368967056, "learning_rate": 0.01, "loss": 2.0372, "step": 26613 }, { "epoch": 2.7357385137218624, "grad_norm": 0.0781063437461853, "learning_rate": 0.01, "loss": 2.0191, "step": 26616 }, { "epoch": 2.73604687018193, "grad_norm": 0.05996118485927582, "learning_rate": 0.01, "loss": 2.0221, "step": 26619 }, { "epoch": 2.736355226641998, "grad_norm": 0.05300947278738022, "learning_rate": 0.01, "loss": 2.032, "step": 26622 }, { "epoch": 2.7366635831020663, "grad_norm": 0.048238106071949005, "learning_rate": 0.01, "loss": 2.0335, "step": 26625 }, { "epoch": 2.736971939562134, "grad_norm": 0.09588511288166046, "learning_rate": 0.01, "loss": 2.0418, "step": 26628 }, { "epoch": 2.7372802960222016, "grad_norm": 0.04981528967618942, "learning_rate": 0.01, "loss": 2.0125, "step": 26631 }, { "epoch": 2.7375886524822697, "grad_norm": 0.06809774041175842, "learning_rate": 0.01, "loss": 2.0033, "step": 26634 }, { "epoch": 2.7378970089423373, "grad_norm": 0.08924854546785355, "learning_rate": 0.01, "loss": 2.0026, "step": 26637 }, { "epoch": 2.738205365402405, "grad_norm": 0.06717575341463089, "learning_rate": 0.01, "loss": 2.039, "step": 26640 }, { "epoch": 2.738513721862473, "grad_norm": 0.08358625322580338, "learning_rate": 0.01, "loss": 2.0165, "step": 26643 }, { "epoch": 2.7388220783225408, "grad_norm": 0.14468573033809662, "learning_rate": 0.01, "loss": 2.014, "step": 26646 }, { "epoch": 2.7391304347826084, "grad_norm": 0.0533704049885273, "learning_rate": 0.01, "loss": 2.0268, "step": 26649 }, { "epoch": 2.7394387912426765, "grad_norm": 0.052842382341623306, "learning_rate": 0.01, "loss": 2.0334, "step": 26652 }, { "epoch": 2.7397471477027446, "grad_norm": 0.04230334609746933, "learning_rate": 0.01, "loss": 2.0077, "step": 26655 }, { "epoch": 2.7400555041628123, "grad_norm": 0.1004776731133461, "learning_rate": 0.01, "loss": 2.0254, "step": 26658 }, { "epoch": 2.74036386062288, "grad_norm": 0.03724560886621475, "learning_rate": 0.01, "loss": 2.0188, "step": 26661 }, { "epoch": 2.740672217082948, "grad_norm": 0.04997468739748001, "learning_rate": 0.01, "loss": 2.0229, "step": 26664 }, { "epoch": 2.7409805735430157, "grad_norm": 0.08138549327850342, "learning_rate": 0.01, "loss": 2.0138, "step": 26667 }, { "epoch": 2.7412889300030834, "grad_norm": 0.06195824220776558, "learning_rate": 0.01, "loss": 2.0282, "step": 26670 }, { "epoch": 2.7415972864631515, "grad_norm": 0.07340802997350693, "learning_rate": 0.01, "loss": 2.0226, "step": 26673 }, { "epoch": 2.741905642923219, "grad_norm": 0.09338308125734329, "learning_rate": 0.01, "loss": 2.0374, "step": 26676 }, { "epoch": 2.742213999383287, "grad_norm": 0.03742242231965065, "learning_rate": 0.01, "loss": 2.0313, "step": 26679 }, { "epoch": 2.742522355843355, "grad_norm": 0.04461616277694702, "learning_rate": 0.01, "loss": 2.0463, "step": 26682 }, { "epoch": 2.742830712303423, "grad_norm": 0.062195319682359695, "learning_rate": 0.01, "loss": 1.9822, "step": 26685 }, { "epoch": 2.7431390687634907, "grad_norm": 0.12009324878454208, "learning_rate": 0.01, "loss": 2.003, "step": 26688 }, { "epoch": 2.7434474252235583, "grad_norm": 0.04727381840348244, "learning_rate": 0.01, "loss": 2.0264, "step": 26691 }, { "epoch": 2.7437557816836264, "grad_norm": 0.05080636590719223, "learning_rate": 0.01, "loss": 2.033, "step": 26694 }, { "epoch": 2.744064138143694, "grad_norm": 0.11046820133924484, "learning_rate": 0.01, "loss": 2.0399, "step": 26697 }, { "epoch": 2.7443724946037618, "grad_norm": 0.06709396839141846, "learning_rate": 0.01, "loss": 2.0171, "step": 26700 }, { "epoch": 2.74468085106383, "grad_norm": 0.05088644102215767, "learning_rate": 0.01, "loss": 2.0221, "step": 26703 }, { "epoch": 2.7449892075238975, "grad_norm": 0.06446198374032974, "learning_rate": 0.01, "loss": 2.0061, "step": 26706 }, { "epoch": 2.7452975639839656, "grad_norm": 0.07209669798612595, "learning_rate": 0.01, "loss": 2.0058, "step": 26709 }, { "epoch": 2.7456059204440333, "grad_norm": 0.0958387553691864, "learning_rate": 0.01, "loss": 2.0106, "step": 26712 }, { "epoch": 2.7459142769041014, "grad_norm": 0.04666278511285782, "learning_rate": 0.01, "loss": 2.0223, "step": 26715 }, { "epoch": 2.746222633364169, "grad_norm": 0.0713481456041336, "learning_rate": 0.01, "loss": 2.0357, "step": 26718 }, { "epoch": 2.7465309898242367, "grad_norm": 0.060584791004657745, "learning_rate": 0.01, "loss": 1.999, "step": 26721 }, { "epoch": 2.746839346284305, "grad_norm": 0.08711333572864532, "learning_rate": 0.01, "loss": 2.0386, "step": 26724 }, { "epoch": 2.7471477027443725, "grad_norm": 0.04297727718949318, "learning_rate": 0.01, "loss": 2.0163, "step": 26727 }, { "epoch": 2.74745605920444, "grad_norm": 0.06777355074882507, "learning_rate": 0.01, "loss": 2.0318, "step": 26730 }, { "epoch": 2.7477644156645082, "grad_norm": 0.05725346878170967, "learning_rate": 0.01, "loss": 2.0395, "step": 26733 }, { "epoch": 2.748072772124576, "grad_norm": 0.0601140521466732, "learning_rate": 0.01, "loss": 2.0176, "step": 26736 }, { "epoch": 2.748381128584644, "grad_norm": 0.04107888415455818, "learning_rate": 0.01, "loss": 1.9972, "step": 26739 }, { "epoch": 2.7486894850447117, "grad_norm": 0.03484128415584564, "learning_rate": 0.01, "loss": 2.0018, "step": 26742 }, { "epoch": 2.7489978415047798, "grad_norm": 0.05098375305533409, "learning_rate": 0.01, "loss": 1.9977, "step": 26745 }, { "epoch": 2.7493061979648474, "grad_norm": 0.1751684993505478, "learning_rate": 0.01, "loss": 2.0183, "step": 26748 }, { "epoch": 2.749614554424915, "grad_norm": 0.09155084937810898, "learning_rate": 0.01, "loss": 2.0239, "step": 26751 }, { "epoch": 2.749922910884983, "grad_norm": 0.05078737810254097, "learning_rate": 0.01, "loss": 2.0396, "step": 26754 }, { "epoch": 2.750231267345051, "grad_norm": 0.03406425565481186, "learning_rate": 0.01, "loss": 2.0378, "step": 26757 }, { "epoch": 2.7505396238051185, "grad_norm": 0.07126470655202866, "learning_rate": 0.01, "loss": 2.0391, "step": 26760 }, { "epoch": 2.7508479802651866, "grad_norm": 0.05165224149823189, "learning_rate": 0.01, "loss": 2.0454, "step": 26763 }, { "epoch": 2.7511563367252543, "grad_norm": 0.04024217650294304, "learning_rate": 0.01, "loss": 1.9988, "step": 26766 }, { "epoch": 2.7514646931853224, "grad_norm": 0.05387435853481293, "learning_rate": 0.01, "loss": 2.0263, "step": 26769 }, { "epoch": 2.75177304964539, "grad_norm": 0.11387746036052704, "learning_rate": 0.01, "loss": 2.0191, "step": 26772 }, { "epoch": 2.752081406105458, "grad_norm": 0.06504768133163452, "learning_rate": 0.01, "loss": 2.0359, "step": 26775 }, { "epoch": 2.752389762565526, "grad_norm": 0.07217580825090408, "learning_rate": 0.01, "loss": 1.9967, "step": 26778 }, { "epoch": 2.7526981190255935, "grad_norm": 0.07751470804214478, "learning_rate": 0.01, "loss": 2.0171, "step": 26781 }, { "epoch": 2.7530064754856616, "grad_norm": 0.06282947957515717, "learning_rate": 0.01, "loss": 2.0022, "step": 26784 }, { "epoch": 2.7533148319457292, "grad_norm": 0.04301406815648079, "learning_rate": 0.01, "loss": 2.0062, "step": 26787 }, { "epoch": 2.753623188405797, "grad_norm": 0.06154756247997284, "learning_rate": 0.01, "loss": 2.0212, "step": 26790 }, { "epoch": 2.753931544865865, "grad_norm": 0.04729504510760307, "learning_rate": 0.01, "loss": 2.0192, "step": 26793 }, { "epoch": 2.7542399013259327, "grad_norm": 0.06597118079662323, "learning_rate": 0.01, "loss": 2.0081, "step": 26796 }, { "epoch": 2.7545482577860008, "grad_norm": 0.050065554678440094, "learning_rate": 0.01, "loss": 2.0004, "step": 26799 }, { "epoch": 2.7548566142460684, "grad_norm": 0.048531875014305115, "learning_rate": 0.01, "loss": 2.0158, "step": 26802 }, { "epoch": 2.7551649707061365, "grad_norm": 0.04954374581575394, "learning_rate": 0.01, "loss": 2.007, "step": 26805 }, { "epoch": 2.755473327166204, "grad_norm": 0.06998781114816666, "learning_rate": 0.01, "loss": 2.0537, "step": 26808 }, { "epoch": 2.755781683626272, "grad_norm": 0.08933461457490921, "learning_rate": 0.01, "loss": 2.0058, "step": 26811 }, { "epoch": 2.75609004008634, "grad_norm": 0.06067082658410072, "learning_rate": 0.01, "loss": 2.0318, "step": 26814 }, { "epoch": 2.7563983965464076, "grad_norm": 0.07710520923137665, "learning_rate": 0.01, "loss": 2.0029, "step": 26817 }, { "epoch": 2.7567067530064753, "grad_norm": 0.05703970044851303, "learning_rate": 0.01, "loss": 2.0505, "step": 26820 }, { "epoch": 2.7570151094665434, "grad_norm": 0.1251015067100525, "learning_rate": 0.01, "loss": 2.0264, "step": 26823 }, { "epoch": 2.757323465926611, "grad_norm": 0.05942771956324577, "learning_rate": 0.01, "loss": 2.0368, "step": 26826 }, { "epoch": 2.757631822386679, "grad_norm": 0.03195277228951454, "learning_rate": 0.01, "loss": 1.9868, "step": 26829 }, { "epoch": 2.757940178846747, "grad_norm": 0.03609882667660713, "learning_rate": 0.01, "loss": 2.0208, "step": 26832 }, { "epoch": 2.758248535306815, "grad_norm": 0.04004659876227379, "learning_rate": 0.01, "loss": 1.9996, "step": 26835 }, { "epoch": 2.7585568917668826, "grad_norm": 0.06475567817687988, "learning_rate": 0.01, "loss": 2.0187, "step": 26838 }, { "epoch": 2.7588652482269502, "grad_norm": 0.04894067347049713, "learning_rate": 0.01, "loss": 2.0342, "step": 26841 }, { "epoch": 2.7591736046870183, "grad_norm": 0.11116486042737961, "learning_rate": 0.01, "loss": 2.0111, "step": 26844 }, { "epoch": 2.759481961147086, "grad_norm": 0.0832393690943718, "learning_rate": 0.01, "loss": 1.9986, "step": 26847 }, { "epoch": 2.7597903176071537, "grad_norm": 0.06433276832103729, "learning_rate": 0.01, "loss": 2.0233, "step": 26850 }, { "epoch": 2.7600986740672218, "grad_norm": 0.06255584210157394, "learning_rate": 0.01, "loss": 2.0097, "step": 26853 }, { "epoch": 2.7604070305272894, "grad_norm": 0.06610293686389923, "learning_rate": 0.01, "loss": 2.0223, "step": 26856 }, { "epoch": 2.7607153869873575, "grad_norm": 0.048567045480012894, "learning_rate": 0.01, "loss": 1.9865, "step": 26859 }, { "epoch": 2.761023743447425, "grad_norm": 0.06945166736841202, "learning_rate": 0.01, "loss": 2.0189, "step": 26862 }, { "epoch": 2.7613320999074933, "grad_norm": 0.07567547261714935, "learning_rate": 0.01, "loss": 2.0356, "step": 26865 }, { "epoch": 2.761640456367561, "grad_norm": 0.09087162464857101, "learning_rate": 0.01, "loss": 2.0395, "step": 26868 }, { "epoch": 2.7619488128276286, "grad_norm": 0.07904476672410965, "learning_rate": 0.01, "loss": 2.0028, "step": 26871 }, { "epoch": 2.7622571692876967, "grad_norm": 0.08835271000862122, "learning_rate": 0.01, "loss": 2.0364, "step": 26874 }, { "epoch": 2.7625655257477644, "grad_norm": 0.042680736631155014, "learning_rate": 0.01, "loss": 2.0206, "step": 26877 }, { "epoch": 2.762873882207832, "grad_norm": 0.034312695264816284, "learning_rate": 0.01, "loss": 2.0124, "step": 26880 }, { "epoch": 2.7631822386679, "grad_norm": 0.04073645919561386, "learning_rate": 0.01, "loss": 2.0207, "step": 26883 }, { "epoch": 2.763490595127968, "grad_norm": 0.11242496222257614, "learning_rate": 0.01, "loss": 2.0234, "step": 26886 }, { "epoch": 2.763798951588036, "grad_norm": 0.048365022987127304, "learning_rate": 0.01, "loss": 2.0223, "step": 26889 }, { "epoch": 2.7641073080481036, "grad_norm": 0.06508596241474152, "learning_rate": 0.01, "loss": 2.0067, "step": 26892 }, { "epoch": 2.7644156645081717, "grad_norm": 0.08372494578361511, "learning_rate": 0.01, "loss": 2.024, "step": 26895 }, { "epoch": 2.7647240209682393, "grad_norm": 0.08499015122652054, "learning_rate": 0.01, "loss": 2.0337, "step": 26898 }, { "epoch": 2.765032377428307, "grad_norm": 0.08214948326349258, "learning_rate": 0.01, "loss": 2.0235, "step": 26901 }, { "epoch": 2.765340733888375, "grad_norm": 0.0902111828327179, "learning_rate": 0.01, "loss": 2.0319, "step": 26904 }, { "epoch": 2.7656490903484428, "grad_norm": 0.0556274875998497, "learning_rate": 0.01, "loss": 2.0096, "step": 26907 }, { "epoch": 2.7659574468085104, "grad_norm": 0.06845957040786743, "learning_rate": 0.01, "loss": 1.9983, "step": 26910 }, { "epoch": 2.7662658032685785, "grad_norm": 0.051283106207847595, "learning_rate": 0.01, "loss": 2.009, "step": 26913 }, { "epoch": 2.766574159728646, "grad_norm": 0.03301118686795235, "learning_rate": 0.01, "loss": 1.9983, "step": 26916 }, { "epoch": 2.7668825161887143, "grad_norm": 0.05253078415989876, "learning_rate": 0.01, "loss": 2.0274, "step": 26919 }, { "epoch": 2.767190872648782, "grad_norm": 0.1089513972401619, "learning_rate": 0.01, "loss": 2.044, "step": 26922 }, { "epoch": 2.76749922910885, "grad_norm": 0.06042607128620148, "learning_rate": 0.01, "loss": 1.9767, "step": 26925 }, { "epoch": 2.7678075855689177, "grad_norm": 0.09722250699996948, "learning_rate": 0.01, "loss": 2.0136, "step": 26928 }, { "epoch": 2.7681159420289854, "grad_norm": 0.04542861133813858, "learning_rate": 0.01, "loss": 2.0, "step": 26931 }, { "epoch": 2.7684242984890535, "grad_norm": 0.14705310761928558, "learning_rate": 0.01, "loss": 2.0074, "step": 26934 }, { "epoch": 2.768732654949121, "grad_norm": 0.03334478661417961, "learning_rate": 0.01, "loss": 2.0104, "step": 26937 }, { "epoch": 2.769041011409189, "grad_norm": 0.054583244025707245, "learning_rate": 0.01, "loss": 2.0111, "step": 26940 }, { "epoch": 2.769349367869257, "grad_norm": 0.03972140699625015, "learning_rate": 0.01, "loss": 2.0327, "step": 26943 }, { "epoch": 2.7696577243293246, "grad_norm": 0.04227004200220108, "learning_rate": 0.01, "loss": 2.052, "step": 26946 }, { "epoch": 2.7699660807893927, "grad_norm": 0.035285089164972305, "learning_rate": 0.01, "loss": 2.0224, "step": 26949 }, { "epoch": 2.7702744372494603, "grad_norm": 0.08037975430488586, "learning_rate": 0.01, "loss": 2.0201, "step": 26952 }, { "epoch": 2.7705827937095284, "grad_norm": 0.06296487152576447, "learning_rate": 0.01, "loss": 2.0222, "step": 26955 }, { "epoch": 2.770891150169596, "grad_norm": 0.08724946528673172, "learning_rate": 0.01, "loss": 2.0515, "step": 26958 }, { "epoch": 2.7711995066296637, "grad_norm": 0.06455433368682861, "learning_rate": 0.01, "loss": 2.0573, "step": 26961 }, { "epoch": 2.771507863089732, "grad_norm": 0.07615622133016586, "learning_rate": 0.01, "loss": 2.0177, "step": 26964 }, { "epoch": 2.7718162195497995, "grad_norm": 0.06695935130119324, "learning_rate": 0.01, "loss": 2.031, "step": 26967 }, { "epoch": 2.772124576009867, "grad_norm": 0.04257410392165184, "learning_rate": 0.01, "loss": 2.0186, "step": 26970 }, { "epoch": 2.7724329324699353, "grad_norm": 0.1329718828201294, "learning_rate": 0.01, "loss": 2.014, "step": 26973 }, { "epoch": 2.772741288930003, "grad_norm": 0.06937066465616226, "learning_rate": 0.01, "loss": 2.0185, "step": 26976 }, { "epoch": 2.773049645390071, "grad_norm": 0.07017137110233307, "learning_rate": 0.01, "loss": 2.0066, "step": 26979 }, { "epoch": 2.7733580018501387, "grad_norm": 0.03589661046862602, "learning_rate": 0.01, "loss": 2.0338, "step": 26982 }, { "epoch": 2.773666358310207, "grad_norm": 0.04110831022262573, "learning_rate": 0.01, "loss": 2.0054, "step": 26985 }, { "epoch": 2.7739747147702745, "grad_norm": 0.06182010471820831, "learning_rate": 0.01, "loss": 2.029, "step": 26988 }, { "epoch": 2.774283071230342, "grad_norm": 0.08161917328834534, "learning_rate": 0.01, "loss": 2.0191, "step": 26991 }, { "epoch": 2.7745914276904102, "grad_norm": 0.05832149460911751, "learning_rate": 0.01, "loss": 2.0279, "step": 26994 }, { "epoch": 2.774899784150478, "grad_norm": 0.08298707008361816, "learning_rate": 0.01, "loss": 2.0351, "step": 26997 }, { "epoch": 2.7752081406105455, "grad_norm": 0.04794152081012726, "learning_rate": 0.01, "loss": 2.0158, "step": 27000 }, { "epoch": 2.7755164970706137, "grad_norm": 0.04620853811502457, "learning_rate": 0.01, "loss": 2.029, "step": 27003 }, { "epoch": 2.7758248535306813, "grad_norm": 0.045553676784038544, "learning_rate": 0.01, "loss": 1.9998, "step": 27006 }, { "epoch": 2.7761332099907494, "grad_norm": 0.058516908437013626, "learning_rate": 0.01, "loss": 2.0324, "step": 27009 }, { "epoch": 2.776441566450817, "grad_norm": 0.05661854147911072, "learning_rate": 0.01, "loss": 2.0331, "step": 27012 }, { "epoch": 2.776749922910885, "grad_norm": 0.05626550316810608, "learning_rate": 0.01, "loss": 2.016, "step": 27015 }, { "epoch": 2.777058279370953, "grad_norm": 0.08505663275718689, "learning_rate": 0.01, "loss": 2.014, "step": 27018 }, { "epoch": 2.7773666358310205, "grad_norm": 0.08449403196573257, "learning_rate": 0.01, "loss": 2.0141, "step": 27021 }, { "epoch": 2.7776749922910886, "grad_norm": 0.08090987801551819, "learning_rate": 0.01, "loss": 2.0313, "step": 27024 }, { "epoch": 2.7779833487511563, "grad_norm": 0.03180038183927536, "learning_rate": 0.01, "loss": 2.0105, "step": 27027 }, { "epoch": 2.778291705211224, "grad_norm": 0.03052234649658203, "learning_rate": 0.01, "loss": 2.0144, "step": 27030 }, { "epoch": 2.778600061671292, "grad_norm": 0.07030376046895981, "learning_rate": 0.01, "loss": 2.0101, "step": 27033 }, { "epoch": 2.7789084181313597, "grad_norm": 0.1097898781299591, "learning_rate": 0.01, "loss": 2.016, "step": 27036 }, { "epoch": 2.779216774591428, "grad_norm": 0.10938852280378342, "learning_rate": 0.01, "loss": 2.0358, "step": 27039 }, { "epoch": 2.7795251310514955, "grad_norm": 0.17036540806293488, "learning_rate": 0.01, "loss": 2.0242, "step": 27042 }, { "epoch": 2.7798334875115636, "grad_norm": 0.07512038946151733, "learning_rate": 0.01, "loss": 2.0142, "step": 27045 }, { "epoch": 2.780141843971631, "grad_norm": 0.03864987567067146, "learning_rate": 0.01, "loss": 1.993, "step": 27048 }, { "epoch": 2.780450200431699, "grad_norm": 0.07508687674999237, "learning_rate": 0.01, "loss": 2.0404, "step": 27051 }, { "epoch": 2.780758556891767, "grad_norm": 0.03866630047559738, "learning_rate": 0.01, "loss": 2.0285, "step": 27054 }, { "epoch": 2.7810669133518346, "grad_norm": 0.044196490198373795, "learning_rate": 0.01, "loss": 2.0015, "step": 27057 }, { "epoch": 2.7813752698119023, "grad_norm": 0.03751998767256737, "learning_rate": 0.01, "loss": 2.0142, "step": 27060 }, { "epoch": 2.7816836262719704, "grad_norm": 0.04259081184864044, "learning_rate": 0.01, "loss": 2.0344, "step": 27063 }, { "epoch": 2.781991982732038, "grad_norm": 0.04910355806350708, "learning_rate": 0.01, "loss": 2.0345, "step": 27066 }, { "epoch": 2.782300339192106, "grad_norm": 0.07969753444194794, "learning_rate": 0.01, "loss": 2.0147, "step": 27069 }, { "epoch": 2.782608695652174, "grad_norm": 0.04511536657810211, "learning_rate": 0.01, "loss": 2.0265, "step": 27072 }, { "epoch": 2.782917052112242, "grad_norm": 0.03906785696744919, "learning_rate": 0.01, "loss": 2.0169, "step": 27075 }, { "epoch": 2.7832254085723096, "grad_norm": 0.045391857624053955, "learning_rate": 0.01, "loss": 2.0256, "step": 27078 }, { "epoch": 2.7835337650323773, "grad_norm": 0.04607332870364189, "learning_rate": 0.01, "loss": 2.0379, "step": 27081 }, { "epoch": 2.7838421214924454, "grad_norm": 0.10543697327375412, "learning_rate": 0.01, "loss": 2.0022, "step": 27084 }, { "epoch": 2.784150477952513, "grad_norm": 0.045777421444654465, "learning_rate": 0.01, "loss": 2.0065, "step": 27087 }, { "epoch": 2.7844588344125807, "grad_norm": 0.11416932195425034, "learning_rate": 0.01, "loss": 2.0156, "step": 27090 }, { "epoch": 2.784767190872649, "grad_norm": 0.04630710557103157, "learning_rate": 0.01, "loss": 2.0109, "step": 27093 }, { "epoch": 2.785075547332717, "grad_norm": 0.1050376445055008, "learning_rate": 0.01, "loss": 2.0239, "step": 27096 }, { "epoch": 2.7853839037927846, "grad_norm": 0.04592348635196686, "learning_rate": 0.01, "loss": 2.0032, "step": 27099 }, { "epoch": 2.785692260252852, "grad_norm": 0.058079514652490616, "learning_rate": 0.01, "loss": 2.0067, "step": 27102 }, { "epoch": 2.7860006167129203, "grad_norm": 0.12133990228176117, "learning_rate": 0.01, "loss": 2.0296, "step": 27105 }, { "epoch": 2.786308973172988, "grad_norm": 0.09523560851812363, "learning_rate": 0.01, "loss": 2.0022, "step": 27108 }, { "epoch": 2.7866173296330556, "grad_norm": 0.05585847795009613, "learning_rate": 0.01, "loss": 2.0458, "step": 27111 }, { "epoch": 2.7869256860931237, "grad_norm": 0.03669681027531624, "learning_rate": 0.01, "loss": 2.0072, "step": 27114 }, { "epoch": 2.7872340425531914, "grad_norm": 0.05370241776108742, "learning_rate": 0.01, "loss": 2.0034, "step": 27117 }, { "epoch": 2.787542399013259, "grad_norm": 0.04395952448248863, "learning_rate": 0.01, "loss": 1.992, "step": 27120 }, { "epoch": 2.787850755473327, "grad_norm": 0.04012378677725792, "learning_rate": 0.01, "loss": 2.0269, "step": 27123 }, { "epoch": 2.7881591119333953, "grad_norm": 0.08143822848796844, "learning_rate": 0.01, "loss": 2.0385, "step": 27126 }, { "epoch": 2.788467468393463, "grad_norm": 0.05250631272792816, "learning_rate": 0.01, "loss": 2.0132, "step": 27129 }, { "epoch": 2.7887758248535306, "grad_norm": 0.09945302456617355, "learning_rate": 0.01, "loss": 2.0135, "step": 27132 }, { "epoch": 2.7890841813135987, "grad_norm": 0.06001584604382515, "learning_rate": 0.01, "loss": 1.9975, "step": 27135 }, { "epoch": 2.7893925377736664, "grad_norm": 0.05244366079568863, "learning_rate": 0.01, "loss": 2.0247, "step": 27138 }, { "epoch": 2.789700894233734, "grad_norm": 0.06054399535059929, "learning_rate": 0.01, "loss": 1.9925, "step": 27141 }, { "epoch": 2.790009250693802, "grad_norm": 0.10409083217382431, "learning_rate": 0.01, "loss": 2.005, "step": 27144 }, { "epoch": 2.79031760715387, "grad_norm": 0.06229739263653755, "learning_rate": 0.01, "loss": 2.0175, "step": 27147 }, { "epoch": 2.7906259636139374, "grad_norm": 0.06340862810611725, "learning_rate": 0.01, "loss": 2.0205, "step": 27150 }, { "epoch": 2.7909343200740055, "grad_norm": 0.035666175186634064, "learning_rate": 0.01, "loss": 2.0039, "step": 27153 }, { "epoch": 2.7912426765340737, "grad_norm": 0.043176453560590744, "learning_rate": 0.01, "loss": 2.0045, "step": 27156 }, { "epoch": 2.7915510329941413, "grad_norm": 0.042664237320423126, "learning_rate": 0.01, "loss": 2.0133, "step": 27159 }, { "epoch": 2.791859389454209, "grad_norm": 0.08263540267944336, "learning_rate": 0.01, "loss": 2.0327, "step": 27162 }, { "epoch": 2.792167745914277, "grad_norm": 0.05592063441872597, "learning_rate": 0.01, "loss": 2.012, "step": 27165 }, { "epoch": 2.7924761023743447, "grad_norm": 0.0740666538476944, "learning_rate": 0.01, "loss": 2.0348, "step": 27168 }, { "epoch": 2.7927844588344124, "grad_norm": 0.07478248327970505, "learning_rate": 0.01, "loss": 2.0143, "step": 27171 }, { "epoch": 2.7930928152944805, "grad_norm": 0.08470360189676285, "learning_rate": 0.01, "loss": 2.0075, "step": 27174 }, { "epoch": 2.793401171754548, "grad_norm": 0.04486660659313202, "learning_rate": 0.01, "loss": 1.9898, "step": 27177 }, { "epoch": 2.793709528214616, "grad_norm": 0.12479998171329498, "learning_rate": 0.01, "loss": 2.005, "step": 27180 }, { "epoch": 2.794017884674684, "grad_norm": 0.05652941018342972, "learning_rate": 0.01, "loss": 2.0248, "step": 27183 }, { "epoch": 2.794326241134752, "grad_norm": 0.054577797651290894, "learning_rate": 0.01, "loss": 2.0249, "step": 27186 }, { "epoch": 2.7946345975948197, "grad_norm": 0.03588878735899925, "learning_rate": 0.01, "loss": 2.0321, "step": 27189 }, { "epoch": 2.7949429540548874, "grad_norm": 0.0890214741230011, "learning_rate": 0.01, "loss": 2.0066, "step": 27192 }, { "epoch": 2.7952513105149555, "grad_norm": 0.0901033952832222, "learning_rate": 0.01, "loss": 2.0203, "step": 27195 }, { "epoch": 2.795559666975023, "grad_norm": 0.09306314587593079, "learning_rate": 0.01, "loss": 2.0337, "step": 27198 }, { "epoch": 2.7958680234350908, "grad_norm": 0.07897783815860748, "learning_rate": 0.01, "loss": 2.004, "step": 27201 }, { "epoch": 2.796176379895159, "grad_norm": 0.08055876195430756, "learning_rate": 0.01, "loss": 2.0122, "step": 27204 }, { "epoch": 2.7964847363552265, "grad_norm": 0.058927252888679504, "learning_rate": 0.01, "loss": 2.0178, "step": 27207 }, { "epoch": 2.7967930928152946, "grad_norm": 0.1303315907716751, "learning_rate": 0.01, "loss": 2.0186, "step": 27210 }, { "epoch": 2.7971014492753623, "grad_norm": 0.08321187645196915, "learning_rate": 0.01, "loss": 2.0038, "step": 27213 }, { "epoch": 2.7974098057354304, "grad_norm": 0.0715765729546547, "learning_rate": 0.01, "loss": 2.02, "step": 27216 }, { "epoch": 2.797718162195498, "grad_norm": 0.047501109540462494, "learning_rate": 0.01, "loss": 2.0185, "step": 27219 }, { "epoch": 2.7980265186555657, "grad_norm": 0.04297725111246109, "learning_rate": 0.01, "loss": 1.9893, "step": 27222 }, { "epoch": 2.798334875115634, "grad_norm": 0.08612347394227982, "learning_rate": 0.01, "loss": 2.0351, "step": 27225 }, { "epoch": 2.7986432315757015, "grad_norm": 0.06801822036504745, "learning_rate": 0.01, "loss": 2.0012, "step": 27228 }, { "epoch": 2.798951588035769, "grad_norm": 0.103379026055336, "learning_rate": 0.01, "loss": 2.0172, "step": 27231 }, { "epoch": 2.7992599444958373, "grad_norm": 0.040429919958114624, "learning_rate": 0.01, "loss": 2.0289, "step": 27234 }, { "epoch": 2.799568300955905, "grad_norm": 0.09817671030759811, "learning_rate": 0.01, "loss": 2.0055, "step": 27237 }, { "epoch": 2.799876657415973, "grad_norm": 0.06191599741578102, "learning_rate": 0.01, "loss": 2.0032, "step": 27240 }, { "epoch": 2.8001850138760407, "grad_norm": 0.07992551475763321, "learning_rate": 0.01, "loss": 2.0203, "step": 27243 }, { "epoch": 2.800493370336109, "grad_norm": 0.055606722831726074, "learning_rate": 0.01, "loss": 2.0185, "step": 27246 }, { "epoch": 2.8008017267961764, "grad_norm": 0.1322477012872696, "learning_rate": 0.01, "loss": 2.0215, "step": 27249 }, { "epoch": 2.801110083256244, "grad_norm": 0.05603817105293274, "learning_rate": 0.01, "loss": 2.039, "step": 27252 }, { "epoch": 2.801418439716312, "grad_norm": 0.06935392320156097, "learning_rate": 0.01, "loss": 2.0119, "step": 27255 }, { "epoch": 2.80172679617638, "grad_norm": 0.04515109956264496, "learning_rate": 0.01, "loss": 2.053, "step": 27258 }, { "epoch": 2.8020351526364475, "grad_norm": 0.050864629447460175, "learning_rate": 0.01, "loss": 2.0042, "step": 27261 }, { "epoch": 2.8023435090965156, "grad_norm": 0.038876548409461975, "learning_rate": 0.01, "loss": 1.9945, "step": 27264 }, { "epoch": 2.8026518655565833, "grad_norm": 0.03906438872218132, "learning_rate": 0.01, "loss": 1.9832, "step": 27267 }, { "epoch": 2.8029602220166514, "grad_norm": 0.08978519588708878, "learning_rate": 0.01, "loss": 2.0292, "step": 27270 }, { "epoch": 2.803268578476719, "grad_norm": 0.07353704422712326, "learning_rate": 0.01, "loss": 2.0117, "step": 27273 }, { "epoch": 2.803576934936787, "grad_norm": 0.0424012616276741, "learning_rate": 0.01, "loss": 1.9931, "step": 27276 }, { "epoch": 2.803885291396855, "grad_norm": 0.05939796194434166, "learning_rate": 0.01, "loss": 2.0089, "step": 27279 }, { "epoch": 2.8041936478569225, "grad_norm": 0.06859377771615982, "learning_rate": 0.01, "loss": 2.005, "step": 27282 }, { "epoch": 2.8045020043169906, "grad_norm": 0.1016155332326889, "learning_rate": 0.01, "loss": 2.0207, "step": 27285 }, { "epoch": 2.8048103607770583, "grad_norm": 0.05608060583472252, "learning_rate": 0.01, "loss": 2.0117, "step": 27288 }, { "epoch": 2.805118717237126, "grad_norm": 0.043284934014081955, "learning_rate": 0.01, "loss": 1.9931, "step": 27291 }, { "epoch": 2.805427073697194, "grad_norm": 0.04967540502548218, "learning_rate": 0.01, "loss": 2.0222, "step": 27294 }, { "epoch": 2.8057354301572617, "grad_norm": 0.040739741176366806, "learning_rate": 0.01, "loss": 2.0148, "step": 27297 }, { "epoch": 2.80604378661733, "grad_norm": 0.039170749485492706, "learning_rate": 0.01, "loss": 2.0277, "step": 27300 }, { "epoch": 2.8063521430773974, "grad_norm": 0.040820952504873276, "learning_rate": 0.01, "loss": 2.0151, "step": 27303 }, { "epoch": 2.8066604995374655, "grad_norm": 0.07061021775007248, "learning_rate": 0.01, "loss": 2.0072, "step": 27306 }, { "epoch": 2.806968855997533, "grad_norm": 0.054698627442121506, "learning_rate": 0.01, "loss": 2.0059, "step": 27309 }, { "epoch": 2.807277212457601, "grad_norm": 0.04737408831715584, "learning_rate": 0.01, "loss": 2.0109, "step": 27312 }, { "epoch": 2.807585568917669, "grad_norm": 0.11695525050163269, "learning_rate": 0.01, "loss": 2.0076, "step": 27315 }, { "epoch": 2.8078939253777366, "grad_norm": 0.0921463891863823, "learning_rate": 0.01, "loss": 2.039, "step": 27318 }, { "epoch": 2.8082022818378043, "grad_norm": 0.03874325007200241, "learning_rate": 0.01, "loss": 2.0212, "step": 27321 }, { "epoch": 2.8085106382978724, "grad_norm": 0.04040201008319855, "learning_rate": 0.01, "loss": 1.9732, "step": 27324 }, { "epoch": 2.80881899475794, "grad_norm": 0.035648833960294724, "learning_rate": 0.01, "loss": 2.0269, "step": 27327 }, { "epoch": 2.809127351218008, "grad_norm": 0.0893108919262886, "learning_rate": 0.01, "loss": 1.9982, "step": 27330 }, { "epoch": 2.809435707678076, "grad_norm": 0.1203017309308052, "learning_rate": 0.01, "loss": 2.0138, "step": 27333 }, { "epoch": 2.809744064138144, "grad_norm": 0.045630257576704025, "learning_rate": 0.01, "loss": 2.0178, "step": 27336 }, { "epoch": 2.8100524205982116, "grad_norm": 0.04097437858581543, "learning_rate": 0.01, "loss": 1.9842, "step": 27339 }, { "epoch": 2.8103607770582792, "grad_norm": 0.05476262420415878, "learning_rate": 0.01, "loss": 2.0238, "step": 27342 }, { "epoch": 2.8106691335183474, "grad_norm": 0.05577860027551651, "learning_rate": 0.01, "loss": 2.0396, "step": 27345 }, { "epoch": 2.810977489978415, "grad_norm": 0.04357774555683136, "learning_rate": 0.01, "loss": 2.0158, "step": 27348 }, { "epoch": 2.8112858464384827, "grad_norm": 0.04594416916370392, "learning_rate": 0.01, "loss": 2.0197, "step": 27351 }, { "epoch": 2.8115942028985508, "grad_norm": 0.041454486548900604, "learning_rate": 0.01, "loss": 2.014, "step": 27354 }, { "epoch": 2.8119025593586184, "grad_norm": 0.05279424414038658, "learning_rate": 0.01, "loss": 2.0261, "step": 27357 }, { "epoch": 2.8122109158186865, "grad_norm": 0.10371188074350357, "learning_rate": 0.01, "loss": 2.0333, "step": 27360 }, { "epoch": 2.812519272278754, "grad_norm": 0.053941383957862854, "learning_rate": 0.01, "loss": 2.0067, "step": 27363 }, { "epoch": 2.8128276287388223, "grad_norm": 0.12082924693822861, "learning_rate": 0.01, "loss": 2.0177, "step": 27366 }, { "epoch": 2.81313598519889, "grad_norm": 0.11066311597824097, "learning_rate": 0.01, "loss": 2.0101, "step": 27369 }, { "epoch": 2.8134443416589576, "grad_norm": 0.07547413557767868, "learning_rate": 0.01, "loss": 2.0079, "step": 27372 }, { "epoch": 2.8137526981190257, "grad_norm": 0.06772000342607498, "learning_rate": 0.01, "loss": 2.0077, "step": 27375 }, { "epoch": 2.8140610545790934, "grad_norm": 0.03906597942113876, "learning_rate": 0.01, "loss": 1.9984, "step": 27378 }, { "epoch": 2.814369411039161, "grad_norm": 0.048235367983579636, "learning_rate": 0.01, "loss": 2.0233, "step": 27381 }, { "epoch": 2.814677767499229, "grad_norm": 0.08060193806886673, "learning_rate": 0.01, "loss": 2.0238, "step": 27384 }, { "epoch": 2.814986123959297, "grad_norm": 0.06527181714773178, "learning_rate": 0.01, "loss": 1.9996, "step": 27387 }, { "epoch": 2.815294480419365, "grad_norm": 0.04199523106217384, "learning_rate": 0.01, "loss": 2.0233, "step": 27390 }, { "epoch": 2.8156028368794326, "grad_norm": 0.07436412572860718, "learning_rate": 0.01, "loss": 1.992, "step": 27393 }, { "epoch": 2.8159111933395007, "grad_norm": 0.05724874138832092, "learning_rate": 0.01, "loss": 1.9834, "step": 27396 }, { "epoch": 2.8162195497995683, "grad_norm": 0.03609538823366165, "learning_rate": 0.01, "loss": 2.0376, "step": 27399 }, { "epoch": 2.816527906259636, "grad_norm": 0.03008181042969227, "learning_rate": 0.01, "loss": 2.0129, "step": 27402 }, { "epoch": 2.816836262719704, "grad_norm": 0.0635017454624176, "learning_rate": 0.01, "loss": 2.0, "step": 27405 }, { "epoch": 2.8171446191797718, "grad_norm": 0.07800552248954773, "learning_rate": 0.01, "loss": 2.0143, "step": 27408 }, { "epoch": 2.8174529756398394, "grad_norm": 0.09289496392011642, "learning_rate": 0.01, "loss": 1.991, "step": 27411 }, { "epoch": 2.8177613320999075, "grad_norm": 0.0791633352637291, "learning_rate": 0.01, "loss": 2.033, "step": 27414 }, { "epoch": 2.818069688559975, "grad_norm": 0.06715335696935654, "learning_rate": 0.01, "loss": 2.0149, "step": 27417 }, { "epoch": 2.8183780450200433, "grad_norm": 0.0814514085650444, "learning_rate": 0.01, "loss": 2.025, "step": 27420 }, { "epoch": 2.818686401480111, "grad_norm": 0.09124473482370377, "learning_rate": 0.01, "loss": 2.0017, "step": 27423 }, { "epoch": 2.818994757940179, "grad_norm": 0.03711434826254845, "learning_rate": 0.01, "loss": 2.0231, "step": 27426 }, { "epoch": 2.8193031144002467, "grad_norm": 0.05914726108312607, "learning_rate": 0.01, "loss": 2.0047, "step": 27429 }, { "epoch": 2.8196114708603144, "grad_norm": 0.04358556494116783, "learning_rate": 0.01, "loss": 2.0211, "step": 27432 }, { "epoch": 2.8199198273203825, "grad_norm": 0.1319088488817215, "learning_rate": 0.01, "loss": 2.0186, "step": 27435 }, { "epoch": 2.82022818378045, "grad_norm": 0.07464331388473511, "learning_rate": 0.01, "loss": 2.0434, "step": 27438 }, { "epoch": 2.820536540240518, "grad_norm": 0.08253163844347, "learning_rate": 0.01, "loss": 2.0172, "step": 27441 }, { "epoch": 2.820844896700586, "grad_norm": 0.042612046003341675, "learning_rate": 0.01, "loss": 2.0227, "step": 27444 }, { "epoch": 2.8211532531606536, "grad_norm": 0.04726095870137215, "learning_rate": 0.01, "loss": 2.0301, "step": 27447 }, { "epoch": 2.8214616096207217, "grad_norm": 0.06982813775539398, "learning_rate": 0.01, "loss": 2.0424, "step": 27450 }, { "epoch": 2.8217699660807893, "grad_norm": 0.051457736641168594, "learning_rate": 0.01, "loss": 2.0424, "step": 27453 }, { "epoch": 2.8220783225408574, "grad_norm": 0.07463426142930984, "learning_rate": 0.01, "loss": 2.012, "step": 27456 }, { "epoch": 2.822386679000925, "grad_norm": 0.048826638609170914, "learning_rate": 0.01, "loss": 2.0303, "step": 27459 }, { "epoch": 2.8226950354609928, "grad_norm": 0.05443079397082329, "learning_rate": 0.01, "loss": 2.0424, "step": 27462 }, { "epoch": 2.823003391921061, "grad_norm": 0.10064379125833511, "learning_rate": 0.01, "loss": 2.0166, "step": 27465 }, { "epoch": 2.8233117483811285, "grad_norm": 0.0755578950047493, "learning_rate": 0.01, "loss": 1.9922, "step": 27468 }, { "epoch": 2.823620104841196, "grad_norm": 0.05240621045231819, "learning_rate": 0.01, "loss": 2.0092, "step": 27471 }, { "epoch": 2.8239284613012643, "grad_norm": 0.08781099319458008, "learning_rate": 0.01, "loss": 1.9954, "step": 27474 }, { "epoch": 2.824236817761332, "grad_norm": 0.04524267092347145, "learning_rate": 0.01, "loss": 2.0327, "step": 27477 }, { "epoch": 2.8245451742214, "grad_norm": 0.05119558051228523, "learning_rate": 0.01, "loss": 2.0062, "step": 27480 }, { "epoch": 2.8248535306814677, "grad_norm": 0.06969384104013443, "learning_rate": 0.01, "loss": 2.0216, "step": 27483 }, { "epoch": 2.825161887141536, "grad_norm": 0.09887497872114182, "learning_rate": 0.01, "loss": 2.0033, "step": 27486 }, { "epoch": 2.8254702436016035, "grad_norm": 0.04231324791908264, "learning_rate": 0.01, "loss": 2.0145, "step": 27489 }, { "epoch": 2.825778600061671, "grad_norm": 0.11022655665874481, "learning_rate": 0.01, "loss": 2.0218, "step": 27492 }, { "epoch": 2.8260869565217392, "grad_norm": 0.16821467876434326, "learning_rate": 0.01, "loss": 2.0137, "step": 27495 }, { "epoch": 2.826395312981807, "grad_norm": 0.13946877419948578, "learning_rate": 0.01, "loss": 2.0046, "step": 27498 }, { "epoch": 2.8267036694418746, "grad_norm": 0.07387804985046387, "learning_rate": 0.01, "loss": 2.0294, "step": 27501 }, { "epoch": 2.8270120259019427, "grad_norm": 0.06437870115041733, "learning_rate": 0.01, "loss": 2.0307, "step": 27504 }, { "epoch": 2.8273203823620103, "grad_norm": 0.04310622811317444, "learning_rate": 0.01, "loss": 2.0155, "step": 27507 }, { "epoch": 2.8276287388220784, "grad_norm": 0.04511455446481705, "learning_rate": 0.01, "loss": 2.0243, "step": 27510 }, { "epoch": 2.827937095282146, "grad_norm": 0.05232998728752136, "learning_rate": 0.01, "loss": 2.0523, "step": 27513 }, { "epoch": 2.828245451742214, "grad_norm": 0.03596142679452896, "learning_rate": 0.01, "loss": 2.0137, "step": 27516 }, { "epoch": 2.828553808202282, "grad_norm": 0.035050373524427414, "learning_rate": 0.01, "loss": 2.0141, "step": 27519 }, { "epoch": 2.8288621646623495, "grad_norm": 0.03525279834866524, "learning_rate": 0.01, "loss": 2.007, "step": 27522 }, { "epoch": 2.8291705211224176, "grad_norm": 0.1629800945520401, "learning_rate": 0.01, "loss": 2.0266, "step": 27525 }, { "epoch": 2.8294788775824853, "grad_norm": 0.06687575578689575, "learning_rate": 0.01, "loss": 2.0099, "step": 27528 }, { "epoch": 2.829787234042553, "grad_norm": 0.05308271199464798, "learning_rate": 0.01, "loss": 2.0116, "step": 27531 }, { "epoch": 2.830095590502621, "grad_norm": 0.05350314825773239, "learning_rate": 0.01, "loss": 1.9883, "step": 27534 }, { "epoch": 2.8304039469626887, "grad_norm": 0.03929050639271736, "learning_rate": 0.01, "loss": 2.024, "step": 27537 }, { "epoch": 2.830712303422757, "grad_norm": 0.037572652101516724, "learning_rate": 0.01, "loss": 2.02, "step": 27540 }, { "epoch": 2.8310206598828245, "grad_norm": 0.05316625535488129, "learning_rate": 0.01, "loss": 2.0101, "step": 27543 }, { "epoch": 2.8313290163428926, "grad_norm": 0.04561341553926468, "learning_rate": 0.01, "loss": 2.0185, "step": 27546 }, { "epoch": 2.8316373728029602, "grad_norm": 0.055242884904146194, "learning_rate": 0.01, "loss": 2.0114, "step": 27549 }, { "epoch": 2.831945729263028, "grad_norm": 0.057326629757881165, "learning_rate": 0.01, "loss": 2.0339, "step": 27552 }, { "epoch": 2.832254085723096, "grad_norm": 0.050966937094926834, "learning_rate": 0.01, "loss": 2.0338, "step": 27555 }, { "epoch": 2.8325624421831637, "grad_norm": 0.07518629729747772, "learning_rate": 0.01, "loss": 2.0217, "step": 27558 }, { "epoch": 2.8328707986432313, "grad_norm": 0.10435313731431961, "learning_rate": 0.01, "loss": 2.0082, "step": 27561 }, { "epoch": 2.8331791551032994, "grad_norm": 0.07228770107030869, "learning_rate": 0.01, "loss": 2.0041, "step": 27564 }, { "epoch": 2.833487511563367, "grad_norm": 0.06778567284345627, "learning_rate": 0.01, "loss": 2.0248, "step": 27567 }, { "epoch": 2.833795868023435, "grad_norm": 0.03698448836803436, "learning_rate": 0.01, "loss": 1.9949, "step": 27570 }, { "epoch": 2.834104224483503, "grad_norm": 0.027820177376270294, "learning_rate": 0.01, "loss": 2.0141, "step": 27573 }, { "epoch": 2.834412580943571, "grad_norm": 0.07996013760566711, "learning_rate": 0.01, "loss": 2.0249, "step": 27576 }, { "epoch": 2.8347209374036386, "grad_norm": 0.04629092290997505, "learning_rate": 0.01, "loss": 1.9933, "step": 27579 }, { "epoch": 2.8350292938637063, "grad_norm": 0.04143141210079193, "learning_rate": 0.01, "loss": 2.039, "step": 27582 }, { "epoch": 2.8353376503237744, "grad_norm": 0.12624233961105347, "learning_rate": 0.01, "loss": 2.0228, "step": 27585 }, { "epoch": 2.835646006783842, "grad_norm": 0.1815887838602066, "learning_rate": 0.01, "loss": 2.0228, "step": 27588 }, { "epoch": 2.8359543632439097, "grad_norm": 0.14360418915748596, "learning_rate": 0.01, "loss": 2.0339, "step": 27591 }, { "epoch": 2.836262719703978, "grad_norm": 0.10748742520809174, "learning_rate": 0.01, "loss": 2.0204, "step": 27594 }, { "epoch": 2.836571076164046, "grad_norm": 0.0488545261323452, "learning_rate": 0.01, "loss": 2.0278, "step": 27597 }, { "epoch": 2.8368794326241136, "grad_norm": 0.03699369728565216, "learning_rate": 0.01, "loss": 2.0013, "step": 27600 }, { "epoch": 2.8371877890841812, "grad_norm": 0.04813402146100998, "learning_rate": 0.01, "loss": 2.0025, "step": 27603 }, { "epoch": 2.8374961455442493, "grad_norm": 0.06493838876485825, "learning_rate": 0.01, "loss": 2.0502, "step": 27606 }, { "epoch": 2.837804502004317, "grad_norm": 0.05451219528913498, "learning_rate": 0.01, "loss": 1.9959, "step": 27609 }, { "epoch": 2.8381128584643847, "grad_norm": 0.04877667874097824, "learning_rate": 0.01, "loss": 2.0236, "step": 27612 }, { "epoch": 2.8384212149244528, "grad_norm": 0.05827799066901207, "learning_rate": 0.01, "loss": 2.0247, "step": 27615 }, { "epoch": 2.8387295713845204, "grad_norm": 0.040546271950006485, "learning_rate": 0.01, "loss": 2.0061, "step": 27618 }, { "epoch": 2.839037927844588, "grad_norm": 0.03764180466532707, "learning_rate": 0.01, "loss": 2.0076, "step": 27621 }, { "epoch": 2.839346284304656, "grad_norm": 0.06349320709705353, "learning_rate": 0.01, "loss": 2.0162, "step": 27624 }, { "epoch": 2.8396546407647243, "grad_norm": 0.07079531252384186, "learning_rate": 0.01, "loss": 2.0038, "step": 27627 }, { "epoch": 2.839962997224792, "grad_norm": 0.05074724927544594, "learning_rate": 0.01, "loss": 2.0329, "step": 27630 }, { "epoch": 2.8402713536848596, "grad_norm": 0.10624159872531891, "learning_rate": 0.01, "loss": 2.0454, "step": 27633 }, { "epoch": 2.8405797101449277, "grad_norm": 0.08726594597101212, "learning_rate": 0.01, "loss": 1.9982, "step": 27636 }, { "epoch": 2.8408880666049954, "grad_norm": 0.04581126943230629, "learning_rate": 0.01, "loss": 2.0092, "step": 27639 }, { "epoch": 2.841196423065063, "grad_norm": 0.042876582592725754, "learning_rate": 0.01, "loss": 1.9851, "step": 27642 }, { "epoch": 2.841504779525131, "grad_norm": 0.03951287642121315, "learning_rate": 0.01, "loss": 2.0118, "step": 27645 }, { "epoch": 2.841813135985199, "grad_norm": 0.04439757391810417, "learning_rate": 0.01, "loss": 2.0018, "step": 27648 }, { "epoch": 2.8421214924452665, "grad_norm": 0.05910231173038483, "learning_rate": 0.01, "loss": 2.025, "step": 27651 }, { "epoch": 2.8424298489053346, "grad_norm": 0.1063590720295906, "learning_rate": 0.01, "loss": 2.0129, "step": 27654 }, { "epoch": 2.8427382053654027, "grad_norm": 0.042655881494283676, "learning_rate": 0.01, "loss": 2.004, "step": 27657 }, { "epoch": 2.8430465618254703, "grad_norm": 0.037402208894491196, "learning_rate": 0.01, "loss": 2.0025, "step": 27660 }, { "epoch": 2.843354918285538, "grad_norm": 0.03912290558218956, "learning_rate": 0.01, "loss": 2.0187, "step": 27663 }, { "epoch": 2.843663274745606, "grad_norm": 0.1316000372171402, "learning_rate": 0.01, "loss": 2.0089, "step": 27666 }, { "epoch": 2.8439716312056738, "grad_norm": 0.10243986546993256, "learning_rate": 0.01, "loss": 2.0419, "step": 27669 }, { "epoch": 2.8442799876657414, "grad_norm": 0.09918151050806046, "learning_rate": 0.01, "loss": 2.0299, "step": 27672 }, { "epoch": 2.8445883441258095, "grad_norm": 0.0572928749024868, "learning_rate": 0.01, "loss": 2.0254, "step": 27675 }, { "epoch": 2.844896700585877, "grad_norm": 0.09781496226787567, "learning_rate": 0.01, "loss": 1.9992, "step": 27678 }, { "epoch": 2.845205057045945, "grad_norm": 0.04969809949398041, "learning_rate": 0.01, "loss": 2.0099, "step": 27681 }, { "epoch": 2.845513413506013, "grad_norm": 0.0894259363412857, "learning_rate": 0.01, "loss": 2.0065, "step": 27684 }, { "epoch": 2.845821769966081, "grad_norm": 0.05788953974843025, "learning_rate": 0.01, "loss": 2.0217, "step": 27687 }, { "epoch": 2.8461301264261487, "grad_norm": 0.07628165185451508, "learning_rate": 0.01, "loss": 2.0191, "step": 27690 }, { "epoch": 2.8464384828862164, "grad_norm": 0.07799988240003586, "learning_rate": 0.01, "loss": 1.9947, "step": 27693 }, { "epoch": 2.8467468393462845, "grad_norm": 0.08373581618070602, "learning_rate": 0.01, "loss": 2.0351, "step": 27696 }, { "epoch": 2.847055195806352, "grad_norm": 0.054138489067554474, "learning_rate": 0.01, "loss": 2.0127, "step": 27699 }, { "epoch": 2.84736355226642, "grad_norm": 0.03457311540842056, "learning_rate": 0.01, "loss": 2.0082, "step": 27702 }, { "epoch": 2.847671908726488, "grad_norm": 0.04888417571783066, "learning_rate": 0.01, "loss": 2.0034, "step": 27705 }, { "epoch": 2.8479802651865556, "grad_norm": 0.09924766421318054, "learning_rate": 0.01, "loss": 2.0514, "step": 27708 }, { "epoch": 2.848288621646623, "grad_norm": 0.0531487911939621, "learning_rate": 0.01, "loss": 2.01, "step": 27711 }, { "epoch": 2.8485969781066913, "grad_norm": 0.07373910397291183, "learning_rate": 0.01, "loss": 2.0177, "step": 27714 }, { "epoch": 2.8489053345667594, "grad_norm": 0.09154459089040756, "learning_rate": 0.01, "loss": 2.0375, "step": 27717 }, { "epoch": 2.849213691026827, "grad_norm": 0.04007700830698013, "learning_rate": 0.01, "loss": 2.0447, "step": 27720 }, { "epoch": 2.8495220474868947, "grad_norm": 0.06406736373901367, "learning_rate": 0.01, "loss": 2.0262, "step": 27723 }, { "epoch": 2.849830403946963, "grad_norm": 0.06983067840337753, "learning_rate": 0.01, "loss": 2.0126, "step": 27726 }, { "epoch": 2.8501387604070305, "grad_norm": 0.04832616075873375, "learning_rate": 0.01, "loss": 2.0454, "step": 27729 }, { "epoch": 2.850447116867098, "grad_norm": 0.09404818713665009, "learning_rate": 0.01, "loss": 2.0239, "step": 27732 }, { "epoch": 2.8507554733271663, "grad_norm": 0.12212470918893814, "learning_rate": 0.01, "loss": 1.9912, "step": 27735 }, { "epoch": 2.851063829787234, "grad_norm": 0.08778437972068787, "learning_rate": 0.01, "loss": 2.0503, "step": 27738 }, { "epoch": 2.851372186247302, "grad_norm": 0.08406031876802444, "learning_rate": 0.01, "loss": 2.0432, "step": 27741 }, { "epoch": 2.8516805427073697, "grad_norm": 0.07672454416751862, "learning_rate": 0.01, "loss": 1.999, "step": 27744 }, { "epoch": 2.851988899167438, "grad_norm": 0.12199164927005768, "learning_rate": 0.01, "loss": 2.011, "step": 27747 }, { "epoch": 2.8522972556275055, "grad_norm": 0.05166463926434517, "learning_rate": 0.01, "loss": 2.0053, "step": 27750 }, { "epoch": 2.852605612087573, "grad_norm": 0.11618038266897202, "learning_rate": 0.01, "loss": 1.9846, "step": 27753 }, { "epoch": 2.8529139685476412, "grad_norm": 0.06255706399679184, "learning_rate": 0.01, "loss": 2.0253, "step": 27756 }, { "epoch": 2.853222325007709, "grad_norm": 0.04340292140841484, "learning_rate": 0.01, "loss": 2.0085, "step": 27759 }, { "epoch": 2.8535306814677766, "grad_norm": 0.06595482677221298, "learning_rate": 0.01, "loss": 1.9913, "step": 27762 }, { "epoch": 2.8538390379278447, "grad_norm": 0.03980451449751854, "learning_rate": 0.01, "loss": 1.9994, "step": 27765 }, { "epoch": 2.8541473943879123, "grad_norm": 0.035016849637031555, "learning_rate": 0.01, "loss": 2.0156, "step": 27768 }, { "epoch": 2.8544557508479804, "grad_norm": 0.07189803570508957, "learning_rate": 0.01, "loss": 2.0156, "step": 27771 }, { "epoch": 2.854764107308048, "grad_norm": 0.0759616568684578, "learning_rate": 0.01, "loss": 2.0058, "step": 27774 }, { "epoch": 2.855072463768116, "grad_norm": 0.05985911190509796, "learning_rate": 0.01, "loss": 2.0115, "step": 27777 }, { "epoch": 2.855380820228184, "grad_norm": 0.0529702752828598, "learning_rate": 0.01, "loss": 2.0315, "step": 27780 }, { "epoch": 2.8556891766882515, "grad_norm": 0.046540237963199615, "learning_rate": 0.01, "loss": 2.0035, "step": 27783 }, { "epoch": 2.8559975331483196, "grad_norm": 0.04505128040909767, "learning_rate": 0.01, "loss": 2.0385, "step": 27786 }, { "epoch": 2.8563058896083873, "grad_norm": 0.041494566947221756, "learning_rate": 0.01, "loss": 2.0239, "step": 27789 }, { "epoch": 2.856614246068455, "grad_norm": 0.038743190467357635, "learning_rate": 0.01, "loss": 2.0207, "step": 27792 }, { "epoch": 2.856922602528523, "grad_norm": 0.062206536531448364, "learning_rate": 0.01, "loss": 2.0133, "step": 27795 }, { "epoch": 2.8572309589885907, "grad_norm": 0.19090187549591064, "learning_rate": 0.01, "loss": 2.0292, "step": 27798 }, { "epoch": 2.857539315448659, "grad_norm": 0.06649639457464218, "learning_rate": 0.01, "loss": 2.016, "step": 27801 }, { "epoch": 2.8578476719087265, "grad_norm": 0.07956647872924805, "learning_rate": 0.01, "loss": 1.9797, "step": 27804 }, { "epoch": 2.8581560283687946, "grad_norm": 0.050361573696136475, "learning_rate": 0.01, "loss": 1.9901, "step": 27807 }, { "epoch": 2.8584643848288622, "grad_norm": 0.036132823675870895, "learning_rate": 0.01, "loss": 2.0122, "step": 27810 }, { "epoch": 2.85877274128893, "grad_norm": 0.04948203265666962, "learning_rate": 0.01, "loss": 2.0104, "step": 27813 }, { "epoch": 2.859081097748998, "grad_norm": 0.07491700351238251, "learning_rate": 0.01, "loss": 2.01, "step": 27816 }, { "epoch": 2.8593894542090657, "grad_norm": 0.03278898447751999, "learning_rate": 0.01, "loss": 2.0224, "step": 27819 }, { "epoch": 2.8596978106691333, "grad_norm": 0.04186544567346573, "learning_rate": 0.01, "loss": 1.9922, "step": 27822 }, { "epoch": 2.8600061671292014, "grad_norm": 0.07123599201440811, "learning_rate": 0.01, "loss": 2.0019, "step": 27825 }, { "epoch": 2.860314523589269, "grad_norm": 0.038315288722515106, "learning_rate": 0.01, "loss": 1.9854, "step": 27828 }, { "epoch": 2.860622880049337, "grad_norm": 0.06678714603185654, "learning_rate": 0.01, "loss": 2.0151, "step": 27831 }, { "epoch": 2.860931236509405, "grad_norm": 0.10680954158306122, "learning_rate": 0.01, "loss": 2.0067, "step": 27834 }, { "epoch": 2.861239592969473, "grad_norm": 0.13149844110012054, "learning_rate": 0.01, "loss": 2.0266, "step": 27837 }, { "epoch": 2.8615479494295406, "grad_norm": 0.04700513929128647, "learning_rate": 0.01, "loss": 2.0548, "step": 27840 }, { "epoch": 2.8618563058896083, "grad_norm": 0.03596799075603485, "learning_rate": 0.01, "loss": 2.0206, "step": 27843 }, { "epoch": 2.8621646623496764, "grad_norm": 0.042376477271318436, "learning_rate": 0.01, "loss": 2.0363, "step": 27846 }, { "epoch": 2.862473018809744, "grad_norm": 0.054233819246292114, "learning_rate": 0.01, "loss": 2.0324, "step": 27849 }, { "epoch": 2.8627813752698117, "grad_norm": 0.04501786455512047, "learning_rate": 0.01, "loss": 2.0211, "step": 27852 }, { "epoch": 2.86308973172988, "grad_norm": 0.06443289667367935, "learning_rate": 0.01, "loss": 1.9847, "step": 27855 }, { "epoch": 2.8633980881899475, "grad_norm": 0.05772462114691734, "learning_rate": 0.01, "loss": 2.0152, "step": 27858 }, { "epoch": 2.8637064446500156, "grad_norm": 0.039729420095682144, "learning_rate": 0.01, "loss": 2.0233, "step": 27861 }, { "epoch": 2.864014801110083, "grad_norm": 0.1330452859401703, "learning_rate": 0.01, "loss": 2.0166, "step": 27864 }, { "epoch": 2.8643231575701513, "grad_norm": 0.03952759504318237, "learning_rate": 0.01, "loss": 2.0151, "step": 27867 }, { "epoch": 2.864631514030219, "grad_norm": 0.05294906720519066, "learning_rate": 0.01, "loss": 2.0078, "step": 27870 }, { "epoch": 2.8649398704902866, "grad_norm": 0.07945666462182999, "learning_rate": 0.01, "loss": 2.0237, "step": 27873 }, { "epoch": 2.8652482269503547, "grad_norm": 0.09529011696577072, "learning_rate": 0.01, "loss": 2.0017, "step": 27876 }, { "epoch": 2.8655565834104224, "grad_norm": 0.038869790732860565, "learning_rate": 0.01, "loss": 2.0051, "step": 27879 }, { "epoch": 2.86586493987049, "grad_norm": 0.0421532541513443, "learning_rate": 0.01, "loss": 2.019, "step": 27882 }, { "epoch": 2.866173296330558, "grad_norm": 0.037143412977457047, "learning_rate": 0.01, "loss": 2.0196, "step": 27885 }, { "epoch": 2.866481652790626, "grad_norm": 0.0359092615544796, "learning_rate": 0.01, "loss": 2.0076, "step": 27888 }, { "epoch": 2.866790009250694, "grad_norm": 0.04631698876619339, "learning_rate": 0.01, "loss": 2.0131, "step": 27891 }, { "epoch": 2.8670983657107616, "grad_norm": 0.06015830859541893, "learning_rate": 0.01, "loss": 2.0341, "step": 27894 }, { "epoch": 2.8674067221708297, "grad_norm": 0.09565176069736481, "learning_rate": 0.01, "loss": 2.0251, "step": 27897 }, { "epoch": 2.8677150786308974, "grad_norm": 0.05290444567799568, "learning_rate": 0.01, "loss": 2.0412, "step": 27900 }, { "epoch": 2.868023435090965, "grad_norm": 0.03383943438529968, "learning_rate": 0.01, "loss": 1.9902, "step": 27903 }, { "epoch": 2.868331791551033, "grad_norm": 0.05390051752328873, "learning_rate": 0.01, "loss": 2.0159, "step": 27906 }, { "epoch": 2.868640148011101, "grad_norm": 0.08597470074892044, "learning_rate": 0.01, "loss": 2.0308, "step": 27909 }, { "epoch": 2.8689485044711684, "grad_norm": 0.059518035501241684, "learning_rate": 0.01, "loss": 2.0169, "step": 27912 }, { "epoch": 2.8692568609312366, "grad_norm": 0.04992047697305679, "learning_rate": 0.01, "loss": 2.0267, "step": 27915 }, { "epoch": 2.869565217391304, "grad_norm": 0.04728172719478607, "learning_rate": 0.01, "loss": 2.0266, "step": 27918 }, { "epoch": 2.8698735738513723, "grad_norm": 0.038830891251564026, "learning_rate": 0.01, "loss": 2.0197, "step": 27921 }, { "epoch": 2.87018193031144, "grad_norm": 0.039699580520391464, "learning_rate": 0.01, "loss": 2.0144, "step": 27924 }, { "epoch": 2.870490286771508, "grad_norm": 0.10109611600637436, "learning_rate": 0.01, "loss": 2.0033, "step": 27927 }, { "epoch": 2.8707986432315757, "grad_norm": 0.07930929958820343, "learning_rate": 0.01, "loss": 2.0463, "step": 27930 }, { "epoch": 2.8711069996916434, "grad_norm": 0.05544453486800194, "learning_rate": 0.01, "loss": 2.0342, "step": 27933 }, { "epoch": 2.8714153561517115, "grad_norm": 0.11004924774169922, "learning_rate": 0.01, "loss": 2.0199, "step": 27936 }, { "epoch": 2.871723712611779, "grad_norm": 0.09423034638166428, "learning_rate": 0.01, "loss": 2.0232, "step": 27939 }, { "epoch": 2.872032069071847, "grad_norm": 0.05001804232597351, "learning_rate": 0.01, "loss": 2.0032, "step": 27942 }, { "epoch": 2.872340425531915, "grad_norm": 0.04487982019782066, "learning_rate": 0.01, "loss": 2.0154, "step": 27945 }, { "epoch": 2.8726487819919826, "grad_norm": 0.033674515783786774, "learning_rate": 0.01, "loss": 2.0138, "step": 27948 }, { "epoch": 2.8729571384520507, "grad_norm": 0.040895912796258926, "learning_rate": 0.01, "loss": 1.9902, "step": 27951 }, { "epoch": 2.8732654949121184, "grad_norm": 0.03655741363763809, "learning_rate": 0.01, "loss": 2.0201, "step": 27954 }, { "epoch": 2.8735738513721865, "grad_norm": 0.06042269244790077, "learning_rate": 0.01, "loss": 2.0354, "step": 27957 }, { "epoch": 2.873882207832254, "grad_norm": 0.0660943016409874, "learning_rate": 0.01, "loss": 2.0312, "step": 27960 }, { "epoch": 2.874190564292322, "grad_norm": 0.0681222453713417, "learning_rate": 0.01, "loss": 2.0059, "step": 27963 }, { "epoch": 2.87449892075239, "grad_norm": 0.05666188523173332, "learning_rate": 0.01, "loss": 2.022, "step": 27966 }, { "epoch": 2.8748072772124575, "grad_norm": 0.09911254048347473, "learning_rate": 0.01, "loss": 2.0056, "step": 27969 }, { "epoch": 2.875115633672525, "grad_norm": 0.0730089545249939, "learning_rate": 0.01, "loss": 1.9927, "step": 27972 }, { "epoch": 2.8754239901325933, "grad_norm": 0.05434833839535713, "learning_rate": 0.01, "loss": 1.9998, "step": 27975 }, { "epoch": 2.875732346592661, "grad_norm": 0.07539419084787369, "learning_rate": 0.01, "loss": 2.0304, "step": 27978 }, { "epoch": 2.876040703052729, "grad_norm": 0.04300360381603241, "learning_rate": 0.01, "loss": 2.0046, "step": 27981 }, { "epoch": 2.8763490595127967, "grad_norm": 0.1077357679605484, "learning_rate": 0.01, "loss": 2.0344, "step": 27984 }, { "epoch": 2.876657415972865, "grad_norm": 0.08229992538690567, "learning_rate": 0.01, "loss": 2.0412, "step": 27987 }, { "epoch": 2.8769657724329325, "grad_norm": 0.12404177337884903, "learning_rate": 0.01, "loss": 2.0047, "step": 27990 }, { "epoch": 2.877274128893, "grad_norm": 0.06843412667512894, "learning_rate": 0.01, "loss": 2.0144, "step": 27993 }, { "epoch": 2.8775824853530683, "grad_norm": 0.09625189751386642, "learning_rate": 0.01, "loss": 2.0435, "step": 27996 }, { "epoch": 2.877890841813136, "grad_norm": 0.04479416087269783, "learning_rate": 0.01, "loss": 2.0355, "step": 27999 }, { "epoch": 2.8781991982732036, "grad_norm": 0.04933414235711098, "learning_rate": 0.01, "loss": 2.0018, "step": 28002 }, { "epoch": 2.8785075547332717, "grad_norm": 0.05558469891548157, "learning_rate": 0.01, "loss": 2.03, "step": 28005 }, { "epoch": 2.8788159111933393, "grad_norm": 0.04813915863633156, "learning_rate": 0.01, "loss": 2.0227, "step": 28008 }, { "epoch": 2.8791242676534075, "grad_norm": 0.06402178108692169, "learning_rate": 0.01, "loss": 2.0023, "step": 28011 }, { "epoch": 2.879432624113475, "grad_norm": 0.03481597453355789, "learning_rate": 0.01, "loss": 2.032, "step": 28014 }, { "epoch": 2.879740980573543, "grad_norm": 0.03682602196931839, "learning_rate": 0.01, "loss": 2.0198, "step": 28017 }, { "epoch": 2.880049337033611, "grad_norm": 0.11671306192874908, "learning_rate": 0.01, "loss": 2.0025, "step": 28020 }, { "epoch": 2.8803576934936785, "grad_norm": 0.051900725811719894, "learning_rate": 0.01, "loss": 2.0128, "step": 28023 }, { "epoch": 2.8806660499537466, "grad_norm": 0.034372419118881226, "learning_rate": 0.01, "loss": 2.0146, "step": 28026 }, { "epoch": 2.8809744064138143, "grad_norm": 0.05165507644414902, "learning_rate": 0.01, "loss": 2.0139, "step": 28029 }, { "epoch": 2.881282762873882, "grad_norm": 0.046683549880981445, "learning_rate": 0.01, "loss": 2.0454, "step": 28032 }, { "epoch": 2.88159111933395, "grad_norm": 0.04153186455368996, "learning_rate": 0.01, "loss": 2.022, "step": 28035 }, { "epoch": 2.8818994757940177, "grad_norm": 0.07401791960000992, "learning_rate": 0.01, "loss": 2.0249, "step": 28038 }, { "epoch": 2.882207832254086, "grad_norm": 0.08125065267086029, "learning_rate": 0.01, "loss": 2.0042, "step": 28041 }, { "epoch": 2.8825161887141535, "grad_norm": 0.076799176633358, "learning_rate": 0.01, "loss": 1.9924, "step": 28044 }, { "epoch": 2.8828245451742216, "grad_norm": 0.09814302623271942, "learning_rate": 0.01, "loss": 2.028, "step": 28047 }, { "epoch": 2.8831329016342893, "grad_norm": 0.06998278200626373, "learning_rate": 0.01, "loss": 2.0036, "step": 28050 }, { "epoch": 2.883441258094357, "grad_norm": 0.057476770132780075, "learning_rate": 0.01, "loss": 2.0134, "step": 28053 }, { "epoch": 2.883749614554425, "grad_norm": 0.037239111959934235, "learning_rate": 0.01, "loss": 1.9987, "step": 28056 }, { "epoch": 2.8840579710144927, "grad_norm": 0.08055179566144943, "learning_rate": 0.01, "loss": 2.0267, "step": 28059 }, { "epoch": 2.8843663274745603, "grad_norm": 0.07050419598817825, "learning_rate": 0.01, "loss": 2.0294, "step": 28062 }, { "epoch": 2.8846746839346284, "grad_norm": 0.05187792330980301, "learning_rate": 0.01, "loss": 2.004, "step": 28065 }, { "epoch": 2.884983040394696, "grad_norm": 0.03746993839740753, "learning_rate": 0.01, "loss": 2.0219, "step": 28068 }, { "epoch": 2.885291396854764, "grad_norm": 0.09767074137926102, "learning_rate": 0.01, "loss": 2.0352, "step": 28071 }, { "epoch": 2.885599753314832, "grad_norm": 0.08471439778804779, "learning_rate": 0.01, "loss": 2.0026, "step": 28074 }, { "epoch": 2.8859081097749, "grad_norm": 0.061311863362789154, "learning_rate": 0.01, "loss": 2.0323, "step": 28077 }, { "epoch": 2.8862164662349676, "grad_norm": 0.06402043253183365, "learning_rate": 0.01, "loss": 2.0375, "step": 28080 }, { "epoch": 2.8865248226950353, "grad_norm": 0.06756860762834549, "learning_rate": 0.01, "loss": 2.0232, "step": 28083 }, { "epoch": 2.8868331791551034, "grad_norm": 0.07851870357990265, "learning_rate": 0.01, "loss": 2.0178, "step": 28086 }, { "epoch": 2.887141535615171, "grad_norm": 0.08102571219205856, "learning_rate": 0.01, "loss": 2.0162, "step": 28089 }, { "epoch": 2.8874498920752387, "grad_norm": 0.04745829850435257, "learning_rate": 0.01, "loss": 2.0267, "step": 28092 }, { "epoch": 2.887758248535307, "grad_norm": 0.04496223106980324, "learning_rate": 0.01, "loss": 2.012, "step": 28095 }, { "epoch": 2.8880666049953745, "grad_norm": 0.09044700860977173, "learning_rate": 0.01, "loss": 2.0354, "step": 28098 }, { "epoch": 2.8883749614554426, "grad_norm": 0.05591853708028793, "learning_rate": 0.01, "loss": 2.0033, "step": 28101 }, { "epoch": 2.8886833179155103, "grad_norm": 0.07982437312602997, "learning_rate": 0.01, "loss": 2.0423, "step": 28104 }, { "epoch": 2.8889916743755784, "grad_norm": 0.039345480501651764, "learning_rate": 0.01, "loss": 1.996, "step": 28107 }, { "epoch": 2.889300030835646, "grad_norm": 0.046677373349666595, "learning_rate": 0.01, "loss": 2.0204, "step": 28110 }, { "epoch": 2.8896083872957137, "grad_norm": 0.060302551835775375, "learning_rate": 0.01, "loss": 2.019, "step": 28113 }, { "epoch": 2.889916743755782, "grad_norm": 0.09422065317630768, "learning_rate": 0.01, "loss": 2.0223, "step": 28116 }, { "epoch": 2.8902251002158494, "grad_norm": 0.03713101148605347, "learning_rate": 0.01, "loss": 2.0008, "step": 28119 }, { "epoch": 2.890533456675917, "grad_norm": 0.062431566417217255, "learning_rate": 0.01, "loss": 2.0213, "step": 28122 }, { "epoch": 2.890841813135985, "grad_norm": 0.052230529487133026, "learning_rate": 0.01, "loss": 2.0233, "step": 28125 }, { "epoch": 2.8911501695960533, "grad_norm": 0.05514863133430481, "learning_rate": 0.01, "loss": 2.0233, "step": 28128 }, { "epoch": 2.891458526056121, "grad_norm": 0.04317644238471985, "learning_rate": 0.01, "loss": 2.0111, "step": 28131 }, { "epoch": 2.8917668825161886, "grad_norm": 0.05251277983188629, "learning_rate": 0.01, "loss": 2.0306, "step": 28134 }, { "epoch": 2.8920752389762567, "grad_norm": 0.10419400781393051, "learning_rate": 0.01, "loss": 2.0253, "step": 28137 }, { "epoch": 2.8923835954363244, "grad_norm": 0.08806058764457703, "learning_rate": 0.01, "loss": 2.0079, "step": 28140 }, { "epoch": 2.892691951896392, "grad_norm": 0.059875234961509705, "learning_rate": 0.01, "loss": 2.0066, "step": 28143 }, { "epoch": 2.89300030835646, "grad_norm": 0.036619991064071655, "learning_rate": 0.01, "loss": 2.0208, "step": 28146 }, { "epoch": 2.893308664816528, "grad_norm": 0.034454282373189926, "learning_rate": 0.01, "loss": 2.0068, "step": 28149 }, { "epoch": 2.8936170212765955, "grad_norm": 0.05209527164697647, "learning_rate": 0.01, "loss": 2.0252, "step": 28152 }, { "epoch": 2.8939253777366636, "grad_norm": 0.11549924314022064, "learning_rate": 0.01, "loss": 1.9974, "step": 28155 }, { "epoch": 2.8942337341967317, "grad_norm": 0.06887582689523697, "learning_rate": 0.01, "loss": 2.0, "step": 28158 }, { "epoch": 2.8945420906567993, "grad_norm": 0.046488743275403976, "learning_rate": 0.01, "loss": 1.9827, "step": 28161 }, { "epoch": 2.894850447116867, "grad_norm": 0.05086890980601311, "learning_rate": 0.01, "loss": 2.0073, "step": 28164 }, { "epoch": 2.895158803576935, "grad_norm": 0.03719864413142204, "learning_rate": 0.01, "loss": 2.0333, "step": 28167 }, { "epoch": 2.8954671600370028, "grad_norm": 0.059325605630874634, "learning_rate": 0.01, "loss": 2.0056, "step": 28170 }, { "epoch": 2.8957755164970704, "grad_norm": 0.088272824883461, "learning_rate": 0.01, "loss": 2.0114, "step": 28173 }, { "epoch": 2.8960838729571385, "grad_norm": 0.0874638557434082, "learning_rate": 0.01, "loss": 2.0158, "step": 28176 }, { "epoch": 2.896392229417206, "grad_norm": 0.059453610330820084, "learning_rate": 0.01, "loss": 1.9919, "step": 28179 }, { "epoch": 2.896700585877274, "grad_norm": 0.05221414193511009, "learning_rate": 0.01, "loss": 2.0056, "step": 28182 }, { "epoch": 2.897008942337342, "grad_norm": 0.03308747336268425, "learning_rate": 0.01, "loss": 2.0072, "step": 28185 }, { "epoch": 2.89731729879741, "grad_norm": 0.07506965100765228, "learning_rate": 0.01, "loss": 2.0022, "step": 28188 }, { "epoch": 2.8976256552574777, "grad_norm": 0.06189137324690819, "learning_rate": 0.01, "loss": 2.0046, "step": 28191 }, { "epoch": 2.8979340117175454, "grad_norm": 0.07855790853500366, "learning_rate": 0.01, "loss": 2.0307, "step": 28194 }, { "epoch": 2.8982423681776135, "grad_norm": 0.08880770951509476, "learning_rate": 0.01, "loss": 2.0242, "step": 28197 }, { "epoch": 2.898550724637681, "grad_norm": 0.05860179290175438, "learning_rate": 0.01, "loss": 2.0394, "step": 28200 }, { "epoch": 2.898859081097749, "grad_norm": 0.06037219241261482, "learning_rate": 0.01, "loss": 2.0131, "step": 28203 }, { "epoch": 2.899167437557817, "grad_norm": 0.16910460591316223, "learning_rate": 0.01, "loss": 2.0184, "step": 28206 }, { "epoch": 2.8994757940178846, "grad_norm": 0.05912697687745094, "learning_rate": 0.01, "loss": 2.0141, "step": 28209 }, { "epoch": 2.8997841504779522, "grad_norm": 0.04245199263095856, "learning_rate": 0.01, "loss": 2.0206, "step": 28212 }, { "epoch": 2.9000925069380203, "grad_norm": 0.03307129442691803, "learning_rate": 0.01, "loss": 2.0258, "step": 28215 }, { "epoch": 2.9004008633980884, "grad_norm": 0.03456464782357216, "learning_rate": 0.01, "loss": 2.0219, "step": 28218 }, { "epoch": 2.900709219858156, "grad_norm": 0.04525744169950485, "learning_rate": 0.01, "loss": 2.0417, "step": 28221 }, { "epoch": 2.9010175763182238, "grad_norm": 0.046930085867643356, "learning_rate": 0.01, "loss": 2.0296, "step": 28224 }, { "epoch": 2.901325932778292, "grad_norm": 0.07545941323041916, "learning_rate": 0.01, "loss": 2.028, "step": 28227 }, { "epoch": 2.9016342892383595, "grad_norm": 0.045901909470558167, "learning_rate": 0.01, "loss": 2.0143, "step": 28230 }, { "epoch": 2.901942645698427, "grad_norm": 0.06182632967829704, "learning_rate": 0.01, "loss": 2.0184, "step": 28233 }, { "epoch": 2.9022510021584953, "grad_norm": 0.1126553937792778, "learning_rate": 0.01, "loss": 2.0134, "step": 28236 }, { "epoch": 2.902559358618563, "grad_norm": 0.05637908726930618, "learning_rate": 0.01, "loss": 2.0023, "step": 28239 }, { "epoch": 2.902867715078631, "grad_norm": 0.03390496224164963, "learning_rate": 0.01, "loss": 2.0097, "step": 28242 }, { "epoch": 2.9031760715386987, "grad_norm": 0.1719658523797989, "learning_rate": 0.01, "loss": 2.0446, "step": 28245 }, { "epoch": 2.903484427998767, "grad_norm": 0.054201435297727585, "learning_rate": 0.01, "loss": 2.0101, "step": 28248 }, { "epoch": 2.9037927844588345, "grad_norm": 0.05339137092232704, "learning_rate": 0.01, "loss": 2.024, "step": 28251 }, { "epoch": 2.904101140918902, "grad_norm": 0.0490683875977993, "learning_rate": 0.01, "loss": 2.0112, "step": 28254 }, { "epoch": 2.9044094973789703, "grad_norm": 0.048101890832185745, "learning_rate": 0.01, "loss": 2.0244, "step": 28257 }, { "epoch": 2.904717853839038, "grad_norm": 0.034970205277204514, "learning_rate": 0.01, "loss": 2.0128, "step": 28260 }, { "epoch": 2.9050262102991056, "grad_norm": 0.05500125139951706, "learning_rate": 0.01, "loss": 1.9963, "step": 28263 }, { "epoch": 2.9053345667591737, "grad_norm": 0.0717696025967598, "learning_rate": 0.01, "loss": 2.0119, "step": 28266 }, { "epoch": 2.9056429232192413, "grad_norm": 0.08134905993938446, "learning_rate": 0.01, "loss": 2.0343, "step": 28269 }, { "epoch": 2.9059512796793094, "grad_norm": 0.13240204751491547, "learning_rate": 0.01, "loss": 2.0268, "step": 28272 }, { "epoch": 2.906259636139377, "grad_norm": 0.04060851037502289, "learning_rate": 0.01, "loss": 2.0163, "step": 28275 }, { "epoch": 2.906567992599445, "grad_norm": 0.04560050368309021, "learning_rate": 0.01, "loss": 2.0037, "step": 28278 }, { "epoch": 2.906876349059513, "grad_norm": 0.051935113966464996, "learning_rate": 0.01, "loss": 2.0025, "step": 28281 }, { "epoch": 2.9071847055195805, "grad_norm": 0.04502344876527786, "learning_rate": 0.01, "loss": 2.0212, "step": 28284 }, { "epoch": 2.9074930619796486, "grad_norm": 0.04561367630958557, "learning_rate": 0.01, "loss": 2.0369, "step": 28287 }, { "epoch": 2.9078014184397163, "grad_norm": 0.05686529353260994, "learning_rate": 0.01, "loss": 2.0217, "step": 28290 }, { "epoch": 2.908109774899784, "grad_norm": 0.06489894539117813, "learning_rate": 0.01, "loss": 2.0126, "step": 28293 }, { "epoch": 2.908418131359852, "grad_norm": 0.08427495509386063, "learning_rate": 0.01, "loss": 2.0, "step": 28296 }, { "epoch": 2.9087264878199197, "grad_norm": 0.08298421651124954, "learning_rate": 0.01, "loss": 2.0273, "step": 28299 }, { "epoch": 2.909034844279988, "grad_norm": 0.035743072628974915, "learning_rate": 0.01, "loss": 2.0167, "step": 28302 }, { "epoch": 2.9093432007400555, "grad_norm": 0.11177317798137665, "learning_rate": 0.01, "loss": 2.0037, "step": 28305 }, { "epoch": 2.9096515572001236, "grad_norm": 0.10962017625570297, "learning_rate": 0.01, "loss": 2.0155, "step": 28308 }, { "epoch": 2.9099599136601912, "grad_norm": 0.033734966069459915, "learning_rate": 0.01, "loss": 2.0131, "step": 28311 }, { "epoch": 2.910268270120259, "grad_norm": 0.050082772970199585, "learning_rate": 0.01, "loss": 2.0117, "step": 28314 }, { "epoch": 2.910576626580327, "grad_norm": 0.04048197343945503, "learning_rate": 0.01, "loss": 2.0085, "step": 28317 }, { "epoch": 2.9108849830403947, "grad_norm": 0.039657291024923325, "learning_rate": 0.01, "loss": 2.0233, "step": 28320 }, { "epoch": 2.9111933395004623, "grad_norm": 0.0639658197760582, "learning_rate": 0.01, "loss": 2.0001, "step": 28323 }, { "epoch": 2.9115016959605304, "grad_norm": 0.09306667000055313, "learning_rate": 0.01, "loss": 2.019, "step": 28326 }, { "epoch": 2.911810052420598, "grad_norm": 0.06027163937687874, "learning_rate": 0.01, "loss": 2.0341, "step": 28329 }, { "epoch": 2.912118408880666, "grad_norm": 0.08570882678031921, "learning_rate": 0.01, "loss": 2.0192, "step": 28332 }, { "epoch": 2.912426765340734, "grad_norm": 0.07936012744903564, "learning_rate": 0.01, "loss": 2.0165, "step": 28335 }, { "epoch": 2.912735121800802, "grad_norm": 0.06479290872812271, "learning_rate": 0.01, "loss": 2.027, "step": 28338 }, { "epoch": 2.9130434782608696, "grad_norm": 0.06973425298929214, "learning_rate": 0.01, "loss": 2.0037, "step": 28341 }, { "epoch": 2.9133518347209373, "grad_norm": 0.05557083338499069, "learning_rate": 0.01, "loss": 2.0323, "step": 28344 }, { "epoch": 2.9136601911810054, "grad_norm": 0.04901493340730667, "learning_rate": 0.01, "loss": 2.0088, "step": 28347 }, { "epoch": 2.913968547641073, "grad_norm": 0.1036488264799118, "learning_rate": 0.01, "loss": 2.0486, "step": 28350 }, { "epoch": 2.9142769041011407, "grad_norm": 0.04318247362971306, "learning_rate": 0.01, "loss": 2.0195, "step": 28353 }, { "epoch": 2.914585260561209, "grad_norm": 0.0404491052031517, "learning_rate": 0.01, "loss": 2.0237, "step": 28356 }, { "epoch": 2.9148936170212765, "grad_norm": 0.03727111592888832, "learning_rate": 0.01, "loss": 2.0153, "step": 28359 }, { "epoch": 2.9152019734813446, "grad_norm": 0.04736052453517914, "learning_rate": 0.01, "loss": 2.0487, "step": 28362 }, { "epoch": 2.9155103299414122, "grad_norm": 0.13712109625339508, "learning_rate": 0.01, "loss": 2.0434, "step": 28365 }, { "epoch": 2.9158186864014803, "grad_norm": 0.046428218483924866, "learning_rate": 0.01, "loss": 2.0177, "step": 28368 }, { "epoch": 2.916127042861548, "grad_norm": 0.036403872072696686, "learning_rate": 0.01, "loss": 2.0144, "step": 28371 }, { "epoch": 2.9164353993216157, "grad_norm": 0.04359270632266998, "learning_rate": 0.01, "loss": 2.0386, "step": 28374 }, { "epoch": 2.9167437557816838, "grad_norm": 0.06155150756239891, "learning_rate": 0.01, "loss": 2.0014, "step": 28377 }, { "epoch": 2.9170521122417514, "grad_norm": 0.035659730434417725, "learning_rate": 0.01, "loss": 2.012, "step": 28380 }, { "epoch": 2.917360468701819, "grad_norm": 0.09453250467777252, "learning_rate": 0.01, "loss": 2.0006, "step": 28383 }, { "epoch": 2.917668825161887, "grad_norm": 0.062337253242731094, "learning_rate": 0.01, "loss": 2.0131, "step": 28386 }, { "epoch": 2.917977181621955, "grad_norm": 0.05488646402955055, "learning_rate": 0.01, "loss": 2.0212, "step": 28389 }, { "epoch": 2.918285538082023, "grad_norm": 0.07092556357383728, "learning_rate": 0.01, "loss": 2.0123, "step": 28392 }, { "epoch": 2.9185938945420906, "grad_norm": 0.10335639864206314, "learning_rate": 0.01, "loss": 2.0336, "step": 28395 }, { "epoch": 2.9189022510021587, "grad_norm": 0.1187339797616005, "learning_rate": 0.01, "loss": 2.0218, "step": 28398 }, { "epoch": 2.9192106074622264, "grad_norm": 0.09254027903079987, "learning_rate": 0.01, "loss": 2.0077, "step": 28401 }, { "epoch": 2.919518963922294, "grad_norm": 0.04120078682899475, "learning_rate": 0.01, "loss": 2.032, "step": 28404 }, { "epoch": 2.919827320382362, "grad_norm": 0.04366682097315788, "learning_rate": 0.01, "loss": 2.0128, "step": 28407 }, { "epoch": 2.92013567684243, "grad_norm": 0.03700088709592819, "learning_rate": 0.01, "loss": 2.0027, "step": 28410 }, { "epoch": 2.9204440333024975, "grad_norm": 0.03409232571721077, "learning_rate": 0.01, "loss": 2.0068, "step": 28413 }, { "epoch": 2.9207523897625656, "grad_norm": 0.04503092169761658, "learning_rate": 0.01, "loss": 2.0042, "step": 28416 }, { "epoch": 2.9210607462226332, "grad_norm": 0.04988139122724533, "learning_rate": 0.01, "loss": 2.0418, "step": 28419 }, { "epoch": 2.9213691026827013, "grad_norm": 0.05121064558625221, "learning_rate": 0.01, "loss": 2.0241, "step": 28422 }, { "epoch": 2.921677459142769, "grad_norm": 0.03916258364915848, "learning_rate": 0.01, "loss": 2.0461, "step": 28425 }, { "epoch": 2.921985815602837, "grad_norm": 0.1349031776189804, "learning_rate": 0.01, "loss": 2.0186, "step": 28428 }, { "epoch": 2.9222941720629048, "grad_norm": 0.12042077630758286, "learning_rate": 0.01, "loss": 2.0378, "step": 28431 }, { "epoch": 2.9226025285229724, "grad_norm": 0.08157286792993546, "learning_rate": 0.01, "loss": 1.9957, "step": 28434 }, { "epoch": 2.9229108849830405, "grad_norm": 0.07750184834003448, "learning_rate": 0.01, "loss": 2.0193, "step": 28437 }, { "epoch": 2.923219241443108, "grad_norm": 0.07270103693008423, "learning_rate": 0.01, "loss": 2.015, "step": 28440 }, { "epoch": 2.923527597903176, "grad_norm": 0.05140276625752449, "learning_rate": 0.01, "loss": 1.9987, "step": 28443 }, { "epoch": 2.923835954363244, "grad_norm": 0.07426592707633972, "learning_rate": 0.01, "loss": 2.031, "step": 28446 }, { "epoch": 2.9241443108233116, "grad_norm": 0.05136018618941307, "learning_rate": 0.01, "loss": 2.0166, "step": 28449 }, { "epoch": 2.9244526672833797, "grad_norm": 0.03354816511273384, "learning_rate": 0.01, "loss": 1.9985, "step": 28452 }, { "epoch": 2.9247610237434474, "grad_norm": 0.03671132028102875, "learning_rate": 0.01, "loss": 1.9954, "step": 28455 }, { "epoch": 2.9250693802035155, "grad_norm": 0.05378049612045288, "learning_rate": 0.01, "loss": 2.0276, "step": 28458 }, { "epoch": 2.925377736663583, "grad_norm": 0.04772641509771347, "learning_rate": 0.01, "loss": 2.0113, "step": 28461 }, { "epoch": 2.925686093123651, "grad_norm": 0.05942932888865471, "learning_rate": 0.01, "loss": 2.0223, "step": 28464 }, { "epoch": 2.925994449583719, "grad_norm": 0.059020113199949265, "learning_rate": 0.01, "loss": 2.0089, "step": 28467 }, { "epoch": 2.9263028060437866, "grad_norm": 0.19283726811408997, "learning_rate": 0.01, "loss": 2.0302, "step": 28470 }, { "epoch": 2.9266111625038542, "grad_norm": 0.09231256693601608, "learning_rate": 0.01, "loss": 1.9915, "step": 28473 }, { "epoch": 2.9269195189639223, "grad_norm": 0.058674681931734085, "learning_rate": 0.01, "loss": 2.0095, "step": 28476 }, { "epoch": 2.92722787542399, "grad_norm": 0.05014891177415848, "learning_rate": 0.01, "loss": 2.0197, "step": 28479 }, { "epoch": 2.927536231884058, "grad_norm": 0.060597196221351624, "learning_rate": 0.01, "loss": 1.9932, "step": 28482 }, { "epoch": 2.9278445883441258, "grad_norm": 0.08379890769720078, "learning_rate": 0.01, "loss": 2.027, "step": 28485 }, { "epoch": 2.928152944804194, "grad_norm": 0.04832527041435242, "learning_rate": 0.01, "loss": 1.9973, "step": 28488 }, { "epoch": 2.9284613012642615, "grad_norm": 0.06709366291761398, "learning_rate": 0.01, "loss": 2.0287, "step": 28491 }, { "epoch": 2.928769657724329, "grad_norm": 0.044588811695575714, "learning_rate": 0.01, "loss": 2.0253, "step": 28494 }, { "epoch": 2.9290780141843973, "grad_norm": 0.05735667049884796, "learning_rate": 0.01, "loss": 1.9972, "step": 28497 }, { "epoch": 2.929386370644465, "grad_norm": 0.041256386786699295, "learning_rate": 0.01, "loss": 2.0109, "step": 28500 }, { "epoch": 2.9296947271045326, "grad_norm": 0.03460060432553291, "learning_rate": 0.01, "loss": 2.0028, "step": 28503 }, { "epoch": 2.9300030835646007, "grad_norm": 0.05080440267920494, "learning_rate": 0.01, "loss": 2.0186, "step": 28506 }, { "epoch": 2.9303114400246684, "grad_norm": 0.1281091421842575, "learning_rate": 0.01, "loss": 2.0219, "step": 28509 }, { "epoch": 2.9306197964847365, "grad_norm": 0.05667036399245262, "learning_rate": 0.01, "loss": 2.0034, "step": 28512 }, { "epoch": 2.930928152944804, "grad_norm": 0.10026687383651733, "learning_rate": 0.01, "loss": 2.0082, "step": 28515 }, { "epoch": 2.9312365094048722, "grad_norm": 0.04924483224749565, "learning_rate": 0.01, "loss": 2.0034, "step": 28518 }, { "epoch": 2.93154486586494, "grad_norm": 0.04905860871076584, "learning_rate": 0.01, "loss": 2.0125, "step": 28521 }, { "epoch": 2.9318532223250076, "grad_norm": 0.0525185652077198, "learning_rate": 0.01, "loss": 2.0193, "step": 28524 }, { "epoch": 2.9321615787850757, "grad_norm": 0.04196888953447342, "learning_rate": 0.01, "loss": 2.0167, "step": 28527 }, { "epoch": 2.9324699352451433, "grad_norm": 0.04660086706280708, "learning_rate": 0.01, "loss": 1.996, "step": 28530 }, { "epoch": 2.932778291705211, "grad_norm": 0.05741078406572342, "learning_rate": 0.01, "loss": 2.0093, "step": 28533 }, { "epoch": 2.933086648165279, "grad_norm": 0.052953120321035385, "learning_rate": 0.01, "loss": 2.0111, "step": 28536 }, { "epoch": 2.9333950046253467, "grad_norm": 0.04325610026717186, "learning_rate": 0.01, "loss": 2.0252, "step": 28539 }, { "epoch": 2.933703361085415, "grad_norm": 0.04387751594185829, "learning_rate": 0.01, "loss": 2.0052, "step": 28542 }, { "epoch": 2.9340117175454825, "grad_norm": 0.03476174548268318, "learning_rate": 0.01, "loss": 2.0143, "step": 28545 }, { "epoch": 2.9343200740055506, "grad_norm": 0.03839149698615074, "learning_rate": 0.01, "loss": 2.0289, "step": 28548 }, { "epoch": 2.9346284304656183, "grad_norm": 0.0841054692864418, "learning_rate": 0.01, "loss": 1.9642, "step": 28551 }, { "epoch": 2.934936786925686, "grad_norm": 0.10850472748279572, "learning_rate": 0.01, "loss": 2.0345, "step": 28554 }, { "epoch": 2.935245143385754, "grad_norm": 0.12255658954381943, "learning_rate": 0.01, "loss": 1.9961, "step": 28557 }, { "epoch": 2.9355534998458217, "grad_norm": 0.05664276331663132, "learning_rate": 0.01, "loss": 2.0202, "step": 28560 }, { "epoch": 2.9358618563058894, "grad_norm": 0.11724577099084854, "learning_rate": 0.01, "loss": 2.027, "step": 28563 }, { "epoch": 2.9361702127659575, "grad_norm": 0.06823346763849258, "learning_rate": 0.01, "loss": 2.0104, "step": 28566 }, { "epoch": 2.936478569226025, "grad_norm": 0.035576723515987396, "learning_rate": 0.01, "loss": 2.0302, "step": 28569 }, { "epoch": 2.9367869256860932, "grad_norm": 0.03186320886015892, "learning_rate": 0.01, "loss": 2.0181, "step": 28572 }, { "epoch": 2.937095282146161, "grad_norm": 0.03750181198120117, "learning_rate": 0.01, "loss": 2.0096, "step": 28575 }, { "epoch": 2.937403638606229, "grad_norm": 0.05272996798157692, "learning_rate": 0.01, "loss": 2.0021, "step": 28578 }, { "epoch": 2.9377119950662967, "grad_norm": 0.033289846032857895, "learning_rate": 0.01, "loss": 2.0055, "step": 28581 }, { "epoch": 2.9380203515263643, "grad_norm": 0.11037155240774155, "learning_rate": 0.01, "loss": 2.0271, "step": 28584 }, { "epoch": 2.9383287079864324, "grad_norm": 0.10620691627264023, "learning_rate": 0.01, "loss": 2.0012, "step": 28587 }, { "epoch": 2.9386370644465, "grad_norm": 0.052820686250925064, "learning_rate": 0.01, "loss": 2.009, "step": 28590 }, { "epoch": 2.9389454209065677, "grad_norm": 0.057419613003730774, "learning_rate": 0.01, "loss": 2.0122, "step": 28593 }, { "epoch": 2.939253777366636, "grad_norm": 0.04066954553127289, "learning_rate": 0.01, "loss": 2.0043, "step": 28596 }, { "epoch": 2.9395621338267035, "grad_norm": 0.04638439416885376, "learning_rate": 0.01, "loss": 2.0244, "step": 28599 }, { "epoch": 2.9398704902867716, "grad_norm": 0.11629427969455719, "learning_rate": 0.01, "loss": 1.9867, "step": 28602 }, { "epoch": 2.9401788467468393, "grad_norm": 0.05921720713376999, "learning_rate": 0.01, "loss": 2.0155, "step": 28605 }, { "epoch": 2.9404872032069074, "grad_norm": 0.05282594636082649, "learning_rate": 0.01, "loss": 2.0138, "step": 28608 }, { "epoch": 2.940795559666975, "grad_norm": 0.04680659994482994, "learning_rate": 0.01, "loss": 2.011, "step": 28611 }, { "epoch": 2.9411039161270427, "grad_norm": 0.11889224499464035, "learning_rate": 0.01, "loss": 2.0242, "step": 28614 }, { "epoch": 2.941412272587111, "grad_norm": 0.03500881418585777, "learning_rate": 0.01, "loss": 1.9972, "step": 28617 }, { "epoch": 2.9417206290471785, "grad_norm": 0.08639416098594666, "learning_rate": 0.01, "loss": 2.0108, "step": 28620 }, { "epoch": 2.942028985507246, "grad_norm": 0.03335639461874962, "learning_rate": 0.01, "loss": 2.0262, "step": 28623 }, { "epoch": 2.9423373419673142, "grad_norm": 0.045041028410196304, "learning_rate": 0.01, "loss": 2.0102, "step": 28626 }, { "epoch": 2.9426456984273823, "grad_norm": 0.047020211815834045, "learning_rate": 0.01, "loss": 2.0278, "step": 28629 }, { "epoch": 2.94295405488745, "grad_norm": 0.059858404099941254, "learning_rate": 0.01, "loss": 2.0038, "step": 28632 }, { "epoch": 2.9432624113475176, "grad_norm": 0.053345970809459686, "learning_rate": 0.01, "loss": 2.0135, "step": 28635 }, { "epoch": 2.9435707678075858, "grad_norm": 0.05528027564287186, "learning_rate": 0.01, "loss": 2.0044, "step": 28638 }, { "epoch": 2.9438791242676534, "grad_norm": 0.09895586222410202, "learning_rate": 0.01, "loss": 2.0117, "step": 28641 }, { "epoch": 2.944187480727721, "grad_norm": 0.11827319115400314, "learning_rate": 0.01, "loss": 1.9899, "step": 28644 }, { "epoch": 2.944495837187789, "grad_norm": 0.05013230815529823, "learning_rate": 0.01, "loss": 2.0146, "step": 28647 }, { "epoch": 2.944804193647857, "grad_norm": 0.0422658771276474, "learning_rate": 0.01, "loss": 2.0064, "step": 28650 }, { "epoch": 2.9451125501079245, "grad_norm": 0.04022778570652008, "learning_rate": 0.01, "loss": 1.9972, "step": 28653 }, { "epoch": 2.9454209065679926, "grad_norm": 0.043504439294338226, "learning_rate": 0.01, "loss": 2.0138, "step": 28656 }, { "epoch": 2.9457292630280607, "grad_norm": 0.06587618589401245, "learning_rate": 0.01, "loss": 2.0084, "step": 28659 }, { "epoch": 2.9460376194881284, "grad_norm": 0.10226722061634064, "learning_rate": 0.01, "loss": 2.0073, "step": 28662 }, { "epoch": 2.946345975948196, "grad_norm": 0.047040197998285294, "learning_rate": 0.01, "loss": 2.0082, "step": 28665 }, { "epoch": 2.946654332408264, "grad_norm": 0.0919933021068573, "learning_rate": 0.01, "loss": 2.0079, "step": 28668 }, { "epoch": 2.946962688868332, "grad_norm": 0.05978929251432419, "learning_rate": 0.01, "loss": 2.0271, "step": 28671 }, { "epoch": 2.9472710453283995, "grad_norm": 0.05814214050769806, "learning_rate": 0.01, "loss": 2.0224, "step": 28674 }, { "epoch": 2.9475794017884676, "grad_norm": 0.09039480984210968, "learning_rate": 0.01, "loss": 2.0026, "step": 28677 }, { "epoch": 2.947887758248535, "grad_norm": 0.1607222557067871, "learning_rate": 0.01, "loss": 2.0201, "step": 28680 }, { "epoch": 2.948196114708603, "grad_norm": 0.04198214039206505, "learning_rate": 0.01, "loss": 2.0157, "step": 28683 }, { "epoch": 2.948504471168671, "grad_norm": 0.06442588567733765, "learning_rate": 0.01, "loss": 1.9874, "step": 28686 }, { "epoch": 2.948812827628739, "grad_norm": 0.11082901805639267, "learning_rate": 0.01, "loss": 2.0165, "step": 28689 }, { "epoch": 2.9491211840888067, "grad_norm": 0.03669416159391403, "learning_rate": 0.01, "loss": 2.0311, "step": 28692 }, { "epoch": 2.9494295405488744, "grad_norm": 0.06837104260921478, "learning_rate": 0.01, "loss": 2.0052, "step": 28695 }, { "epoch": 2.9497378970089425, "grad_norm": 0.0466892383992672, "learning_rate": 0.01, "loss": 1.9896, "step": 28698 }, { "epoch": 2.95004625346901, "grad_norm": 0.06962305307388306, "learning_rate": 0.01, "loss": 2.023, "step": 28701 }, { "epoch": 2.950354609929078, "grad_norm": 0.06136500835418701, "learning_rate": 0.01, "loss": 2.0189, "step": 28704 }, { "epoch": 2.950662966389146, "grad_norm": 0.050067611038684845, "learning_rate": 0.01, "loss": 2.0258, "step": 28707 }, { "epoch": 2.9509713228492136, "grad_norm": 0.0527566559612751, "learning_rate": 0.01, "loss": 2.0157, "step": 28710 }, { "epoch": 2.9512796793092813, "grad_norm": 0.12275776267051697, "learning_rate": 0.01, "loss": 2.0061, "step": 28713 }, { "epoch": 2.9515880357693494, "grad_norm": 0.052756138145923615, "learning_rate": 0.01, "loss": 2.0235, "step": 28716 }, { "epoch": 2.9518963922294175, "grad_norm": 0.038834694772958755, "learning_rate": 0.01, "loss": 1.9956, "step": 28719 }, { "epoch": 2.952204748689485, "grad_norm": 0.07736565917730331, "learning_rate": 0.01, "loss": 2.0185, "step": 28722 }, { "epoch": 2.952513105149553, "grad_norm": 0.10177972167730331, "learning_rate": 0.01, "loss": 2.017, "step": 28725 }, { "epoch": 2.952821461609621, "grad_norm": 0.082985520362854, "learning_rate": 0.01, "loss": 1.9967, "step": 28728 }, { "epoch": 2.9531298180696886, "grad_norm": 0.05385474115610123, "learning_rate": 0.01, "loss": 2.0239, "step": 28731 }, { "epoch": 2.953438174529756, "grad_norm": 0.0803777202963829, "learning_rate": 0.01, "loss": 2.0155, "step": 28734 }, { "epoch": 2.9537465309898243, "grad_norm": 0.06875913590192795, "learning_rate": 0.01, "loss": 2.0237, "step": 28737 }, { "epoch": 2.954054887449892, "grad_norm": 0.09149770438671112, "learning_rate": 0.01, "loss": 2.0153, "step": 28740 }, { "epoch": 2.95436324390996, "grad_norm": 0.1182807981967926, "learning_rate": 0.01, "loss": 2.0117, "step": 28743 }, { "epoch": 2.9546716003700277, "grad_norm": 0.044717706739902496, "learning_rate": 0.01, "loss": 2.0233, "step": 28746 }, { "epoch": 2.954979956830096, "grad_norm": 0.04887622967362404, "learning_rate": 0.01, "loss": 2.0232, "step": 28749 }, { "epoch": 2.9552883132901635, "grad_norm": 0.041407834738492966, "learning_rate": 0.01, "loss": 1.9945, "step": 28752 }, { "epoch": 2.955596669750231, "grad_norm": 0.05831639841198921, "learning_rate": 0.01, "loss": 2.0485, "step": 28755 }, { "epoch": 2.9559050262102993, "grad_norm": 0.08279699832201004, "learning_rate": 0.01, "loss": 2.0275, "step": 28758 }, { "epoch": 2.956213382670367, "grad_norm": 0.08988698571920395, "learning_rate": 0.01, "loss": 1.9966, "step": 28761 }, { "epoch": 2.9565217391304346, "grad_norm": 0.0645131915807724, "learning_rate": 0.01, "loss": 2.0001, "step": 28764 }, { "epoch": 2.9568300955905027, "grad_norm": 0.08262491226196289, "learning_rate": 0.01, "loss": 1.999, "step": 28767 }, { "epoch": 2.9571384520505704, "grad_norm": 0.03773394227027893, "learning_rate": 0.01, "loss": 2.0108, "step": 28770 }, { "epoch": 2.9574468085106385, "grad_norm": 0.08012068271636963, "learning_rate": 0.01, "loss": 2.0032, "step": 28773 }, { "epoch": 2.957755164970706, "grad_norm": 0.04637681320309639, "learning_rate": 0.01, "loss": 2.0324, "step": 28776 }, { "epoch": 2.9580635214307742, "grad_norm": 0.031988177448511124, "learning_rate": 0.01, "loss": 1.9966, "step": 28779 }, { "epoch": 2.958371877890842, "grad_norm": 0.08325552940368652, "learning_rate": 0.01, "loss": 1.9993, "step": 28782 }, { "epoch": 2.9586802343509095, "grad_norm": 0.06131797283887863, "learning_rate": 0.01, "loss": 1.9963, "step": 28785 }, { "epoch": 2.9589885908109776, "grad_norm": 0.04146185144782066, "learning_rate": 0.01, "loss": 2.0353, "step": 28788 }, { "epoch": 2.9592969472710453, "grad_norm": 0.04004296660423279, "learning_rate": 0.01, "loss": 2.0113, "step": 28791 }, { "epoch": 2.959605303731113, "grad_norm": 0.07771418243646622, "learning_rate": 0.01, "loss": 2.025, "step": 28794 }, { "epoch": 2.959913660191181, "grad_norm": 0.048570699989795685, "learning_rate": 0.01, "loss": 2.0013, "step": 28797 }, { "epoch": 2.9602220166512487, "grad_norm": 0.0664374902844429, "learning_rate": 0.01, "loss": 2.0279, "step": 28800 }, { "epoch": 2.960530373111317, "grad_norm": 0.09744110703468323, "learning_rate": 0.01, "loss": 2.0058, "step": 28803 }, { "epoch": 2.9608387295713845, "grad_norm": 0.03430997580289841, "learning_rate": 0.01, "loss": 1.9996, "step": 28806 }, { "epoch": 2.9611470860314526, "grad_norm": 0.036999545991420746, "learning_rate": 0.01, "loss": 2.0146, "step": 28809 }, { "epoch": 2.9614554424915203, "grad_norm": 0.05290836840867996, "learning_rate": 0.01, "loss": 2.011, "step": 28812 }, { "epoch": 2.961763798951588, "grad_norm": 0.061555683612823486, "learning_rate": 0.01, "loss": 2.0145, "step": 28815 }, { "epoch": 2.962072155411656, "grad_norm": 0.06276807934045792, "learning_rate": 0.01, "loss": 2.0218, "step": 28818 }, { "epoch": 2.9623805118717237, "grad_norm": 0.05685890465974808, "learning_rate": 0.01, "loss": 1.9963, "step": 28821 }, { "epoch": 2.9626888683317913, "grad_norm": 0.09975235909223557, "learning_rate": 0.01, "loss": 2.0082, "step": 28824 }, { "epoch": 2.9629972247918595, "grad_norm": 0.04205232113599777, "learning_rate": 0.01, "loss": 2.0383, "step": 28827 }, { "epoch": 2.963305581251927, "grad_norm": 0.07952512800693512, "learning_rate": 0.01, "loss": 2.0059, "step": 28830 }, { "epoch": 2.963613937711995, "grad_norm": 0.04118579626083374, "learning_rate": 0.01, "loss": 1.9727, "step": 28833 }, { "epoch": 2.963922294172063, "grad_norm": 0.10875571519136429, "learning_rate": 0.01, "loss": 2.0153, "step": 28836 }, { "epoch": 2.964230650632131, "grad_norm": 0.16663865745067596, "learning_rate": 0.01, "loss": 2.001, "step": 28839 }, { "epoch": 2.9645390070921986, "grad_norm": 0.08370403200387955, "learning_rate": 0.01, "loss": 2.0158, "step": 28842 }, { "epoch": 2.9648473635522663, "grad_norm": 0.07209211587905884, "learning_rate": 0.01, "loss": 2.0271, "step": 28845 }, { "epoch": 2.9651557200123344, "grad_norm": 0.05848775431513786, "learning_rate": 0.01, "loss": 2.013, "step": 28848 }, { "epoch": 2.965464076472402, "grad_norm": 0.02791527472436428, "learning_rate": 0.01, "loss": 2.0331, "step": 28851 }, { "epoch": 2.9657724329324697, "grad_norm": 0.03520442545413971, "learning_rate": 0.01, "loss": 2.0346, "step": 28854 }, { "epoch": 2.966080789392538, "grad_norm": 0.03414986655116081, "learning_rate": 0.01, "loss": 2.0067, "step": 28857 }, { "epoch": 2.9663891458526055, "grad_norm": 0.0448157899081707, "learning_rate": 0.01, "loss": 2.0137, "step": 28860 }, { "epoch": 2.9666975023126736, "grad_norm": 0.09282497316598892, "learning_rate": 0.01, "loss": 2.0284, "step": 28863 }, { "epoch": 2.9670058587727413, "grad_norm": 0.05999337136745453, "learning_rate": 0.01, "loss": 2.0256, "step": 28866 }, { "epoch": 2.9673142152328094, "grad_norm": 0.038877278566360474, "learning_rate": 0.01, "loss": 1.9988, "step": 28869 }, { "epoch": 2.967622571692877, "grad_norm": 0.04760310798883438, "learning_rate": 0.01, "loss": 2.016, "step": 28872 }, { "epoch": 2.9679309281529447, "grad_norm": 0.049317944794893265, "learning_rate": 0.01, "loss": 1.9993, "step": 28875 }, { "epoch": 2.968239284613013, "grad_norm": 0.038830939680337906, "learning_rate": 0.01, "loss": 1.9969, "step": 28878 }, { "epoch": 2.9685476410730804, "grad_norm": 0.18675923347473145, "learning_rate": 0.01, "loss": 2.0217, "step": 28881 }, { "epoch": 2.968855997533148, "grad_norm": 0.04269779101014137, "learning_rate": 0.01, "loss": 2.0163, "step": 28884 }, { "epoch": 2.969164353993216, "grad_norm": 0.04740308225154877, "learning_rate": 0.01, "loss": 2.0184, "step": 28887 }, { "epoch": 2.969472710453284, "grad_norm": 0.058595623821020126, "learning_rate": 0.01, "loss": 2.0369, "step": 28890 }, { "epoch": 2.969781066913352, "grad_norm": 0.06136814132332802, "learning_rate": 0.01, "loss": 2.0229, "step": 28893 }, { "epoch": 2.9700894233734196, "grad_norm": 0.059679534286260605, "learning_rate": 0.01, "loss": 2.0151, "step": 28896 }, { "epoch": 2.9703977798334877, "grad_norm": 0.044182952493429184, "learning_rate": 0.01, "loss": 1.9977, "step": 28899 }, { "epoch": 2.9707061362935554, "grad_norm": 0.04152587056159973, "learning_rate": 0.01, "loss": 2.01, "step": 28902 }, { "epoch": 2.971014492753623, "grad_norm": 0.0424608550965786, "learning_rate": 0.01, "loss": 2.0228, "step": 28905 }, { "epoch": 2.971322849213691, "grad_norm": 0.040096018463373184, "learning_rate": 0.01, "loss": 1.9976, "step": 28908 }, { "epoch": 2.971631205673759, "grad_norm": 0.03288499265909195, "learning_rate": 0.01, "loss": 2.0416, "step": 28911 }, { "epoch": 2.9719395621338265, "grad_norm": 0.05150043964385986, "learning_rate": 0.01, "loss": 2.0, "step": 28914 }, { "epoch": 2.9722479185938946, "grad_norm": 0.05535360425710678, "learning_rate": 0.01, "loss": 2.0215, "step": 28917 }, { "epoch": 2.9725562750539622, "grad_norm": 0.09103085100650787, "learning_rate": 0.01, "loss": 2.0227, "step": 28920 }, { "epoch": 2.9728646315140304, "grad_norm": 0.060518983751535416, "learning_rate": 0.01, "loss": 1.9942, "step": 28923 }, { "epoch": 2.973172987974098, "grad_norm": 0.15921367704868317, "learning_rate": 0.01, "loss": 2.0203, "step": 28926 }, { "epoch": 2.973481344434166, "grad_norm": 0.04451125115156174, "learning_rate": 0.01, "loss": 2.0057, "step": 28929 }, { "epoch": 2.973789700894234, "grad_norm": 0.06099553778767586, "learning_rate": 0.01, "loss": 2.0317, "step": 28932 }, { "epoch": 2.9740980573543014, "grad_norm": 0.07864733040332794, "learning_rate": 0.01, "loss": 2.0392, "step": 28935 }, { "epoch": 2.9744064138143695, "grad_norm": 0.04279434680938721, "learning_rate": 0.01, "loss": 2.0189, "step": 28938 }, { "epoch": 2.974714770274437, "grad_norm": 0.06299113482236862, "learning_rate": 0.01, "loss": 2.0379, "step": 28941 }, { "epoch": 2.975023126734505, "grad_norm": 0.04803795740008354, "learning_rate": 0.01, "loss": 2.0161, "step": 28944 }, { "epoch": 2.975331483194573, "grad_norm": 0.04646730050444603, "learning_rate": 0.01, "loss": 2.026, "step": 28947 }, { "epoch": 2.9756398396546406, "grad_norm": 0.05352159962058067, "learning_rate": 0.01, "loss": 2.0114, "step": 28950 }, { "epoch": 2.9759481961147087, "grad_norm": 0.07281997799873352, "learning_rate": 0.01, "loss": 2.0328, "step": 28953 }, { "epoch": 2.9762565525747764, "grad_norm": 0.050049133598804474, "learning_rate": 0.01, "loss": 1.9943, "step": 28956 }, { "epoch": 2.9765649090348445, "grad_norm": 0.039795007556676865, "learning_rate": 0.01, "loss": 2.0128, "step": 28959 }, { "epoch": 2.976873265494912, "grad_norm": 0.09085065871477127, "learning_rate": 0.01, "loss": 2.005, "step": 28962 }, { "epoch": 2.97718162195498, "grad_norm": 0.1592961698770523, "learning_rate": 0.01, "loss": 2.0373, "step": 28965 }, { "epoch": 2.977489978415048, "grad_norm": 0.10092988610267639, "learning_rate": 0.01, "loss": 2.0009, "step": 28968 }, { "epoch": 2.9777983348751156, "grad_norm": 0.046051934361457825, "learning_rate": 0.01, "loss": 1.9942, "step": 28971 }, { "epoch": 2.9781066913351832, "grad_norm": 0.04267173632979393, "learning_rate": 0.01, "loss": 1.9997, "step": 28974 }, { "epoch": 2.9784150477952513, "grad_norm": 0.032134927809238434, "learning_rate": 0.01, "loss": 1.9694, "step": 28977 }, { "epoch": 2.978723404255319, "grad_norm": 0.055023569613695145, "learning_rate": 0.01, "loss": 2.0202, "step": 28980 }, { "epoch": 2.979031760715387, "grad_norm": 0.07177849858999252, "learning_rate": 0.01, "loss": 2.0287, "step": 28983 }, { "epoch": 2.9793401171754548, "grad_norm": 0.05890415981411934, "learning_rate": 0.01, "loss": 2.0226, "step": 28986 }, { "epoch": 2.979648473635523, "grad_norm": 0.04443821310997009, "learning_rate": 0.01, "loss": 2.042, "step": 28989 }, { "epoch": 2.9799568300955905, "grad_norm": 0.04153164103627205, "learning_rate": 0.01, "loss": 2.0111, "step": 28992 }, { "epoch": 2.980265186555658, "grad_norm": 0.07844668626785278, "learning_rate": 0.01, "loss": 2.0281, "step": 28995 }, { "epoch": 2.9805735430157263, "grad_norm": 0.09027374535799026, "learning_rate": 0.01, "loss": 2.0325, "step": 28998 }, { "epoch": 2.980881899475794, "grad_norm": 0.04559837654232979, "learning_rate": 0.01, "loss": 1.9926, "step": 29001 }, { "epoch": 2.9811902559358616, "grad_norm": 0.03771069645881653, "learning_rate": 0.01, "loss": 1.9966, "step": 29004 }, { "epoch": 2.9814986123959297, "grad_norm": 0.09008541703224182, "learning_rate": 0.01, "loss": 2.0246, "step": 29007 }, { "epoch": 2.9818069688559974, "grad_norm": 0.060089852660894394, "learning_rate": 0.01, "loss": 2.0127, "step": 29010 }, { "epoch": 2.9821153253160655, "grad_norm": 0.11291385442018509, "learning_rate": 0.01, "loss": 2.0397, "step": 29013 }, { "epoch": 2.982423681776133, "grad_norm": 0.09720003604888916, "learning_rate": 0.01, "loss": 2.0277, "step": 29016 }, { "epoch": 2.9827320382362013, "grad_norm": 0.060459867119789124, "learning_rate": 0.01, "loss": 1.9914, "step": 29019 }, { "epoch": 2.983040394696269, "grad_norm": 0.07619535177946091, "learning_rate": 0.01, "loss": 1.9964, "step": 29022 }, { "epoch": 2.9833487511563366, "grad_norm": 0.05402089282870293, "learning_rate": 0.01, "loss": 1.9867, "step": 29025 }, { "epoch": 2.9836571076164047, "grad_norm": 0.05162883922457695, "learning_rate": 0.01, "loss": 2.0135, "step": 29028 }, { "epoch": 2.9839654640764723, "grad_norm": 0.037369467318058014, "learning_rate": 0.01, "loss": 2.0232, "step": 29031 }, { "epoch": 2.98427382053654, "grad_norm": 0.05483279377222061, "learning_rate": 0.01, "loss": 2.0208, "step": 29034 }, { "epoch": 2.984582176996608, "grad_norm": 0.050847407430410385, "learning_rate": 0.01, "loss": 2.0043, "step": 29037 }, { "epoch": 2.9848905334566758, "grad_norm": 0.07075628638267517, "learning_rate": 0.01, "loss": 2.0408, "step": 29040 }, { "epoch": 2.985198889916744, "grad_norm": 0.05580511689186096, "learning_rate": 0.01, "loss": 2.0142, "step": 29043 }, { "epoch": 2.9855072463768115, "grad_norm": 0.043689336627721786, "learning_rate": 0.01, "loss": 2.0118, "step": 29046 }, { "epoch": 2.9858156028368796, "grad_norm": 0.07577245682477951, "learning_rate": 0.01, "loss": 2.0287, "step": 29049 }, { "epoch": 2.9861239592969473, "grad_norm": 0.07917779684066772, "learning_rate": 0.01, "loss": 2.0148, "step": 29052 }, { "epoch": 2.986432315757015, "grad_norm": 0.07137954980134964, "learning_rate": 0.01, "loss": 2.0007, "step": 29055 }, { "epoch": 2.986740672217083, "grad_norm": 0.09875428676605225, "learning_rate": 0.01, "loss": 2.0242, "step": 29058 }, { "epoch": 2.9870490286771507, "grad_norm": 0.059274058789014816, "learning_rate": 0.01, "loss": 2.0212, "step": 29061 }, { "epoch": 2.9873573851372184, "grad_norm": 0.04235726222395897, "learning_rate": 0.01, "loss": 2.0178, "step": 29064 }, { "epoch": 2.9876657415972865, "grad_norm": 0.1677253097295761, "learning_rate": 0.01, "loss": 1.9921, "step": 29067 }, { "epoch": 2.987974098057354, "grad_norm": 0.05953408405184746, "learning_rate": 0.01, "loss": 2.0138, "step": 29070 }, { "epoch": 2.9882824545174222, "grad_norm": 0.031471315771341324, "learning_rate": 0.01, "loss": 1.9953, "step": 29073 }, { "epoch": 2.98859081097749, "grad_norm": 0.0490463487803936, "learning_rate": 0.01, "loss": 2.0193, "step": 29076 }, { "epoch": 2.988899167437558, "grad_norm": 0.0685456171631813, "learning_rate": 0.01, "loss": 2.0459, "step": 29079 }, { "epoch": 2.9892075238976257, "grad_norm": 0.04472583159804344, "learning_rate": 0.01, "loss": 1.9909, "step": 29082 }, { "epoch": 2.9895158803576933, "grad_norm": 0.03528788685798645, "learning_rate": 0.01, "loss": 2.0024, "step": 29085 }, { "epoch": 2.9898242368177614, "grad_norm": 0.04196497052907944, "learning_rate": 0.01, "loss": 2.0015, "step": 29088 }, { "epoch": 2.990132593277829, "grad_norm": 0.1347953975200653, "learning_rate": 0.01, "loss": 1.9983, "step": 29091 }, { "epoch": 2.9904409497378968, "grad_norm": 0.03483536094427109, "learning_rate": 0.01, "loss": 2.0038, "step": 29094 }, { "epoch": 2.990749306197965, "grad_norm": 0.04203316941857338, "learning_rate": 0.01, "loss": 2.0364, "step": 29097 }, { "epoch": 2.9910576626580325, "grad_norm": 0.039628706872463226, "learning_rate": 0.01, "loss": 2.0212, "step": 29100 }, { "epoch": 2.9913660191181006, "grad_norm": 0.04524881765246391, "learning_rate": 0.01, "loss": 2.0436, "step": 29103 }, { "epoch": 2.9916743755781683, "grad_norm": 0.0704786404967308, "learning_rate": 0.01, "loss": 2.0084, "step": 29106 }, { "epoch": 2.9919827320382364, "grad_norm": 0.06169109791517258, "learning_rate": 0.01, "loss": 1.9879, "step": 29109 }, { "epoch": 2.992291088498304, "grad_norm": 0.05929429456591606, "learning_rate": 0.01, "loss": 2.0341, "step": 29112 }, { "epoch": 2.9925994449583717, "grad_norm": 0.06046414375305176, "learning_rate": 0.01, "loss": 2.0212, "step": 29115 }, { "epoch": 2.99290780141844, "grad_norm": 0.03632686287164688, "learning_rate": 0.01, "loss": 2.0288, "step": 29118 }, { "epoch": 2.9932161578785075, "grad_norm": 0.0416223518550396, "learning_rate": 0.01, "loss": 1.9955, "step": 29121 }, { "epoch": 2.993524514338575, "grad_norm": 0.033993568271398544, "learning_rate": 0.01, "loss": 2.0373, "step": 29124 }, { "epoch": 2.9938328707986432, "grad_norm": 0.11413303017616272, "learning_rate": 0.01, "loss": 2.0219, "step": 29127 }, { "epoch": 2.9941412272587113, "grad_norm": 0.04512523114681244, "learning_rate": 0.01, "loss": 2.0197, "step": 29130 }, { "epoch": 2.994449583718779, "grad_norm": 0.08577805757522583, "learning_rate": 0.01, "loss": 2.0263, "step": 29133 }, { "epoch": 2.9947579401788467, "grad_norm": 0.09573300927877426, "learning_rate": 0.01, "loss": 2.0057, "step": 29136 }, { "epoch": 2.9950662966389148, "grad_norm": 0.04170147702097893, "learning_rate": 0.01, "loss": 2.0287, "step": 29139 }, { "epoch": 2.9953746530989824, "grad_norm": 0.04233024641871452, "learning_rate": 0.01, "loss": 2.0084, "step": 29142 }, { "epoch": 2.99568300955905, "grad_norm": 0.05406009405851364, "learning_rate": 0.01, "loss": 2.0249, "step": 29145 }, { "epoch": 2.995991366019118, "grad_norm": 0.037997808307409286, "learning_rate": 0.01, "loss": 2.0106, "step": 29148 }, { "epoch": 2.996299722479186, "grad_norm": 0.08010167628526688, "learning_rate": 0.01, "loss": 1.9899, "step": 29151 }, { "epoch": 2.9966080789392535, "grad_norm": 0.05372076854109764, "learning_rate": 0.01, "loss": 2.0406, "step": 29154 }, { "epoch": 2.9969164353993216, "grad_norm": 0.04186830669641495, "learning_rate": 0.01, "loss": 2.0047, "step": 29157 }, { "epoch": 2.9972247918593897, "grad_norm": 0.0803917944431305, "learning_rate": 0.01, "loss": 2.0106, "step": 29160 }, { "epoch": 2.9975331483194574, "grad_norm": 0.06086145341396332, "learning_rate": 0.01, "loss": 2.0016, "step": 29163 }, { "epoch": 2.997841504779525, "grad_norm": 0.1197366788983345, "learning_rate": 0.01, "loss": 2.0025, "step": 29166 }, { "epoch": 2.998149861239593, "grad_norm": 0.09424477070569992, "learning_rate": 0.01, "loss": 2.0032, "step": 29169 }, { "epoch": 2.998458217699661, "grad_norm": 0.05025864019989967, "learning_rate": 0.01, "loss": 2.0166, "step": 29172 }, { "epoch": 2.9987665741597285, "grad_norm": 0.0840819776058197, "learning_rate": 0.01, "loss": 1.9983, "step": 29175 }, { "epoch": 2.9990749306197966, "grad_norm": 0.03415597230195999, "learning_rate": 0.01, "loss": 2.0233, "step": 29178 }, { "epoch": 2.9993832870798642, "grad_norm": 0.09066252410411835, "learning_rate": 0.01, "loss": 2.0245, "step": 29181 }, { "epoch": 2.999691643539932, "grad_norm": 0.05019732564687729, "learning_rate": 0.01, "loss": 1.9893, "step": 29184 }, { "epoch": 3.0, "grad_norm": 0.09318925440311432, "learning_rate": 0.01, "loss": 2.0062, "step": 29187 }, { "epoch": 2.999383477188656, "grad_norm": 0.08568185567855835, "learning_rate": 0.01, "loss": 2.0409, "step": 29190 }, { "epoch": 2.9996917385943282, "grad_norm": 0.05479726567864418, "learning_rate": 0.01, "loss": 1.9992, "step": 29193 }, { "epoch": 3.0, "grad_norm": 0.0812709629535675, "learning_rate": 0.01, "loss": 2.0193, "step": 29196 }, { "epoch": 3.000308261405672, "grad_norm": 0.06423875689506531, "learning_rate": 0.01, "loss": 2.0037, "step": 29199 }, { "epoch": 3.000616522811344, "grad_norm": 0.06900475919246674, "learning_rate": 0.01, "loss": 2.0235, "step": 29202 }, { "epoch": 3.000924784217016, "grad_norm": 0.09736461192369461, "learning_rate": 0.01, "loss": 2.0269, "step": 29205 }, { "epoch": 3.001233045622688, "grad_norm": 0.10431299358606339, "learning_rate": 0.01, "loss": 2.0366, "step": 29208 }, { "epoch": 3.00154130702836, "grad_norm": 0.06412489712238312, "learning_rate": 0.01, "loss": 2.0336, "step": 29211 }, { "epoch": 3.0018495684340323, "grad_norm": 0.07305043190717697, "learning_rate": 0.01, "loss": 2.0355, "step": 29214 }, { "epoch": 3.002157829839704, "grad_norm": 0.07680006325244904, "learning_rate": 0.01, "loss": 2.031, "step": 29217 }, { "epoch": 3.0024660912453762, "grad_norm": 0.04416871443390846, "learning_rate": 0.01, "loss": 2.0228, "step": 29220 }, { "epoch": 3.002774352651048, "grad_norm": 0.05895330011844635, "learning_rate": 0.01, "loss": 2.0124, "step": 29223 }, { "epoch": 3.00308261405672, "grad_norm": 0.16763944923877716, "learning_rate": 0.01, "loss": 2.0521, "step": 29226 }, { "epoch": 3.003390875462392, "grad_norm": 0.04152580350637436, "learning_rate": 0.01, "loss": 2.0144, "step": 29229 }, { "epoch": 3.003699136868064, "grad_norm": 0.05650210753083229, "learning_rate": 0.01, "loss": 2.0373, "step": 29232 }, { "epoch": 3.0040073982737363, "grad_norm": 0.10183783620595932, "learning_rate": 0.01, "loss": 2.0374, "step": 29235 }, { "epoch": 3.004315659679408, "grad_norm": 0.11545984447002411, "learning_rate": 0.01, "loss": 2.0359, "step": 29238 }, { "epoch": 3.0046239210850803, "grad_norm": 0.07768990844488144, "learning_rate": 0.01, "loss": 2.0222, "step": 29241 }, { "epoch": 3.004932182490752, "grad_norm": 0.06256531924009323, "learning_rate": 0.01, "loss": 2.0332, "step": 29244 }, { "epoch": 3.005240443896424, "grad_norm": 0.041665416210889816, "learning_rate": 0.01, "loss": 2.0061, "step": 29247 }, { "epoch": 3.0055487053020964, "grad_norm": 0.051996082067489624, "learning_rate": 0.01, "loss": 2.0237, "step": 29250 }, { "epoch": 3.005856966707768, "grad_norm": 0.032815441489219666, "learning_rate": 0.01, "loss": 2.0203, "step": 29253 }, { "epoch": 3.0061652281134403, "grad_norm": 0.060963522642850876, "learning_rate": 0.01, "loss": 2.0293, "step": 29256 }, { "epoch": 3.006473489519112, "grad_norm": 0.11818858236074448, "learning_rate": 0.01, "loss": 2.0246, "step": 29259 }, { "epoch": 3.0067817509247843, "grad_norm": 0.08653881400823593, "learning_rate": 0.01, "loss": 2.0208, "step": 29262 }, { "epoch": 3.007090012330456, "grad_norm": 0.09629751741886139, "learning_rate": 0.01, "loss": 1.9929, "step": 29265 }, { "epoch": 3.0073982737361282, "grad_norm": 0.04238956794142723, "learning_rate": 0.01, "loss": 2.0112, "step": 29268 }, { "epoch": 3.0077065351418004, "grad_norm": 0.040573202073574066, "learning_rate": 0.01, "loss": 2.0159, "step": 29271 }, { "epoch": 3.008014796547472, "grad_norm": 0.08061878383159637, "learning_rate": 0.01, "loss": 2.0562, "step": 29274 }, { "epoch": 3.0083230579531444, "grad_norm": 0.06944199651479721, "learning_rate": 0.01, "loss": 2.0077, "step": 29277 }, { "epoch": 3.008631319358816, "grad_norm": 0.06629909574985504, "learning_rate": 0.01, "loss": 1.9934, "step": 29280 }, { "epoch": 3.0089395807644883, "grad_norm": 0.06654530018568039, "learning_rate": 0.01, "loss": 2.0018, "step": 29283 }, { "epoch": 3.0092478421701605, "grad_norm": 0.08806800842285156, "learning_rate": 0.01, "loss": 2.0033, "step": 29286 }, { "epoch": 3.0095561035758323, "grad_norm": 0.12367472797632217, "learning_rate": 0.01, "loss": 2.0382, "step": 29289 }, { "epoch": 3.0098643649815044, "grad_norm": 0.052121471613645554, "learning_rate": 0.01, "loss": 2.0278, "step": 29292 }, { "epoch": 3.010172626387176, "grad_norm": 0.0409090481698513, "learning_rate": 0.01, "loss": 2.0146, "step": 29295 }, { "epoch": 3.0104808877928484, "grad_norm": 0.09956279397010803, "learning_rate": 0.01, "loss": 2.0339, "step": 29298 }, { "epoch": 3.01078914919852, "grad_norm": 0.04090806096792221, "learning_rate": 0.01, "loss": 2.031, "step": 29301 }, { "epoch": 3.0110974106041923, "grad_norm": 0.06308458000421524, "learning_rate": 0.01, "loss": 2.0347, "step": 29304 }, { "epoch": 3.0114056720098645, "grad_norm": 0.05140318349003792, "learning_rate": 0.01, "loss": 2.0341, "step": 29307 }, { "epoch": 3.0117139334155363, "grad_norm": 0.0383441224694252, "learning_rate": 0.01, "loss": 2.0167, "step": 29310 }, { "epoch": 3.0120221948212085, "grad_norm": 0.06803669035434723, "learning_rate": 0.01, "loss": 2.0442, "step": 29313 }, { "epoch": 3.0123304562268802, "grad_norm": 0.042336028069257736, "learning_rate": 0.01, "loss": 2.018, "step": 29316 }, { "epoch": 3.0126387176325524, "grad_norm": 0.052575305104255676, "learning_rate": 0.01, "loss": 1.9956, "step": 29319 }, { "epoch": 3.0129469790382246, "grad_norm": 0.04428831860423088, "learning_rate": 0.01, "loss": 2.0225, "step": 29322 }, { "epoch": 3.0132552404438964, "grad_norm": 0.03720409423112869, "learning_rate": 0.01, "loss": 2.0135, "step": 29325 }, { "epoch": 3.0135635018495686, "grad_norm": 0.03491399809718132, "learning_rate": 0.01, "loss": 1.9993, "step": 29328 }, { "epoch": 3.0138717632552403, "grad_norm": 0.11868512630462646, "learning_rate": 0.01, "loss": 2.014, "step": 29331 }, { "epoch": 3.0141800246609125, "grad_norm": 0.05709204822778702, "learning_rate": 0.01, "loss": 2.0043, "step": 29334 }, { "epoch": 3.0144882860665843, "grad_norm": 0.09501231461763382, "learning_rate": 0.01, "loss": 2.0056, "step": 29337 }, { "epoch": 3.0147965474722564, "grad_norm": 0.04431547597050667, "learning_rate": 0.01, "loss": 2.0223, "step": 29340 }, { "epoch": 3.0151048088779286, "grad_norm": 0.07580556720495224, "learning_rate": 0.01, "loss": 2.0343, "step": 29343 }, { "epoch": 3.0154130702836004, "grad_norm": 0.05567536503076553, "learning_rate": 0.01, "loss": 2.0327, "step": 29346 }, { "epoch": 3.0157213316892726, "grad_norm": 0.03637940436601639, "learning_rate": 0.01, "loss": 2.0366, "step": 29349 }, { "epoch": 3.0160295930949443, "grad_norm": 0.07379139959812164, "learning_rate": 0.01, "loss": 2.0196, "step": 29352 }, { "epoch": 3.0163378545006165, "grad_norm": 0.08311998844146729, "learning_rate": 0.01, "loss": 2.0013, "step": 29355 }, { "epoch": 3.0166461159062887, "grad_norm": 0.11170487850904465, "learning_rate": 0.01, "loss": 2.0174, "step": 29358 }, { "epoch": 3.0169543773119605, "grad_norm": 0.055827848613262177, "learning_rate": 0.01, "loss": 2.0184, "step": 29361 }, { "epoch": 3.0172626387176327, "grad_norm": 0.06052641570568085, "learning_rate": 0.01, "loss": 2.0238, "step": 29364 }, { "epoch": 3.0175709001233044, "grad_norm": 0.03150554001331329, "learning_rate": 0.01, "loss": 2.024, "step": 29367 }, { "epoch": 3.0178791615289766, "grad_norm": 0.06298622488975525, "learning_rate": 0.01, "loss": 2.0132, "step": 29370 }, { "epoch": 3.0181874229346484, "grad_norm": 0.09742710739374161, "learning_rate": 0.01, "loss": 2.0411, "step": 29373 }, { "epoch": 3.0184956843403206, "grad_norm": 0.07219108939170837, "learning_rate": 0.01, "loss": 2.0113, "step": 29376 }, { "epoch": 3.0188039457459928, "grad_norm": 0.05558139458298683, "learning_rate": 0.01, "loss": 2.0075, "step": 29379 }, { "epoch": 3.0191122071516645, "grad_norm": 0.05763263627886772, "learning_rate": 0.01, "loss": 2.0433, "step": 29382 }, { "epoch": 3.0194204685573367, "grad_norm": 0.04832174628973007, "learning_rate": 0.01, "loss": 2.007, "step": 29385 }, { "epoch": 3.0197287299630085, "grad_norm": 0.03823337331414223, "learning_rate": 0.01, "loss": 2.0307, "step": 29388 }, { "epoch": 3.0200369913686806, "grad_norm": 0.05253903195261955, "learning_rate": 0.01, "loss": 2.0018, "step": 29391 }, { "epoch": 3.020345252774353, "grad_norm": 0.10889255255460739, "learning_rate": 0.01, "loss": 2.0106, "step": 29394 }, { "epoch": 3.0206535141800246, "grad_norm": 0.04247021675109863, "learning_rate": 0.01, "loss": 2.0466, "step": 29397 }, { "epoch": 3.020961775585697, "grad_norm": 0.09347319602966309, "learning_rate": 0.01, "loss": 2.037, "step": 29400 }, { "epoch": 3.0212700369913685, "grad_norm": 0.05651739612221718, "learning_rate": 0.01, "loss": 2.0385, "step": 29403 }, { "epoch": 3.0215782983970407, "grad_norm": 0.0666181743144989, "learning_rate": 0.01, "loss": 2.0274, "step": 29406 }, { "epoch": 3.021886559802713, "grad_norm": 0.053186848759651184, "learning_rate": 0.01, "loss": 2.0127, "step": 29409 }, { "epoch": 3.0221948212083847, "grad_norm": 0.05201537534594536, "learning_rate": 0.01, "loss": 2.0009, "step": 29412 }, { "epoch": 3.022503082614057, "grad_norm": 0.02726483717560768, "learning_rate": 0.01, "loss": 2.0135, "step": 29415 }, { "epoch": 3.0228113440197286, "grad_norm": 0.10047302395105362, "learning_rate": 0.01, "loss": 2.0336, "step": 29418 }, { "epoch": 3.023119605425401, "grad_norm": 0.05461571365594864, "learning_rate": 0.01, "loss": 2.0213, "step": 29421 }, { "epoch": 3.0234278668310726, "grad_norm": 0.08373844623565674, "learning_rate": 0.01, "loss": 2.0246, "step": 29424 }, { "epoch": 3.0237361282367448, "grad_norm": 0.045885663479566574, "learning_rate": 0.01, "loss": 2.044, "step": 29427 }, { "epoch": 3.024044389642417, "grad_norm": 0.054790932685136795, "learning_rate": 0.01, "loss": 2.0387, "step": 29430 }, { "epoch": 3.0243526510480887, "grad_norm": 0.04917832091450691, "learning_rate": 0.01, "loss": 2.025, "step": 29433 }, { "epoch": 3.024660912453761, "grad_norm": 0.09375031292438507, "learning_rate": 0.01, "loss": 2.0482, "step": 29436 }, { "epoch": 3.0249691738594326, "grad_norm": 0.051234230399131775, "learning_rate": 0.01, "loss": 2.0024, "step": 29439 }, { "epoch": 3.025277435265105, "grad_norm": 0.057380858808755875, "learning_rate": 0.01, "loss": 2.0011, "step": 29442 }, { "epoch": 3.025585696670777, "grad_norm": 0.060605574399232864, "learning_rate": 0.01, "loss": 2.0158, "step": 29445 }, { "epoch": 3.025893958076449, "grad_norm": 0.09125442057847977, "learning_rate": 0.01, "loss": 2.0284, "step": 29448 }, { "epoch": 3.026202219482121, "grad_norm": 0.04081615433096886, "learning_rate": 0.01, "loss": 2.0029, "step": 29451 }, { "epoch": 3.0265104808877927, "grad_norm": 0.05347365140914917, "learning_rate": 0.01, "loss": 2.0319, "step": 29454 }, { "epoch": 3.026818742293465, "grad_norm": 0.04458871856331825, "learning_rate": 0.01, "loss": 2.0035, "step": 29457 }, { "epoch": 3.0271270036991367, "grad_norm": 0.04996645078063011, "learning_rate": 0.01, "loss": 2.0046, "step": 29460 }, { "epoch": 3.027435265104809, "grad_norm": 0.051296427845954895, "learning_rate": 0.01, "loss": 2.0099, "step": 29463 }, { "epoch": 3.027743526510481, "grad_norm": 0.05448664352297783, "learning_rate": 0.01, "loss": 2.0082, "step": 29466 }, { "epoch": 3.028051787916153, "grad_norm": 0.0587022639811039, "learning_rate": 0.01, "loss": 2.0083, "step": 29469 }, { "epoch": 3.028360049321825, "grad_norm": 0.0711282268166542, "learning_rate": 0.01, "loss": 1.9997, "step": 29472 }, { "epoch": 3.0286683107274968, "grad_norm": 0.14440664649009705, "learning_rate": 0.01, "loss": 2.0525, "step": 29475 }, { "epoch": 3.028976572133169, "grad_norm": 0.06147081404924393, "learning_rate": 0.01, "loss": 1.9979, "step": 29478 }, { "epoch": 3.029284833538841, "grad_norm": 0.0842541828751564, "learning_rate": 0.01, "loss": 2.0075, "step": 29481 }, { "epoch": 3.029593094944513, "grad_norm": 0.04915475845336914, "learning_rate": 0.01, "loss": 2.029, "step": 29484 }, { "epoch": 3.029901356350185, "grad_norm": 0.04254012182354927, "learning_rate": 0.01, "loss": 2.0406, "step": 29487 }, { "epoch": 3.030209617755857, "grad_norm": 0.03716140240430832, "learning_rate": 0.01, "loss": 2.0477, "step": 29490 }, { "epoch": 3.030517879161529, "grad_norm": 0.09156777709722519, "learning_rate": 0.01, "loss": 2.0178, "step": 29493 }, { "epoch": 3.030826140567201, "grad_norm": 0.05401970446109772, "learning_rate": 0.01, "loss": 2.005, "step": 29496 }, { "epoch": 3.031134401972873, "grad_norm": 0.11564016342163086, "learning_rate": 0.01, "loss": 2.0344, "step": 29499 }, { "epoch": 3.031442663378545, "grad_norm": 0.12813927233219147, "learning_rate": 0.01, "loss": 2.0159, "step": 29502 }, { "epoch": 3.031750924784217, "grad_norm": 0.04971994087100029, "learning_rate": 0.01, "loss": 2.0122, "step": 29505 }, { "epoch": 3.032059186189889, "grad_norm": 0.037013307213783264, "learning_rate": 0.01, "loss": 2.0165, "step": 29508 }, { "epoch": 3.032367447595561, "grad_norm": 0.048204224556684494, "learning_rate": 0.01, "loss": 2.0635, "step": 29511 }, { "epoch": 3.032675709001233, "grad_norm": 0.034393493086099625, "learning_rate": 0.01, "loss": 2.0283, "step": 29514 }, { "epoch": 3.0329839704069053, "grad_norm": 0.041031621396541595, "learning_rate": 0.01, "loss": 2.0062, "step": 29517 }, { "epoch": 3.033292231812577, "grad_norm": 0.049610815942287445, "learning_rate": 0.01, "loss": 2.015, "step": 29520 }, { "epoch": 3.033600493218249, "grad_norm": 0.07062069326639175, "learning_rate": 0.01, "loss": 2.0216, "step": 29523 }, { "epoch": 3.033908754623921, "grad_norm": 0.040892720222473145, "learning_rate": 0.01, "loss": 2.0223, "step": 29526 }, { "epoch": 3.034217016029593, "grad_norm": 0.036804720759391785, "learning_rate": 0.01, "loss": 2.0303, "step": 29529 }, { "epoch": 3.034525277435265, "grad_norm": 0.06749982386827469, "learning_rate": 0.01, "loss": 2.0267, "step": 29532 }, { "epoch": 3.034833538840937, "grad_norm": 0.07996654510498047, "learning_rate": 0.01, "loss": 2.0587, "step": 29535 }, { "epoch": 3.0351418002466093, "grad_norm": 0.05621659383177757, "learning_rate": 0.01, "loss": 2.0119, "step": 29538 }, { "epoch": 3.035450061652281, "grad_norm": 0.04464147612452507, "learning_rate": 0.01, "loss": 2.0308, "step": 29541 }, { "epoch": 3.0357583230579532, "grad_norm": 0.04619302973151207, "learning_rate": 0.01, "loss": 2.0028, "step": 29544 }, { "epoch": 3.036066584463625, "grad_norm": 0.09262579679489136, "learning_rate": 0.01, "loss": 2.0457, "step": 29547 }, { "epoch": 3.036374845869297, "grad_norm": 0.07954932749271393, "learning_rate": 0.01, "loss": 1.9981, "step": 29550 }, { "epoch": 3.0366831072749694, "grad_norm": 0.08746150135993958, "learning_rate": 0.01, "loss": 2.0072, "step": 29553 }, { "epoch": 3.036991368680641, "grad_norm": 0.04415113106369972, "learning_rate": 0.01, "loss": 2.0113, "step": 29556 }, { "epoch": 3.0372996300863133, "grad_norm": 0.08307299762964249, "learning_rate": 0.01, "loss": 2.0329, "step": 29559 }, { "epoch": 3.037607891491985, "grad_norm": 0.05882325395941734, "learning_rate": 0.01, "loss": 2.0094, "step": 29562 }, { "epoch": 3.0379161528976573, "grad_norm": 0.08524999022483826, "learning_rate": 0.01, "loss": 2.0039, "step": 29565 }, { "epoch": 3.038224414303329, "grad_norm": 0.07542447000741959, "learning_rate": 0.01, "loss": 2.0156, "step": 29568 }, { "epoch": 3.038532675709001, "grad_norm": 0.06394769251346588, "learning_rate": 0.01, "loss": 2.0551, "step": 29571 }, { "epoch": 3.0388409371146734, "grad_norm": 0.07811316847801208, "learning_rate": 0.01, "loss": 2.0198, "step": 29574 }, { "epoch": 3.039149198520345, "grad_norm": 0.0849740207195282, "learning_rate": 0.01, "loss": 2.0276, "step": 29577 }, { "epoch": 3.0394574599260173, "grad_norm": 0.05540047585964203, "learning_rate": 0.01, "loss": 2.0383, "step": 29580 }, { "epoch": 3.039765721331689, "grad_norm": 0.044082462787628174, "learning_rate": 0.01, "loss": 1.9947, "step": 29583 }, { "epoch": 3.0400739827373613, "grad_norm": 0.11179853975772858, "learning_rate": 0.01, "loss": 2.011, "step": 29586 }, { "epoch": 3.0403822441430335, "grad_norm": 0.10160455852746964, "learning_rate": 0.01, "loss": 2.0303, "step": 29589 }, { "epoch": 3.0406905055487052, "grad_norm": 0.08072539418935776, "learning_rate": 0.01, "loss": 2.0351, "step": 29592 }, { "epoch": 3.0409987669543774, "grad_norm": 0.039637934416532516, "learning_rate": 0.01, "loss": 2.0304, "step": 29595 }, { "epoch": 3.041307028360049, "grad_norm": 0.05847008153796196, "learning_rate": 0.01, "loss": 2.02, "step": 29598 }, { "epoch": 3.0416152897657214, "grad_norm": 0.04309094697237015, "learning_rate": 0.01, "loss": 2.0445, "step": 29601 }, { "epoch": 3.041923551171393, "grad_norm": 0.04230069741606712, "learning_rate": 0.01, "loss": 2.0014, "step": 29604 }, { "epoch": 3.0422318125770653, "grad_norm": 0.08122226595878601, "learning_rate": 0.01, "loss": 2.0214, "step": 29607 }, { "epoch": 3.0425400739827375, "grad_norm": 0.05134423449635506, "learning_rate": 0.01, "loss": 1.9923, "step": 29610 }, { "epoch": 3.0428483353884093, "grad_norm": 0.06343290954828262, "learning_rate": 0.01, "loss": 2.0146, "step": 29613 }, { "epoch": 3.0431565967940815, "grad_norm": 0.04415202513337135, "learning_rate": 0.01, "loss": 2.0189, "step": 29616 }, { "epoch": 3.043464858199753, "grad_norm": 0.09160872548818588, "learning_rate": 0.01, "loss": 2.0045, "step": 29619 }, { "epoch": 3.0437731196054254, "grad_norm": 0.0800345167517662, "learning_rate": 0.01, "loss": 2.0214, "step": 29622 }, { "epoch": 3.0440813810110976, "grad_norm": 0.11805953085422516, "learning_rate": 0.01, "loss": 2.0165, "step": 29625 }, { "epoch": 3.0443896424167693, "grad_norm": 0.036926135420799255, "learning_rate": 0.01, "loss": 2.0115, "step": 29628 }, { "epoch": 3.0446979038224415, "grad_norm": 0.04666229337453842, "learning_rate": 0.01, "loss": 2.0178, "step": 29631 }, { "epoch": 3.0450061652281133, "grad_norm": 0.152203768491745, "learning_rate": 0.01, "loss": 2.0173, "step": 29634 }, { "epoch": 3.0453144266337855, "grad_norm": 0.07508762180805206, "learning_rate": 0.01, "loss": 2.0276, "step": 29637 }, { "epoch": 3.0456226880394572, "grad_norm": 0.07548423856496811, "learning_rate": 0.01, "loss": 2.006, "step": 29640 }, { "epoch": 3.0459309494451294, "grad_norm": 0.03427097201347351, "learning_rate": 0.01, "loss": 2.0229, "step": 29643 }, { "epoch": 3.0462392108508016, "grad_norm": 0.0399865061044693, "learning_rate": 0.01, "loss": 2.012, "step": 29646 }, { "epoch": 3.0465474722564734, "grad_norm": 0.03335277736186981, "learning_rate": 0.01, "loss": 2.0083, "step": 29649 }, { "epoch": 3.0468557336621456, "grad_norm": 0.0520477369427681, "learning_rate": 0.01, "loss": 2.0389, "step": 29652 }, { "epoch": 3.0471639950678173, "grad_norm": 0.08565925061702728, "learning_rate": 0.01, "loss": 2.0088, "step": 29655 }, { "epoch": 3.0474722564734895, "grad_norm": 0.062498703598976135, "learning_rate": 0.01, "loss": 1.9971, "step": 29658 }, { "epoch": 3.0477805178791617, "grad_norm": 0.08171094208955765, "learning_rate": 0.01, "loss": 1.9849, "step": 29661 }, { "epoch": 3.0480887792848335, "grad_norm": 0.06803334504365921, "learning_rate": 0.01, "loss": 2.0127, "step": 29664 }, { "epoch": 3.0483970406905057, "grad_norm": 0.113568514585495, "learning_rate": 0.01, "loss": 2.0074, "step": 29667 }, { "epoch": 3.0487053020961774, "grad_norm": 0.049806151539087296, "learning_rate": 0.01, "loss": 2.0144, "step": 29670 }, { "epoch": 3.0490135635018496, "grad_norm": 0.05908326804637909, "learning_rate": 0.01, "loss": 2.0229, "step": 29673 }, { "epoch": 3.049321824907522, "grad_norm": 0.0421968549489975, "learning_rate": 0.01, "loss": 2.038, "step": 29676 }, { "epoch": 3.0496300863131935, "grad_norm": 0.04743592441082001, "learning_rate": 0.01, "loss": 2.0126, "step": 29679 }, { "epoch": 3.0499383477188657, "grad_norm": 0.11457180231809616, "learning_rate": 0.01, "loss": 2.0057, "step": 29682 }, { "epoch": 3.0502466091245375, "grad_norm": 0.052697159349918365, "learning_rate": 0.01, "loss": 2.0276, "step": 29685 }, { "epoch": 3.0505548705302097, "grad_norm": 0.140494704246521, "learning_rate": 0.01, "loss": 2.0083, "step": 29688 }, { "epoch": 3.0508631319358814, "grad_norm": 0.03548673912882805, "learning_rate": 0.01, "loss": 1.9985, "step": 29691 }, { "epoch": 3.0511713933415536, "grad_norm": 0.03223096579313278, "learning_rate": 0.01, "loss": 2.0254, "step": 29694 }, { "epoch": 3.051479654747226, "grad_norm": 0.040596138685941696, "learning_rate": 0.01, "loss": 2.0179, "step": 29697 }, { "epoch": 3.0517879161528976, "grad_norm": 0.09559677541255951, "learning_rate": 0.01, "loss": 2.0298, "step": 29700 }, { "epoch": 3.0520961775585698, "grad_norm": 0.10038384050130844, "learning_rate": 0.01, "loss": 1.9998, "step": 29703 }, { "epoch": 3.0524044389642415, "grad_norm": 0.06066306680440903, "learning_rate": 0.01, "loss": 2.0395, "step": 29706 }, { "epoch": 3.0527127003699137, "grad_norm": 0.08278308808803558, "learning_rate": 0.01, "loss": 2.0231, "step": 29709 }, { "epoch": 3.053020961775586, "grad_norm": 0.05971555784344673, "learning_rate": 0.01, "loss": 2.0275, "step": 29712 }, { "epoch": 3.0533292231812577, "grad_norm": 0.036974966526031494, "learning_rate": 0.01, "loss": 2.0315, "step": 29715 }, { "epoch": 3.05363748458693, "grad_norm": 0.0310160294175148, "learning_rate": 0.01, "loss": 2.0049, "step": 29718 }, { "epoch": 3.0539457459926016, "grad_norm": 0.03587731346487999, "learning_rate": 0.01, "loss": 2.019, "step": 29721 }, { "epoch": 3.054254007398274, "grad_norm": 0.05497679486870766, "learning_rate": 0.01, "loss": 2.0321, "step": 29724 }, { "epoch": 3.0545622688039455, "grad_norm": 0.06713774055242538, "learning_rate": 0.01, "loss": 1.9943, "step": 29727 }, { "epoch": 3.0548705302096177, "grad_norm": 0.034498222172260284, "learning_rate": 0.01, "loss": 2.0304, "step": 29730 }, { "epoch": 3.05517879161529, "grad_norm": 0.12036336213350296, "learning_rate": 0.01, "loss": 2.0249, "step": 29733 }, { "epoch": 3.0554870530209617, "grad_norm": 0.08315537869930267, "learning_rate": 0.01, "loss": 2.0003, "step": 29736 }, { "epoch": 3.055795314426634, "grad_norm": 0.041693978011608124, "learning_rate": 0.01, "loss": 2.0117, "step": 29739 }, { "epoch": 3.0561035758323056, "grad_norm": 0.08889281749725342, "learning_rate": 0.01, "loss": 2.0256, "step": 29742 }, { "epoch": 3.056411837237978, "grad_norm": 0.07068068534135818, "learning_rate": 0.01, "loss": 2.0018, "step": 29745 }, { "epoch": 3.05672009864365, "grad_norm": 0.09744048863649368, "learning_rate": 0.01, "loss": 2.001, "step": 29748 }, { "epoch": 3.0570283600493218, "grad_norm": 0.04529158025979996, "learning_rate": 0.01, "loss": 2.0084, "step": 29751 }, { "epoch": 3.057336621454994, "grad_norm": 0.041666992008686066, "learning_rate": 0.01, "loss": 2.031, "step": 29754 }, { "epoch": 3.0576448828606657, "grad_norm": 0.07549773156642914, "learning_rate": 0.01, "loss": 2.0274, "step": 29757 }, { "epoch": 3.057953144266338, "grad_norm": 0.046318188309669495, "learning_rate": 0.01, "loss": 2.0132, "step": 29760 }, { "epoch": 3.0582614056720097, "grad_norm": 0.05954563990235329, "learning_rate": 0.01, "loss": 2.01, "step": 29763 }, { "epoch": 3.058569667077682, "grad_norm": 0.03786987066268921, "learning_rate": 0.01, "loss": 1.9981, "step": 29766 }, { "epoch": 3.058877928483354, "grad_norm": 0.04249919578433037, "learning_rate": 0.01, "loss": 2.0168, "step": 29769 }, { "epoch": 3.059186189889026, "grad_norm": 0.09349595755338669, "learning_rate": 0.01, "loss": 2.0365, "step": 29772 }, { "epoch": 3.059494451294698, "grad_norm": 0.05637031048536301, "learning_rate": 0.01, "loss": 1.9966, "step": 29775 }, { "epoch": 3.0598027127003697, "grad_norm": 0.09952693432569504, "learning_rate": 0.01, "loss": 2.0295, "step": 29778 }, { "epoch": 3.060110974106042, "grad_norm": 0.10634247213602066, "learning_rate": 0.01, "loss": 2.0425, "step": 29781 }, { "epoch": 3.060419235511714, "grad_norm": 0.0774141326546669, "learning_rate": 0.01, "loss": 2.0222, "step": 29784 }, { "epoch": 3.060727496917386, "grad_norm": 0.04012331739068031, "learning_rate": 0.01, "loss": 2.021, "step": 29787 }, { "epoch": 3.061035758323058, "grad_norm": 0.04289887100458145, "learning_rate": 0.01, "loss": 2.0115, "step": 29790 }, { "epoch": 3.06134401972873, "grad_norm": 0.03678658604621887, "learning_rate": 0.01, "loss": 2.0269, "step": 29793 }, { "epoch": 3.061652281134402, "grad_norm": 0.034676194190979004, "learning_rate": 0.01, "loss": 2.0426, "step": 29796 }, { "epoch": 3.0619605425400738, "grad_norm": 0.08951381593942642, "learning_rate": 0.01, "loss": 2.001, "step": 29799 }, { "epoch": 3.062268803945746, "grad_norm": 0.053729988634586334, "learning_rate": 0.01, "loss": 2.0108, "step": 29802 }, { "epoch": 3.062577065351418, "grad_norm": 0.08025490492582321, "learning_rate": 0.01, "loss": 2.0091, "step": 29805 }, { "epoch": 3.06288532675709, "grad_norm": 0.11099457740783691, "learning_rate": 0.01, "loss": 2.0134, "step": 29808 }, { "epoch": 3.063193588162762, "grad_norm": 0.13354651629924774, "learning_rate": 0.01, "loss": 2.0417, "step": 29811 }, { "epoch": 3.063501849568434, "grad_norm": 0.07378706336021423, "learning_rate": 0.01, "loss": 2.0169, "step": 29814 }, { "epoch": 3.063810110974106, "grad_norm": 0.04699387028813362, "learning_rate": 0.01, "loss": 2.0211, "step": 29817 }, { "epoch": 3.0641183723797782, "grad_norm": 0.08287378400564194, "learning_rate": 0.01, "loss": 2.0414, "step": 29820 }, { "epoch": 3.06442663378545, "grad_norm": 0.04004284739494324, "learning_rate": 0.01, "loss": 2.0275, "step": 29823 }, { "epoch": 3.064734895191122, "grad_norm": 0.05801365152001381, "learning_rate": 0.01, "loss": 2.0315, "step": 29826 }, { "epoch": 3.065043156596794, "grad_norm": 0.05196039378643036, "learning_rate": 0.01, "loss": 2.0193, "step": 29829 }, { "epoch": 3.065351418002466, "grad_norm": 0.04543706774711609, "learning_rate": 0.01, "loss": 2.0032, "step": 29832 }, { "epoch": 3.065659679408138, "grad_norm": 0.05413155257701874, "learning_rate": 0.01, "loss": 2.0127, "step": 29835 }, { "epoch": 3.06596794081381, "grad_norm": 0.05158795788884163, "learning_rate": 0.01, "loss": 2.0193, "step": 29838 }, { "epoch": 3.0662762022194823, "grad_norm": 0.05547649413347244, "learning_rate": 0.01, "loss": 2.0217, "step": 29841 }, { "epoch": 3.066584463625154, "grad_norm": 0.04968711733818054, "learning_rate": 0.01, "loss": 2.0332, "step": 29844 }, { "epoch": 3.066892725030826, "grad_norm": 0.05625564232468605, "learning_rate": 0.01, "loss": 2.0115, "step": 29847 }, { "epoch": 3.067200986436498, "grad_norm": 0.05007552728056908, "learning_rate": 0.01, "loss": 2.022, "step": 29850 }, { "epoch": 3.06750924784217, "grad_norm": 0.08748306334018707, "learning_rate": 0.01, "loss": 2.0372, "step": 29853 }, { "epoch": 3.0678175092478424, "grad_norm": 0.08939902484416962, "learning_rate": 0.01, "loss": 2.0279, "step": 29856 }, { "epoch": 3.068125770653514, "grad_norm": 0.04412224516272545, "learning_rate": 0.01, "loss": 2.0039, "step": 29859 }, { "epoch": 3.0684340320591863, "grad_norm": 0.04574199765920639, "learning_rate": 0.01, "loss": 2.0204, "step": 29862 }, { "epoch": 3.068742293464858, "grad_norm": 0.059090469032526016, "learning_rate": 0.01, "loss": 2.0097, "step": 29865 }, { "epoch": 3.0690505548705302, "grad_norm": 0.048365235328674316, "learning_rate": 0.01, "loss": 1.988, "step": 29868 }, { "epoch": 3.0693588162762024, "grad_norm": 0.05044705048203468, "learning_rate": 0.01, "loss": 2.0242, "step": 29871 }, { "epoch": 3.069667077681874, "grad_norm": 0.06108652427792549, "learning_rate": 0.01, "loss": 2.0223, "step": 29874 }, { "epoch": 3.0699753390875464, "grad_norm": 0.04400194063782692, "learning_rate": 0.01, "loss": 2.0188, "step": 29877 }, { "epoch": 3.070283600493218, "grad_norm": 0.047024693340063095, "learning_rate": 0.01, "loss": 2.0298, "step": 29880 }, { "epoch": 3.0705918618988903, "grad_norm": 0.06958126276731491, "learning_rate": 0.01, "loss": 2.0122, "step": 29883 }, { "epoch": 3.070900123304562, "grad_norm": 0.07789502292871475, "learning_rate": 0.01, "loss": 2.0022, "step": 29886 }, { "epoch": 3.0712083847102343, "grad_norm": 0.06438666582107544, "learning_rate": 0.01, "loss": 2.035, "step": 29889 }, { "epoch": 3.0715166461159065, "grad_norm": 0.1446814239025116, "learning_rate": 0.01, "loss": 1.9922, "step": 29892 }, { "epoch": 3.071824907521578, "grad_norm": 0.055988240987062454, "learning_rate": 0.01, "loss": 2.0204, "step": 29895 }, { "epoch": 3.0721331689272504, "grad_norm": 0.04055717960000038, "learning_rate": 0.01, "loss": 2.025, "step": 29898 }, { "epoch": 3.072441430332922, "grad_norm": 0.11918565630912781, "learning_rate": 0.01, "loss": 2.0436, "step": 29901 }, { "epoch": 3.0727496917385944, "grad_norm": 0.04726850986480713, "learning_rate": 0.01, "loss": 2.0313, "step": 29904 }, { "epoch": 3.0730579531442666, "grad_norm": 0.0526423342525959, "learning_rate": 0.01, "loss": 2.0541, "step": 29907 }, { "epoch": 3.0733662145499383, "grad_norm": 0.048728957772254944, "learning_rate": 0.01, "loss": 2.0134, "step": 29910 }, { "epoch": 3.0736744759556105, "grad_norm": 0.036348532885313034, "learning_rate": 0.01, "loss": 1.9963, "step": 29913 }, { "epoch": 3.0739827373612822, "grad_norm": 0.08039457350969315, "learning_rate": 0.01, "loss": 1.9541, "step": 29916 }, { "epoch": 3.0742909987669544, "grad_norm": 0.05370686575770378, "learning_rate": 0.01, "loss": 2.0306, "step": 29919 }, { "epoch": 3.074599260172626, "grad_norm": 0.08430914580821991, "learning_rate": 0.01, "loss": 2.0159, "step": 29922 }, { "epoch": 3.0749075215782984, "grad_norm": 0.04675029218196869, "learning_rate": 0.01, "loss": 1.9867, "step": 29925 }, { "epoch": 3.0752157829839706, "grad_norm": 0.0455501526594162, "learning_rate": 0.01, "loss": 2.0117, "step": 29928 }, { "epoch": 3.0755240443896423, "grad_norm": 0.04330252856016159, "learning_rate": 0.01, "loss": 2.0166, "step": 29931 }, { "epoch": 3.0758323057953145, "grad_norm": 0.04302576184272766, "learning_rate": 0.01, "loss": 2.0252, "step": 29934 }, { "epoch": 3.0761405672009863, "grad_norm": 0.07246873527765274, "learning_rate": 0.01, "loss": 2.0118, "step": 29937 }, { "epoch": 3.0764488286066585, "grad_norm": 0.12140212953090668, "learning_rate": 0.01, "loss": 2.0384, "step": 29940 }, { "epoch": 3.0767570900123307, "grad_norm": 0.050940465182065964, "learning_rate": 0.01, "loss": 2.0315, "step": 29943 }, { "epoch": 3.0770653514180024, "grad_norm": 0.08281772583723068, "learning_rate": 0.01, "loss": 2.0209, "step": 29946 }, { "epoch": 3.0773736128236746, "grad_norm": 0.05485936999320984, "learning_rate": 0.01, "loss": 2.0086, "step": 29949 }, { "epoch": 3.0776818742293464, "grad_norm": 0.0354488380253315, "learning_rate": 0.01, "loss": 2.0127, "step": 29952 }, { "epoch": 3.0779901356350186, "grad_norm": 0.04454338923096657, "learning_rate": 0.01, "loss": 2.0225, "step": 29955 }, { "epoch": 3.0782983970406903, "grad_norm": 0.07980217784643173, "learning_rate": 0.01, "loss": 2.0178, "step": 29958 }, { "epoch": 3.0786066584463625, "grad_norm": 0.07601354271173477, "learning_rate": 0.01, "loss": 2.0062, "step": 29961 }, { "epoch": 3.0789149198520347, "grad_norm": 0.036425743252038956, "learning_rate": 0.01, "loss": 2.0094, "step": 29964 }, { "epoch": 3.0792231812577064, "grad_norm": 0.05909854918718338, "learning_rate": 0.01, "loss": 2.0291, "step": 29967 }, { "epoch": 3.0795314426633786, "grad_norm": 0.038156334310770035, "learning_rate": 0.01, "loss": 2.0146, "step": 29970 }, { "epoch": 3.0798397040690504, "grad_norm": 0.03241376578807831, "learning_rate": 0.01, "loss": 2.0132, "step": 29973 }, { "epoch": 3.0801479654747226, "grad_norm": 0.039934635162353516, "learning_rate": 0.01, "loss": 2.0315, "step": 29976 }, { "epoch": 3.0804562268803948, "grad_norm": 0.05287212133407593, "learning_rate": 0.01, "loss": 2.0126, "step": 29979 }, { "epoch": 3.0807644882860665, "grad_norm": 0.0807357057929039, "learning_rate": 0.01, "loss": 2.0162, "step": 29982 }, { "epoch": 3.0810727496917387, "grad_norm": 0.07264538109302521, "learning_rate": 0.01, "loss": 2.0103, "step": 29985 }, { "epoch": 3.0813810110974105, "grad_norm": 0.06328427046537399, "learning_rate": 0.01, "loss": 2.0377, "step": 29988 }, { "epoch": 3.0816892725030827, "grad_norm": 0.05093936249613762, "learning_rate": 0.01, "loss": 2.0318, "step": 29991 }, { "epoch": 3.0819975339087544, "grad_norm": 0.035449884831905365, "learning_rate": 0.01, "loss": 2.0076, "step": 29994 }, { "epoch": 3.0823057953144266, "grad_norm": 0.13781675696372986, "learning_rate": 0.01, "loss": 2.0333, "step": 29997 }, { "epoch": 3.082614056720099, "grad_norm": 0.06310711055994034, "learning_rate": 0.01, "loss": 1.9998, "step": 30000 }, { "epoch": 3.0829223181257706, "grad_norm": 0.059962496161460876, "learning_rate": 0.01, "loss": 2.0033, "step": 30003 }, { "epoch": 3.0832305795314427, "grad_norm": 0.07414010167121887, "learning_rate": 0.01, "loss": 2.0017, "step": 30006 }, { "epoch": 3.0835388409371145, "grad_norm": 0.05147621035575867, "learning_rate": 0.01, "loss": 2.0253, "step": 30009 }, { "epoch": 3.0838471023427867, "grad_norm": 0.08669129014015198, "learning_rate": 0.01, "loss": 2.029, "step": 30012 }, { "epoch": 3.084155363748459, "grad_norm": 0.03791889548301697, "learning_rate": 0.01, "loss": 1.997, "step": 30015 }, { "epoch": 3.0844636251541306, "grad_norm": 0.08246924728155136, "learning_rate": 0.01, "loss": 1.9742, "step": 30018 }, { "epoch": 3.084771886559803, "grad_norm": 0.11454186588525772, "learning_rate": 0.01, "loss": 2.0275, "step": 30021 }, { "epoch": 3.0850801479654746, "grad_norm": 0.07639762759208679, "learning_rate": 0.01, "loss": 2.033, "step": 30024 }, { "epoch": 3.085388409371147, "grad_norm": 0.05283968895673752, "learning_rate": 0.01, "loss": 2.0121, "step": 30027 }, { "epoch": 3.0856966707768185, "grad_norm": 0.041770000010728836, "learning_rate": 0.01, "loss": 2.0196, "step": 30030 }, { "epoch": 3.0860049321824907, "grad_norm": 0.034824103116989136, "learning_rate": 0.01, "loss": 2.0161, "step": 30033 }, { "epoch": 3.086313193588163, "grad_norm": 0.043972861021757126, "learning_rate": 0.01, "loss": 2.0217, "step": 30036 }, { "epoch": 3.0866214549938347, "grad_norm": 0.03452165424823761, "learning_rate": 0.01, "loss": 2.0153, "step": 30039 }, { "epoch": 3.086929716399507, "grad_norm": 0.07175113260746002, "learning_rate": 0.01, "loss": 2.0091, "step": 30042 }, { "epoch": 3.0872379778051786, "grad_norm": 0.0780046209692955, "learning_rate": 0.01, "loss": 2.0127, "step": 30045 }, { "epoch": 3.087546239210851, "grad_norm": 0.06838949769735336, "learning_rate": 0.01, "loss": 2.0108, "step": 30048 }, { "epoch": 3.087854500616523, "grad_norm": 0.114894799888134, "learning_rate": 0.01, "loss": 2.0199, "step": 30051 }, { "epoch": 3.0881627620221948, "grad_norm": 0.10075607150793076, "learning_rate": 0.01, "loss": 2.025, "step": 30054 }, { "epoch": 3.088471023427867, "grad_norm": 0.04926849529147148, "learning_rate": 0.01, "loss": 2.0171, "step": 30057 }, { "epoch": 3.0887792848335387, "grad_norm": 0.04143848270177841, "learning_rate": 0.01, "loss": 2.0275, "step": 30060 }, { "epoch": 3.089087546239211, "grad_norm": 0.04664148762822151, "learning_rate": 0.01, "loss": 2.0227, "step": 30063 }, { "epoch": 3.089395807644883, "grad_norm": 0.04764292761683464, "learning_rate": 0.01, "loss": 2.0408, "step": 30066 }, { "epoch": 3.089704069050555, "grad_norm": 0.06317167729139328, "learning_rate": 0.01, "loss": 2.0177, "step": 30069 }, { "epoch": 3.090012330456227, "grad_norm": 0.07983100414276123, "learning_rate": 0.01, "loss": 2.0285, "step": 30072 }, { "epoch": 3.090320591861899, "grad_norm": 0.03481268137693405, "learning_rate": 0.01, "loss": 2.0441, "step": 30075 }, { "epoch": 3.090628853267571, "grad_norm": 0.042781632393598557, "learning_rate": 0.01, "loss": 2.0278, "step": 30078 }, { "epoch": 3.0909371146732427, "grad_norm": 0.05203581228852272, "learning_rate": 0.01, "loss": 2.0073, "step": 30081 }, { "epoch": 3.091245376078915, "grad_norm": 0.07452182471752167, "learning_rate": 0.01, "loss": 2.031, "step": 30084 }, { "epoch": 3.091553637484587, "grad_norm": 0.05319884046912193, "learning_rate": 0.01, "loss": 2.02, "step": 30087 }, { "epoch": 3.091861898890259, "grad_norm": 0.11648225039243698, "learning_rate": 0.01, "loss": 1.9881, "step": 30090 }, { "epoch": 3.092170160295931, "grad_norm": 0.07870937138795853, "learning_rate": 0.01, "loss": 2.0367, "step": 30093 }, { "epoch": 3.092478421701603, "grad_norm": 0.04574506729841232, "learning_rate": 0.01, "loss": 2.027, "step": 30096 }, { "epoch": 3.092786683107275, "grad_norm": 0.07913927733898163, "learning_rate": 0.01, "loss": 2.0318, "step": 30099 }, { "epoch": 3.0930949445129468, "grad_norm": 0.08093220740556717, "learning_rate": 0.01, "loss": 2.0226, "step": 30102 }, { "epoch": 3.093403205918619, "grad_norm": 0.07089852541685104, "learning_rate": 0.01, "loss": 2.0292, "step": 30105 }, { "epoch": 3.093711467324291, "grad_norm": 0.06293002516031265, "learning_rate": 0.01, "loss": 1.9972, "step": 30108 }, { "epoch": 3.094019728729963, "grad_norm": 0.057953476905822754, "learning_rate": 0.01, "loss": 2.027, "step": 30111 }, { "epoch": 3.094327990135635, "grad_norm": 0.06989496946334839, "learning_rate": 0.01, "loss": 2.0318, "step": 30114 }, { "epoch": 3.094636251541307, "grad_norm": 0.04050515964627266, "learning_rate": 0.01, "loss": 2.0071, "step": 30117 }, { "epoch": 3.094944512946979, "grad_norm": 0.1112913116812706, "learning_rate": 0.01, "loss": 2.0082, "step": 30120 }, { "epoch": 3.0952527743526512, "grad_norm": 0.05135345458984375, "learning_rate": 0.01, "loss": 2.044, "step": 30123 }, { "epoch": 3.095561035758323, "grad_norm": 0.1411667764186859, "learning_rate": 0.01, "loss": 2.0096, "step": 30126 }, { "epoch": 3.095869297163995, "grad_norm": 0.08077210187911987, "learning_rate": 0.01, "loss": 2.0251, "step": 30129 }, { "epoch": 3.096177558569667, "grad_norm": 0.10934364050626755, "learning_rate": 0.01, "loss": 2.0169, "step": 30132 }, { "epoch": 3.096485819975339, "grad_norm": 0.07186675071716309, "learning_rate": 0.01, "loss": 2.0123, "step": 30135 }, { "epoch": 3.0967940813810113, "grad_norm": 0.11918327957391739, "learning_rate": 0.01, "loss": 2.0391, "step": 30138 }, { "epoch": 3.097102342786683, "grad_norm": 0.052199963480234146, "learning_rate": 0.01, "loss": 2.011, "step": 30141 }, { "epoch": 3.0974106041923553, "grad_norm": 0.06796009093523026, "learning_rate": 0.01, "loss": 2.0399, "step": 30144 }, { "epoch": 3.097718865598027, "grad_norm": 0.055949617177248, "learning_rate": 0.01, "loss": 2.0243, "step": 30147 }, { "epoch": 3.098027127003699, "grad_norm": 0.06256923824548721, "learning_rate": 0.01, "loss": 2.0299, "step": 30150 }, { "epoch": 3.098335388409371, "grad_norm": 0.04565596580505371, "learning_rate": 0.01, "loss": 2.0084, "step": 30153 }, { "epoch": 3.098643649815043, "grad_norm": 0.051202937960624695, "learning_rate": 0.01, "loss": 2.0276, "step": 30156 }, { "epoch": 3.0989519112207153, "grad_norm": 0.09557224065065384, "learning_rate": 0.01, "loss": 2.0133, "step": 30159 }, { "epoch": 3.099260172626387, "grad_norm": 0.07491281628608704, "learning_rate": 0.01, "loss": 2.0222, "step": 30162 }, { "epoch": 3.0995684340320593, "grad_norm": 0.06766588240861893, "learning_rate": 0.01, "loss": 2.0165, "step": 30165 }, { "epoch": 3.099876695437731, "grad_norm": 0.036496859043836594, "learning_rate": 0.01, "loss": 2.0135, "step": 30168 }, { "epoch": 3.1001849568434032, "grad_norm": 0.09757262468338013, "learning_rate": 0.01, "loss": 2.0397, "step": 30171 }, { "epoch": 3.1004932182490754, "grad_norm": 0.046625055372714996, "learning_rate": 0.01, "loss": 2.0149, "step": 30174 }, { "epoch": 3.100801479654747, "grad_norm": 0.07655061781406403, "learning_rate": 0.01, "loss": 2.0054, "step": 30177 }, { "epoch": 3.1011097410604194, "grad_norm": 0.04577295854687691, "learning_rate": 0.01, "loss": 2.0192, "step": 30180 }, { "epoch": 3.101418002466091, "grad_norm": 0.04528047516942024, "learning_rate": 0.01, "loss": 2.0181, "step": 30183 }, { "epoch": 3.1017262638717633, "grad_norm": 0.04630058631300926, "learning_rate": 0.01, "loss": 2.0127, "step": 30186 }, { "epoch": 3.102034525277435, "grad_norm": 0.03936396911740303, "learning_rate": 0.01, "loss": 2.0093, "step": 30189 }, { "epoch": 3.1023427866831073, "grad_norm": 0.05879136547446251, "learning_rate": 0.01, "loss": 2.0169, "step": 30192 }, { "epoch": 3.1026510480887795, "grad_norm": 0.061699915677309036, "learning_rate": 0.01, "loss": 2.0418, "step": 30195 }, { "epoch": 3.102959309494451, "grad_norm": 0.05961352214217186, "learning_rate": 0.01, "loss": 1.9985, "step": 30198 }, { "epoch": 3.1032675709001234, "grad_norm": 0.10209197551012039, "learning_rate": 0.01, "loss": 2.0048, "step": 30201 }, { "epoch": 3.103575832305795, "grad_norm": 0.10693737864494324, "learning_rate": 0.01, "loss": 2.0321, "step": 30204 }, { "epoch": 3.1038840937114673, "grad_norm": 0.11183932423591614, "learning_rate": 0.01, "loss": 2.0269, "step": 30207 }, { "epoch": 3.1041923551171395, "grad_norm": 0.07532890886068344, "learning_rate": 0.01, "loss": 2.0284, "step": 30210 }, { "epoch": 3.1045006165228113, "grad_norm": 0.04888713359832764, "learning_rate": 0.01, "loss": 1.9731, "step": 30213 }, { "epoch": 3.1048088779284835, "grad_norm": 0.0533674955368042, "learning_rate": 0.01, "loss": 2.0158, "step": 30216 }, { "epoch": 3.1051171393341552, "grad_norm": 0.044245727360248566, "learning_rate": 0.01, "loss": 2.0006, "step": 30219 }, { "epoch": 3.1054254007398274, "grad_norm": 0.032582368701696396, "learning_rate": 0.01, "loss": 2.0314, "step": 30222 }, { "epoch": 3.105733662145499, "grad_norm": 0.10102632641792297, "learning_rate": 0.01, "loss": 2.0094, "step": 30225 }, { "epoch": 3.1060419235511714, "grad_norm": 0.03793931379914284, "learning_rate": 0.01, "loss": 2.0356, "step": 30228 }, { "epoch": 3.1063501849568436, "grad_norm": 0.04896242544054985, "learning_rate": 0.01, "loss": 2.0167, "step": 30231 }, { "epoch": 3.1066584463625153, "grad_norm": 0.057475607842206955, "learning_rate": 0.01, "loss": 1.9813, "step": 30234 }, { "epoch": 3.1069667077681875, "grad_norm": 0.037703339010477066, "learning_rate": 0.01, "loss": 2.0002, "step": 30237 }, { "epoch": 3.1072749691738593, "grad_norm": 0.04800937697291374, "learning_rate": 0.01, "loss": 2.043, "step": 30240 }, { "epoch": 3.1075832305795315, "grad_norm": 0.042761534452438354, "learning_rate": 0.01, "loss": 2.0208, "step": 30243 }, { "epoch": 3.1078914919852036, "grad_norm": 0.0864332839846611, "learning_rate": 0.01, "loss": 2.0229, "step": 30246 }, { "epoch": 3.1081997533908754, "grad_norm": 0.10482830554246902, "learning_rate": 0.01, "loss": 2.0601, "step": 30249 }, { "epoch": 3.1085080147965476, "grad_norm": 0.10628984868526459, "learning_rate": 0.01, "loss": 2.0078, "step": 30252 }, { "epoch": 3.1088162762022193, "grad_norm": 0.04853438585996628, "learning_rate": 0.01, "loss": 1.9999, "step": 30255 }, { "epoch": 3.1091245376078915, "grad_norm": 0.05274122208356857, "learning_rate": 0.01, "loss": 1.9926, "step": 30258 }, { "epoch": 3.1094327990135637, "grad_norm": 0.03757349029183388, "learning_rate": 0.01, "loss": 2.003, "step": 30261 }, { "epoch": 3.1097410604192355, "grad_norm": 0.04505928233265877, "learning_rate": 0.01, "loss": 2.0126, "step": 30264 }, { "epoch": 3.1100493218249077, "grad_norm": 0.08804783225059509, "learning_rate": 0.01, "loss": 2.0285, "step": 30267 }, { "epoch": 3.1103575832305794, "grad_norm": 0.11545568704605103, "learning_rate": 0.01, "loss": 2.0105, "step": 30270 }, { "epoch": 3.1106658446362516, "grad_norm": 0.04955466091632843, "learning_rate": 0.01, "loss": 2.048, "step": 30273 }, { "epoch": 3.1109741060419234, "grad_norm": 0.04930401220917702, "learning_rate": 0.01, "loss": 2.0223, "step": 30276 }, { "epoch": 3.1112823674475956, "grad_norm": 0.05701540783047676, "learning_rate": 0.01, "loss": 2.046, "step": 30279 }, { "epoch": 3.1115906288532678, "grad_norm": 0.07442028075456619, "learning_rate": 0.01, "loss": 2.0085, "step": 30282 }, { "epoch": 3.1118988902589395, "grad_norm": 0.06538541615009308, "learning_rate": 0.01, "loss": 2.0194, "step": 30285 }, { "epoch": 3.1122071516646117, "grad_norm": 0.0826522633433342, "learning_rate": 0.01, "loss": 2.0267, "step": 30288 }, { "epoch": 3.1125154130702835, "grad_norm": 0.1353752762079239, "learning_rate": 0.01, "loss": 2.0089, "step": 30291 }, { "epoch": 3.1128236744759556, "grad_norm": 0.06406208872795105, "learning_rate": 0.01, "loss": 2.0174, "step": 30294 }, { "epoch": 3.1131319358816274, "grad_norm": 0.036653418093919754, "learning_rate": 0.01, "loss": 1.9951, "step": 30297 }, { "epoch": 3.1134401972872996, "grad_norm": 0.0839834213256836, "learning_rate": 0.01, "loss": 2.0213, "step": 30300 }, { "epoch": 3.113748458692972, "grad_norm": 0.04629015177488327, "learning_rate": 0.01, "loss": 2.0488, "step": 30303 }, { "epoch": 3.1140567200986435, "grad_norm": 0.08630986511707306, "learning_rate": 0.01, "loss": 2.0271, "step": 30306 }, { "epoch": 3.1143649815043157, "grad_norm": 0.07894504070281982, "learning_rate": 0.01, "loss": 2.0128, "step": 30309 }, { "epoch": 3.1146732429099875, "grad_norm": 0.09203556925058365, "learning_rate": 0.01, "loss": 2.0094, "step": 30312 }, { "epoch": 3.1149815043156597, "grad_norm": 0.061055563390254974, "learning_rate": 0.01, "loss": 2.0199, "step": 30315 }, { "epoch": 3.115289765721332, "grad_norm": 0.0711871087551117, "learning_rate": 0.01, "loss": 2.0091, "step": 30318 }, { "epoch": 3.1155980271270036, "grad_norm": 0.07315775007009506, "learning_rate": 0.01, "loss": 2.0209, "step": 30321 }, { "epoch": 3.115906288532676, "grad_norm": 0.1050182655453682, "learning_rate": 0.01, "loss": 2.0086, "step": 30324 }, { "epoch": 3.1162145499383476, "grad_norm": 0.04959526285529137, "learning_rate": 0.01, "loss": 1.9827, "step": 30327 }, { "epoch": 3.1165228113440198, "grad_norm": 0.061066534370183945, "learning_rate": 0.01, "loss": 2.0258, "step": 30330 }, { "epoch": 3.116831072749692, "grad_norm": 0.057984329760074615, "learning_rate": 0.01, "loss": 2.0119, "step": 30333 }, { "epoch": 3.1171393341553637, "grad_norm": 0.07337040454149246, "learning_rate": 0.01, "loss": 2.0069, "step": 30336 }, { "epoch": 3.117447595561036, "grad_norm": 0.07801029831171036, "learning_rate": 0.01, "loss": 2.0172, "step": 30339 }, { "epoch": 3.1177558569667077, "grad_norm": 0.06625111401081085, "learning_rate": 0.01, "loss": 2.022, "step": 30342 }, { "epoch": 3.11806411837238, "grad_norm": 0.11416434496641159, "learning_rate": 0.01, "loss": 2.0304, "step": 30345 }, { "epoch": 3.1183723797780516, "grad_norm": 0.05046262592077255, "learning_rate": 0.01, "loss": 2.0064, "step": 30348 }, { "epoch": 3.118680641183724, "grad_norm": 0.030621282756328583, "learning_rate": 0.01, "loss": 2.0085, "step": 30351 }, { "epoch": 3.118988902589396, "grad_norm": 0.03274908661842346, "learning_rate": 0.01, "loss": 2.0164, "step": 30354 }, { "epoch": 3.1192971639950677, "grad_norm": 0.03673490136861801, "learning_rate": 0.01, "loss": 2.0383, "step": 30357 }, { "epoch": 3.11960542540074, "grad_norm": 0.07161369919776917, "learning_rate": 0.01, "loss": 2.0122, "step": 30360 }, { "epoch": 3.1199136868064117, "grad_norm": 0.04244636744260788, "learning_rate": 0.01, "loss": 1.9901, "step": 30363 }, { "epoch": 3.120221948212084, "grad_norm": 0.05357150360941887, "learning_rate": 0.01, "loss": 1.9994, "step": 30366 }, { "epoch": 3.120530209617756, "grad_norm": 0.06689538061618805, "learning_rate": 0.01, "loss": 2.008, "step": 30369 }, { "epoch": 3.120838471023428, "grad_norm": 0.08160898089408875, "learning_rate": 0.01, "loss": 2.0189, "step": 30372 }, { "epoch": 3.1211467324291, "grad_norm": 0.04037999361753464, "learning_rate": 0.01, "loss": 1.993, "step": 30375 }, { "epoch": 3.1214549938347718, "grad_norm": 0.09855981171131134, "learning_rate": 0.01, "loss": 2.0157, "step": 30378 }, { "epoch": 3.121763255240444, "grad_norm": 0.04645252600312233, "learning_rate": 0.01, "loss": 2.0211, "step": 30381 }, { "epoch": 3.1220715166461157, "grad_norm": 0.10605467110872269, "learning_rate": 0.01, "loss": 2.015, "step": 30384 }, { "epoch": 3.122379778051788, "grad_norm": 0.11321547627449036, "learning_rate": 0.01, "loss": 2.0193, "step": 30387 }, { "epoch": 3.12268803945746, "grad_norm": 0.07283324748277664, "learning_rate": 0.01, "loss": 2.0327, "step": 30390 }, { "epoch": 3.122996300863132, "grad_norm": 0.07217562943696976, "learning_rate": 0.01, "loss": 2.0079, "step": 30393 }, { "epoch": 3.123304562268804, "grad_norm": 0.036078888922929764, "learning_rate": 0.01, "loss": 2.0211, "step": 30396 }, { "epoch": 3.123612823674476, "grad_norm": 0.03730477765202522, "learning_rate": 0.01, "loss": 2.0077, "step": 30399 }, { "epoch": 3.123921085080148, "grad_norm": 0.04707048460841179, "learning_rate": 0.01, "loss": 2.0204, "step": 30402 }, { "epoch": 3.12422934648582, "grad_norm": 0.0571308508515358, "learning_rate": 0.01, "loss": 2.0054, "step": 30405 }, { "epoch": 3.124537607891492, "grad_norm": 0.05167640373110771, "learning_rate": 0.01, "loss": 2.0134, "step": 30408 }, { "epoch": 3.124845869297164, "grad_norm": 0.06759858876466751, "learning_rate": 0.01, "loss": 2.0286, "step": 30411 }, { "epoch": 3.125154130702836, "grad_norm": 0.031637318432331085, "learning_rate": 0.01, "loss": 1.9848, "step": 30414 }, { "epoch": 3.125462392108508, "grad_norm": 0.0991082713007927, "learning_rate": 0.01, "loss": 2.0277, "step": 30417 }, { "epoch": 3.12577065351418, "grad_norm": 0.06524378806352615, "learning_rate": 0.01, "loss": 1.9995, "step": 30420 }, { "epoch": 3.126078914919852, "grad_norm": 0.05751586705446243, "learning_rate": 0.01, "loss": 2.012, "step": 30423 }, { "epoch": 3.126387176325524, "grad_norm": 0.03776116296648979, "learning_rate": 0.01, "loss": 2.0291, "step": 30426 }, { "epoch": 3.126695437731196, "grad_norm": 0.0619078204035759, "learning_rate": 0.01, "loss": 1.9884, "step": 30429 }, { "epoch": 3.127003699136868, "grad_norm": 0.0511760450899601, "learning_rate": 0.01, "loss": 2.0142, "step": 30432 }, { "epoch": 3.12731196054254, "grad_norm": 0.053964611142873764, "learning_rate": 0.01, "loss": 2.0361, "step": 30435 }, { "epoch": 3.127620221948212, "grad_norm": 0.11773833632469177, "learning_rate": 0.01, "loss": 2.0298, "step": 30438 }, { "epoch": 3.1279284833538843, "grad_norm": 0.04505350813269615, "learning_rate": 0.01, "loss": 2.0162, "step": 30441 }, { "epoch": 3.128236744759556, "grad_norm": 0.10077274590730667, "learning_rate": 0.01, "loss": 2.0101, "step": 30444 }, { "epoch": 3.1285450061652282, "grad_norm": 0.05291616916656494, "learning_rate": 0.01, "loss": 2.0148, "step": 30447 }, { "epoch": 3.1288532675709, "grad_norm": 0.049340371042490005, "learning_rate": 0.01, "loss": 2.0101, "step": 30450 }, { "epoch": 3.129161528976572, "grad_norm": 0.06910324841737747, "learning_rate": 0.01, "loss": 2.008, "step": 30453 }, { "epoch": 3.1294697903822444, "grad_norm": 0.07307056337594986, "learning_rate": 0.01, "loss": 1.9933, "step": 30456 }, { "epoch": 3.129778051787916, "grad_norm": 0.06828980147838593, "learning_rate": 0.01, "loss": 2.0343, "step": 30459 }, { "epoch": 3.1300863131935883, "grad_norm": 0.0480424240231514, "learning_rate": 0.01, "loss": 2.0207, "step": 30462 }, { "epoch": 3.13039457459926, "grad_norm": 0.10072044283151627, "learning_rate": 0.01, "loss": 2.0244, "step": 30465 }, { "epoch": 3.1307028360049323, "grad_norm": 0.04987982660531998, "learning_rate": 0.01, "loss": 2.0056, "step": 30468 }, { "epoch": 3.131011097410604, "grad_norm": 0.10036738961935043, "learning_rate": 0.01, "loss": 2.0069, "step": 30471 }, { "epoch": 3.131319358816276, "grad_norm": 0.06445086747407913, "learning_rate": 0.01, "loss": 2.0196, "step": 30474 }, { "epoch": 3.1316276202219484, "grad_norm": 0.10858535021543503, "learning_rate": 0.01, "loss": 2.0159, "step": 30477 }, { "epoch": 3.13193588162762, "grad_norm": 0.041025932878255844, "learning_rate": 0.01, "loss": 2.0156, "step": 30480 }, { "epoch": 3.1322441430332923, "grad_norm": 0.05287109315395355, "learning_rate": 0.01, "loss": 2.0143, "step": 30483 }, { "epoch": 3.132552404438964, "grad_norm": 0.09659206867218018, "learning_rate": 0.01, "loss": 1.9991, "step": 30486 }, { "epoch": 3.1328606658446363, "grad_norm": 0.07235360145568848, "learning_rate": 0.01, "loss": 2.0521, "step": 30489 }, { "epoch": 3.133168927250308, "grad_norm": 0.042338863015174866, "learning_rate": 0.01, "loss": 2.0403, "step": 30492 }, { "epoch": 3.1334771886559802, "grad_norm": 0.045676395297050476, "learning_rate": 0.01, "loss": 1.9921, "step": 30495 }, { "epoch": 3.1337854500616524, "grad_norm": 0.04957246780395508, "learning_rate": 0.01, "loss": 2.0095, "step": 30498 }, { "epoch": 3.134093711467324, "grad_norm": 0.047321684658527374, "learning_rate": 0.01, "loss": 2.025, "step": 30501 }, { "epoch": 3.1344019728729964, "grad_norm": 0.09219278395175934, "learning_rate": 0.01, "loss": 2.0326, "step": 30504 }, { "epoch": 3.134710234278668, "grad_norm": 0.07666533440351486, "learning_rate": 0.01, "loss": 2.0132, "step": 30507 }, { "epoch": 3.1350184956843403, "grad_norm": 0.06832748651504517, "learning_rate": 0.01, "loss": 2.0074, "step": 30510 }, { "epoch": 3.1353267570900125, "grad_norm": 0.05527606979012489, "learning_rate": 0.01, "loss": 1.9941, "step": 30513 }, { "epoch": 3.1356350184956843, "grad_norm": 0.09272732585668564, "learning_rate": 0.01, "loss": 1.9942, "step": 30516 }, { "epoch": 3.1359432799013565, "grad_norm": 0.02908925898373127, "learning_rate": 0.01, "loss": 2.0173, "step": 30519 }, { "epoch": 3.136251541307028, "grad_norm": 0.04611453413963318, "learning_rate": 0.01, "loss": 2.0332, "step": 30522 }, { "epoch": 3.1365598027127004, "grad_norm": 0.05230382829904556, "learning_rate": 0.01, "loss": 2.0226, "step": 30525 }, { "epoch": 3.1368680641183726, "grad_norm": 0.09377764165401459, "learning_rate": 0.01, "loss": 2.0395, "step": 30528 }, { "epoch": 3.1371763255240444, "grad_norm": 0.049708276987075806, "learning_rate": 0.01, "loss": 1.996, "step": 30531 }, { "epoch": 3.1374845869297165, "grad_norm": 0.05086242035031319, "learning_rate": 0.01, "loss": 1.9855, "step": 30534 }, { "epoch": 3.1377928483353883, "grad_norm": 0.05085150897502899, "learning_rate": 0.01, "loss": 1.9968, "step": 30537 }, { "epoch": 3.1381011097410605, "grad_norm": 0.05230359733104706, "learning_rate": 0.01, "loss": 2.0079, "step": 30540 }, { "epoch": 3.1384093711467322, "grad_norm": 0.07910077273845673, "learning_rate": 0.01, "loss": 2.0075, "step": 30543 }, { "epoch": 3.1387176325524044, "grad_norm": 0.044358085840940475, "learning_rate": 0.01, "loss": 2.0236, "step": 30546 }, { "epoch": 3.1390258939580766, "grad_norm": 0.07269710302352905, "learning_rate": 0.01, "loss": 1.9995, "step": 30549 }, { "epoch": 3.1393341553637484, "grad_norm": 0.09917914122343063, "learning_rate": 0.01, "loss": 2.0233, "step": 30552 }, { "epoch": 3.1396424167694206, "grad_norm": 0.0755099207162857, "learning_rate": 0.01, "loss": 2.0113, "step": 30555 }, { "epoch": 3.1399506781750923, "grad_norm": 0.07911227643489838, "learning_rate": 0.01, "loss": 1.9751, "step": 30558 }, { "epoch": 3.1402589395807645, "grad_norm": 0.04883533716201782, "learning_rate": 0.01, "loss": 2.0247, "step": 30561 }, { "epoch": 3.1405672009864363, "grad_norm": 0.0375591404736042, "learning_rate": 0.01, "loss": 2.004, "step": 30564 }, { "epoch": 3.1408754623921085, "grad_norm": 0.08654747903347015, "learning_rate": 0.01, "loss": 2.0315, "step": 30567 }, { "epoch": 3.1411837237977807, "grad_norm": 0.07025197148323059, "learning_rate": 0.01, "loss": 2.0334, "step": 30570 }, { "epoch": 3.1414919852034524, "grad_norm": 0.11750215291976929, "learning_rate": 0.01, "loss": 1.9902, "step": 30573 }, { "epoch": 3.1418002466091246, "grad_norm": 0.037444472312927246, "learning_rate": 0.01, "loss": 2.0272, "step": 30576 }, { "epoch": 3.1421085080147964, "grad_norm": 0.044617678970098495, "learning_rate": 0.01, "loss": 2.0304, "step": 30579 }, { "epoch": 3.1424167694204685, "grad_norm": 0.06604386866092682, "learning_rate": 0.01, "loss": 2.0153, "step": 30582 }, { "epoch": 3.1427250308261407, "grad_norm": 0.09958125650882721, "learning_rate": 0.01, "loss": 2.0217, "step": 30585 }, { "epoch": 3.1430332922318125, "grad_norm": 0.05710573121905327, "learning_rate": 0.01, "loss": 2.0199, "step": 30588 }, { "epoch": 3.1433415536374847, "grad_norm": 0.05984263867139816, "learning_rate": 0.01, "loss": 2.0091, "step": 30591 }, { "epoch": 3.1436498150431564, "grad_norm": 0.04073350876569748, "learning_rate": 0.01, "loss": 2.032, "step": 30594 }, { "epoch": 3.1439580764488286, "grad_norm": 0.07050776481628418, "learning_rate": 0.01, "loss": 1.9977, "step": 30597 }, { "epoch": 3.144266337854501, "grad_norm": 0.03400518372654915, "learning_rate": 0.01, "loss": 2.0404, "step": 30600 }, { "epoch": 3.1445745992601726, "grad_norm": 0.06751801073551178, "learning_rate": 0.01, "loss": 2.0138, "step": 30603 }, { "epoch": 3.1448828606658448, "grad_norm": 0.06015434488654137, "learning_rate": 0.01, "loss": 1.9956, "step": 30606 }, { "epoch": 3.1451911220715165, "grad_norm": 0.11231853067874908, "learning_rate": 0.01, "loss": 2.0367, "step": 30609 }, { "epoch": 3.1454993834771887, "grad_norm": 0.047932934015989304, "learning_rate": 0.01, "loss": 2.0133, "step": 30612 }, { "epoch": 3.1458076448828605, "grad_norm": 0.04414699971675873, "learning_rate": 0.01, "loss": 2.0155, "step": 30615 }, { "epoch": 3.1461159062885327, "grad_norm": 0.06486550718545914, "learning_rate": 0.01, "loss": 2.0251, "step": 30618 }, { "epoch": 3.146424167694205, "grad_norm": 0.056324832141399384, "learning_rate": 0.01, "loss": 2.0137, "step": 30621 }, { "epoch": 3.1467324290998766, "grad_norm": 0.030867785215377808, "learning_rate": 0.01, "loss": 2.0339, "step": 30624 }, { "epoch": 3.147040690505549, "grad_norm": 0.050489556044340134, "learning_rate": 0.01, "loss": 2.0233, "step": 30627 }, { "epoch": 3.1473489519112205, "grad_norm": 0.04749634861946106, "learning_rate": 0.01, "loss": 2.0061, "step": 30630 }, { "epoch": 3.1476572133168927, "grad_norm": 0.05312662571668625, "learning_rate": 0.01, "loss": 1.9881, "step": 30633 }, { "epoch": 3.147965474722565, "grad_norm": 0.1234770268201828, "learning_rate": 0.01, "loss": 2.0119, "step": 30636 }, { "epoch": 3.1482737361282367, "grad_norm": 0.1423310786485672, "learning_rate": 0.01, "loss": 1.99, "step": 30639 }, { "epoch": 3.148581997533909, "grad_norm": 0.08930431306362152, "learning_rate": 0.01, "loss": 2.023, "step": 30642 }, { "epoch": 3.1488902589395806, "grad_norm": 0.03335024416446686, "learning_rate": 0.01, "loss": 2.0072, "step": 30645 }, { "epoch": 3.149198520345253, "grad_norm": 0.07785200327634811, "learning_rate": 0.01, "loss": 2.0083, "step": 30648 }, { "epoch": 3.1495067817509246, "grad_norm": 0.047731272876262665, "learning_rate": 0.01, "loss": 1.9999, "step": 30651 }, { "epoch": 3.1498150431565968, "grad_norm": 0.10476487874984741, "learning_rate": 0.01, "loss": 2.0374, "step": 30654 }, { "epoch": 3.150123304562269, "grad_norm": 0.05200812220573425, "learning_rate": 0.01, "loss": 2.0204, "step": 30657 }, { "epoch": 3.1504315659679407, "grad_norm": 0.06658156216144562, "learning_rate": 0.01, "loss": 2.0331, "step": 30660 }, { "epoch": 3.150739827373613, "grad_norm": 0.06515184789896011, "learning_rate": 0.01, "loss": 2.0248, "step": 30663 }, { "epoch": 3.1510480887792847, "grad_norm": 0.03604321926832199, "learning_rate": 0.01, "loss": 2.0279, "step": 30666 }, { "epoch": 3.151356350184957, "grad_norm": 0.10496728122234344, "learning_rate": 0.01, "loss": 2.016, "step": 30669 }, { "epoch": 3.151664611590629, "grad_norm": 0.05922207236289978, "learning_rate": 0.01, "loss": 1.9988, "step": 30672 }, { "epoch": 3.151972872996301, "grad_norm": 0.03718853369355202, "learning_rate": 0.01, "loss": 2.0313, "step": 30675 }, { "epoch": 3.152281134401973, "grad_norm": 0.04691898077726364, "learning_rate": 0.01, "loss": 1.9755, "step": 30678 }, { "epoch": 3.1525893958076447, "grad_norm": 0.048930030316114426, "learning_rate": 0.01, "loss": 2.0079, "step": 30681 }, { "epoch": 3.152897657213317, "grad_norm": 0.028419634327292442, "learning_rate": 0.01, "loss": 1.9928, "step": 30684 }, { "epoch": 3.1532059186189887, "grad_norm": 0.05914349481463432, "learning_rate": 0.01, "loss": 2.004, "step": 30687 }, { "epoch": 3.153514180024661, "grad_norm": 0.05946047231554985, "learning_rate": 0.01, "loss": 2.025, "step": 30690 }, { "epoch": 3.153822441430333, "grad_norm": 0.06951475888490677, "learning_rate": 0.01, "loss": 2.0341, "step": 30693 }, { "epoch": 3.154130702836005, "grad_norm": 0.055885329842567444, "learning_rate": 0.01, "loss": 2.0335, "step": 30696 }, { "epoch": 3.154438964241677, "grad_norm": 0.043436676263809204, "learning_rate": 0.01, "loss": 1.9812, "step": 30699 }, { "epoch": 3.1547472256473488, "grad_norm": 0.04978411644697189, "learning_rate": 0.01, "loss": 2.0117, "step": 30702 }, { "epoch": 3.155055487053021, "grad_norm": 0.11427465081214905, "learning_rate": 0.01, "loss": 2.0179, "step": 30705 }, { "epoch": 3.155363748458693, "grad_norm": 0.045963071286678314, "learning_rate": 0.01, "loss": 1.9926, "step": 30708 }, { "epoch": 3.155672009864365, "grad_norm": 0.03811359032988548, "learning_rate": 0.01, "loss": 2.0241, "step": 30711 }, { "epoch": 3.155980271270037, "grad_norm": 0.05763382837176323, "learning_rate": 0.01, "loss": 2.0158, "step": 30714 }, { "epoch": 3.156288532675709, "grad_norm": 0.045626237988471985, "learning_rate": 0.01, "loss": 2.0033, "step": 30717 }, { "epoch": 3.156596794081381, "grad_norm": 0.037544216960668564, "learning_rate": 0.01, "loss": 1.9964, "step": 30720 }, { "epoch": 3.1569050554870532, "grad_norm": 0.14238816499710083, "learning_rate": 0.01, "loss": 2.0342, "step": 30723 }, { "epoch": 3.157213316892725, "grad_norm": 0.048318054527044296, "learning_rate": 0.01, "loss": 2.0181, "step": 30726 }, { "epoch": 3.157521578298397, "grad_norm": 0.041052673012018204, "learning_rate": 0.01, "loss": 1.9813, "step": 30729 }, { "epoch": 3.157829839704069, "grad_norm": 0.0370815135538578, "learning_rate": 0.01, "loss": 2.0042, "step": 30732 }, { "epoch": 3.158138101109741, "grad_norm": 0.07156988233327866, "learning_rate": 0.01, "loss": 2.0374, "step": 30735 }, { "epoch": 3.158446362515413, "grad_norm": 0.0757046490907669, "learning_rate": 0.01, "loss": 2.0147, "step": 30738 }, { "epoch": 3.158754623921085, "grad_norm": 0.0674174353480339, "learning_rate": 0.01, "loss": 1.9951, "step": 30741 }, { "epoch": 3.1590628853267573, "grad_norm": 0.05094176158308983, "learning_rate": 0.01, "loss": 2.0196, "step": 30744 }, { "epoch": 3.159371146732429, "grad_norm": 0.04377339780330658, "learning_rate": 0.01, "loss": 2.0129, "step": 30747 }, { "epoch": 3.159679408138101, "grad_norm": 0.04788428172469139, "learning_rate": 0.01, "loss": 2.032, "step": 30750 }, { "epoch": 3.159987669543773, "grad_norm": 0.08424562215805054, "learning_rate": 0.01, "loss": 1.9851, "step": 30753 }, { "epoch": 3.160295930949445, "grad_norm": 0.0742245689034462, "learning_rate": 0.01, "loss": 1.9942, "step": 30756 }, { "epoch": 3.160604192355117, "grad_norm": 0.127692312002182, "learning_rate": 0.01, "loss": 2.0294, "step": 30759 }, { "epoch": 3.160912453760789, "grad_norm": 0.06112000718712807, "learning_rate": 0.01, "loss": 2.0087, "step": 30762 }, { "epoch": 3.1612207151664613, "grad_norm": 0.04676929488778114, "learning_rate": 0.01, "loss": 2.0117, "step": 30765 }, { "epoch": 3.161528976572133, "grad_norm": 0.07944846153259277, "learning_rate": 0.01, "loss": 2.0239, "step": 30768 }, { "epoch": 3.1618372379778052, "grad_norm": 0.05377965793013573, "learning_rate": 0.01, "loss": 2.0424, "step": 30771 }, { "epoch": 3.162145499383477, "grad_norm": 0.04961777105927467, "learning_rate": 0.01, "loss": 1.9989, "step": 30774 }, { "epoch": 3.162453760789149, "grad_norm": 0.043640993535518646, "learning_rate": 0.01, "loss": 1.9938, "step": 30777 }, { "epoch": 3.1627620221948214, "grad_norm": 0.08145361393690109, "learning_rate": 0.01, "loss": 2.0253, "step": 30780 }, { "epoch": 3.163070283600493, "grad_norm": 0.03488341346383095, "learning_rate": 0.01, "loss": 1.9797, "step": 30783 }, { "epoch": 3.1633785450061653, "grad_norm": 0.054735999554395676, "learning_rate": 0.01, "loss": 2.0142, "step": 30786 }, { "epoch": 3.163686806411837, "grad_norm": 0.05064836144447327, "learning_rate": 0.01, "loss": 2.0255, "step": 30789 }, { "epoch": 3.1639950678175093, "grad_norm": 0.05693964287638664, "learning_rate": 0.01, "loss": 2.0073, "step": 30792 }, { "epoch": 3.1643033292231815, "grad_norm": 0.08762123435735703, "learning_rate": 0.01, "loss": 2.0049, "step": 30795 }, { "epoch": 3.164611590628853, "grad_norm": 0.08652741461992264, "learning_rate": 0.01, "loss": 2.0021, "step": 30798 }, { "epoch": 3.1649198520345254, "grad_norm": 0.15153749287128448, "learning_rate": 0.01, "loss": 2.0326, "step": 30801 }, { "epoch": 3.165228113440197, "grad_norm": 0.09822986274957657, "learning_rate": 0.01, "loss": 2.0232, "step": 30804 }, { "epoch": 3.1655363748458694, "grad_norm": 0.07334254682064056, "learning_rate": 0.01, "loss": 2.0176, "step": 30807 }, { "epoch": 3.165844636251541, "grad_norm": 0.042224787175655365, "learning_rate": 0.01, "loss": 2.0277, "step": 30810 }, { "epoch": 3.1661528976572133, "grad_norm": 0.047276921570301056, "learning_rate": 0.01, "loss": 2.0014, "step": 30813 }, { "epoch": 3.1664611590628855, "grad_norm": 0.112834133207798, "learning_rate": 0.01, "loss": 1.998, "step": 30816 }, { "epoch": 3.1667694204685573, "grad_norm": 0.0851617380976677, "learning_rate": 0.01, "loss": 2.013, "step": 30819 }, { "epoch": 3.1670776818742294, "grad_norm": 0.046194661408662796, "learning_rate": 0.01, "loss": 2.0029, "step": 30822 }, { "epoch": 3.167385943279901, "grad_norm": 0.056417178362607956, "learning_rate": 0.01, "loss": 2.0171, "step": 30825 }, { "epoch": 3.1676942046855734, "grad_norm": 0.06759685277938843, "learning_rate": 0.01, "loss": 2.0281, "step": 30828 }, { "epoch": 3.1680024660912456, "grad_norm": 0.08961043506860733, "learning_rate": 0.01, "loss": 2.0222, "step": 30831 }, { "epoch": 3.1683107274969173, "grad_norm": 0.03684352710843086, "learning_rate": 0.01, "loss": 2.0037, "step": 30834 }, { "epoch": 3.1686189889025895, "grad_norm": 0.1388491988182068, "learning_rate": 0.01, "loss": 2.0304, "step": 30837 }, { "epoch": 3.1689272503082613, "grad_norm": 0.03769170120358467, "learning_rate": 0.01, "loss": 1.9998, "step": 30840 }, { "epoch": 3.1692355117139335, "grad_norm": 0.040609996765851974, "learning_rate": 0.01, "loss": 2.0208, "step": 30843 }, { "epoch": 3.1695437731196052, "grad_norm": 0.04916587471961975, "learning_rate": 0.01, "loss": 2.0249, "step": 30846 }, { "epoch": 3.1698520345252774, "grad_norm": 0.042920999228954315, "learning_rate": 0.01, "loss": 2.0078, "step": 30849 }, { "epoch": 3.1701602959309496, "grad_norm": 0.05777138099074364, "learning_rate": 0.01, "loss": 2.019, "step": 30852 }, { "epoch": 3.1704685573366214, "grad_norm": 0.06128811836242676, "learning_rate": 0.01, "loss": 2.0123, "step": 30855 }, { "epoch": 3.1707768187422936, "grad_norm": 0.08042391389608383, "learning_rate": 0.01, "loss": 2.0123, "step": 30858 }, { "epoch": 3.1710850801479653, "grad_norm": 0.036383189260959625, "learning_rate": 0.01, "loss": 2.0011, "step": 30861 }, { "epoch": 3.1713933415536375, "grad_norm": 0.05068094655871391, "learning_rate": 0.01, "loss": 1.9922, "step": 30864 }, { "epoch": 3.1717016029593097, "grad_norm": 0.09005576372146606, "learning_rate": 0.01, "loss": 2.048, "step": 30867 }, { "epoch": 3.1720098643649814, "grad_norm": 0.0589495450258255, "learning_rate": 0.01, "loss": 2.0227, "step": 30870 }, { "epoch": 3.1723181257706536, "grad_norm": 0.04160517826676369, "learning_rate": 0.01, "loss": 1.9987, "step": 30873 }, { "epoch": 3.1726263871763254, "grad_norm": 0.044718582183122635, "learning_rate": 0.01, "loss": 1.9895, "step": 30876 }, { "epoch": 3.1729346485819976, "grad_norm": 0.09011929482221603, "learning_rate": 0.01, "loss": 2.0076, "step": 30879 }, { "epoch": 3.1732429099876693, "grad_norm": 0.0597953200340271, "learning_rate": 0.01, "loss": 1.9974, "step": 30882 }, { "epoch": 3.1735511713933415, "grad_norm": 0.06141204759478569, "learning_rate": 0.01, "loss": 2.0141, "step": 30885 }, { "epoch": 3.1738594327990137, "grad_norm": 0.09013784676790237, "learning_rate": 0.01, "loss": 2.0159, "step": 30888 }, { "epoch": 3.1741676942046855, "grad_norm": 0.08281320333480835, "learning_rate": 0.01, "loss": 2.0239, "step": 30891 }, { "epoch": 3.1744759556103577, "grad_norm": 0.09314266592264175, "learning_rate": 0.01, "loss": 2.0224, "step": 30894 }, { "epoch": 3.1747842170160294, "grad_norm": 0.04410851374268532, "learning_rate": 0.01, "loss": 2.0152, "step": 30897 }, { "epoch": 3.1750924784217016, "grad_norm": 0.12074366211891174, "learning_rate": 0.01, "loss": 2.0385, "step": 30900 }, { "epoch": 3.175400739827374, "grad_norm": 0.06861037015914917, "learning_rate": 0.01, "loss": 2.0024, "step": 30903 }, { "epoch": 3.1757090012330456, "grad_norm": 0.038134749978780746, "learning_rate": 0.01, "loss": 1.9987, "step": 30906 }, { "epoch": 3.1760172626387178, "grad_norm": 0.11108820140361786, "learning_rate": 0.01, "loss": 2.0149, "step": 30909 }, { "epoch": 3.1763255240443895, "grad_norm": 0.047687213867902756, "learning_rate": 0.01, "loss": 2.0063, "step": 30912 }, { "epoch": 3.1766337854500617, "grad_norm": 0.05983440950512886, "learning_rate": 0.01, "loss": 2.0099, "step": 30915 }, { "epoch": 3.176942046855734, "grad_norm": 0.038930587470531464, "learning_rate": 0.01, "loss": 2.0122, "step": 30918 }, { "epoch": 3.1772503082614056, "grad_norm": 0.08851155638694763, "learning_rate": 0.01, "loss": 2.0077, "step": 30921 }, { "epoch": 3.177558569667078, "grad_norm": 0.08430106192827225, "learning_rate": 0.01, "loss": 2.0082, "step": 30924 }, { "epoch": 3.1778668310727496, "grad_norm": 0.04469950869679451, "learning_rate": 0.01, "loss": 2.0138, "step": 30927 }, { "epoch": 3.178175092478422, "grad_norm": 0.06740089505910873, "learning_rate": 0.01, "loss": 2.0051, "step": 30930 }, { "epoch": 3.1784833538840935, "grad_norm": 0.06175517663359642, "learning_rate": 0.01, "loss": 2.0215, "step": 30933 }, { "epoch": 3.1787916152897657, "grad_norm": 0.047650065273046494, "learning_rate": 0.01, "loss": 2.0246, "step": 30936 }, { "epoch": 3.179099876695438, "grad_norm": 0.07261566072702408, "learning_rate": 0.01, "loss": 2.0107, "step": 30939 }, { "epoch": 3.1794081381011097, "grad_norm": 0.1270940601825714, "learning_rate": 0.01, "loss": 2.0208, "step": 30942 }, { "epoch": 3.179716399506782, "grad_norm": 0.054443880915641785, "learning_rate": 0.01, "loss": 2.0272, "step": 30945 }, { "epoch": 3.1800246609124536, "grad_norm": 0.06243740767240524, "learning_rate": 0.01, "loss": 2.0013, "step": 30948 }, { "epoch": 3.180332922318126, "grad_norm": 0.04665446653962135, "learning_rate": 0.01, "loss": 2.0269, "step": 30951 }, { "epoch": 3.1806411837237976, "grad_norm": 0.0470532663166523, "learning_rate": 0.01, "loss": 2.0194, "step": 30954 }, { "epoch": 3.1809494451294698, "grad_norm": 0.03944860398769379, "learning_rate": 0.01, "loss": 2.0166, "step": 30957 }, { "epoch": 3.181257706535142, "grad_norm": 0.04705018922686577, "learning_rate": 0.01, "loss": 2.0044, "step": 30960 }, { "epoch": 3.1815659679408137, "grad_norm": 0.04603470116853714, "learning_rate": 0.01, "loss": 1.9635, "step": 30963 }, { "epoch": 3.181874229346486, "grad_norm": 0.04761103168129921, "learning_rate": 0.01, "loss": 2.0245, "step": 30966 }, { "epoch": 3.1821824907521576, "grad_norm": 0.09109831601381302, "learning_rate": 0.01, "loss": 1.9919, "step": 30969 }, { "epoch": 3.18249075215783, "grad_norm": 0.07111615687608719, "learning_rate": 0.01, "loss": 2.0123, "step": 30972 }, { "epoch": 3.182799013563502, "grad_norm": 0.07623612880706787, "learning_rate": 0.01, "loss": 1.9949, "step": 30975 }, { "epoch": 3.183107274969174, "grad_norm": 0.0768456682562828, "learning_rate": 0.01, "loss": 2.0323, "step": 30978 }, { "epoch": 3.183415536374846, "grad_norm": 0.07147437334060669, "learning_rate": 0.01, "loss": 2.0126, "step": 30981 }, { "epoch": 3.1837237977805177, "grad_norm": 0.04732421785593033, "learning_rate": 0.01, "loss": 1.9975, "step": 30984 }, { "epoch": 3.18403205918619, "grad_norm": 0.04406769201159477, "learning_rate": 0.01, "loss": 2.0201, "step": 30987 }, { "epoch": 3.184340320591862, "grad_norm": 0.03760458901524544, "learning_rate": 0.01, "loss": 1.9994, "step": 30990 }, { "epoch": 3.184648581997534, "grad_norm": 0.051892757415771484, "learning_rate": 0.01, "loss": 2.0146, "step": 30993 }, { "epoch": 3.184956843403206, "grad_norm": 0.03961913660168648, "learning_rate": 0.01, "loss": 2.0145, "step": 30996 }, { "epoch": 3.185265104808878, "grad_norm": 0.07263363152742386, "learning_rate": 0.01, "loss": 2.0183, "step": 30999 }, { "epoch": 3.18557336621455, "grad_norm": 0.08618062734603882, "learning_rate": 0.01, "loss": 2.0074, "step": 31002 }, { "epoch": 3.1858816276202218, "grad_norm": 0.08712664991617203, "learning_rate": 0.01, "loss": 2.0084, "step": 31005 }, { "epoch": 3.186189889025894, "grad_norm": 0.05723334103822708, "learning_rate": 0.01, "loss": 2.0231, "step": 31008 }, { "epoch": 3.186498150431566, "grad_norm": 0.06170898675918579, "learning_rate": 0.01, "loss": 2.0079, "step": 31011 }, { "epoch": 3.186806411837238, "grad_norm": 0.04730013385415077, "learning_rate": 0.01, "loss": 2.0081, "step": 31014 }, { "epoch": 3.18711467324291, "grad_norm": 0.0381946824491024, "learning_rate": 0.01, "loss": 2.0194, "step": 31017 }, { "epoch": 3.187422934648582, "grad_norm": 0.09591019153594971, "learning_rate": 0.01, "loss": 1.9788, "step": 31020 }, { "epoch": 3.187731196054254, "grad_norm": 0.047843087464571, "learning_rate": 0.01, "loss": 1.9948, "step": 31023 }, { "epoch": 3.188039457459926, "grad_norm": 0.04370959475636482, "learning_rate": 0.01, "loss": 2.0249, "step": 31026 }, { "epoch": 3.188347718865598, "grad_norm": 0.060692187398672104, "learning_rate": 0.01, "loss": 2.0313, "step": 31029 }, { "epoch": 3.18865598027127, "grad_norm": 0.05906793847680092, "learning_rate": 0.01, "loss": 2.0144, "step": 31032 }, { "epoch": 3.188964241676942, "grad_norm": 0.06203675642609596, "learning_rate": 0.01, "loss": 2.0325, "step": 31035 }, { "epoch": 3.189272503082614, "grad_norm": 0.07943592220544815, "learning_rate": 0.01, "loss": 2.0079, "step": 31038 }, { "epoch": 3.189580764488286, "grad_norm": 0.08803451061248779, "learning_rate": 0.01, "loss": 2.0055, "step": 31041 }, { "epoch": 3.189889025893958, "grad_norm": 0.07365550100803375, "learning_rate": 0.01, "loss": 2.0234, "step": 31044 }, { "epoch": 3.1901972872996303, "grad_norm": 0.0795077532529831, "learning_rate": 0.01, "loss": 2.0253, "step": 31047 }, { "epoch": 3.190505548705302, "grad_norm": 0.08341194689273834, "learning_rate": 0.01, "loss": 1.9972, "step": 31050 }, { "epoch": 3.190813810110974, "grad_norm": 0.05842220410704613, "learning_rate": 0.01, "loss": 2.0244, "step": 31053 }, { "epoch": 3.191122071516646, "grad_norm": 0.09980861842632294, "learning_rate": 0.01, "loss": 2.0056, "step": 31056 }, { "epoch": 3.191430332922318, "grad_norm": 0.061474163085222244, "learning_rate": 0.01, "loss": 2.0059, "step": 31059 }, { "epoch": 3.1917385943279903, "grad_norm": 0.06752124428749084, "learning_rate": 0.01, "loss": 2.0243, "step": 31062 }, { "epoch": 3.192046855733662, "grad_norm": 0.06160835176706314, "learning_rate": 0.01, "loss": 1.9988, "step": 31065 }, { "epoch": 3.1923551171393343, "grad_norm": 0.09516782313585281, "learning_rate": 0.01, "loss": 1.9864, "step": 31068 }, { "epoch": 3.192663378545006, "grad_norm": 0.049451183527708054, "learning_rate": 0.01, "loss": 2.0311, "step": 31071 }, { "epoch": 3.1929716399506782, "grad_norm": 0.08874432742595673, "learning_rate": 0.01, "loss": 2.0198, "step": 31074 }, { "epoch": 3.19327990135635, "grad_norm": 0.07393426448106766, "learning_rate": 0.01, "loss": 1.9918, "step": 31077 }, { "epoch": 3.193588162762022, "grad_norm": 0.09064768254756927, "learning_rate": 0.01, "loss": 2.027, "step": 31080 }, { "epoch": 3.1938964241676944, "grad_norm": 0.05667688325047493, "learning_rate": 0.01, "loss": 2.0058, "step": 31083 }, { "epoch": 3.194204685573366, "grad_norm": 0.03858955577015877, "learning_rate": 0.01, "loss": 2.0184, "step": 31086 }, { "epoch": 3.1945129469790383, "grad_norm": 0.03137395530939102, "learning_rate": 0.01, "loss": 1.9912, "step": 31089 }, { "epoch": 3.19482120838471, "grad_norm": 0.09366928040981293, "learning_rate": 0.01, "loss": 2.0279, "step": 31092 }, { "epoch": 3.1951294697903823, "grad_norm": 0.05282336100935936, "learning_rate": 0.01, "loss": 2.0145, "step": 31095 }, { "epoch": 3.1954377311960545, "grad_norm": 0.07649802416563034, "learning_rate": 0.01, "loss": 1.9961, "step": 31098 }, { "epoch": 3.195745992601726, "grad_norm": 0.07249470800161362, "learning_rate": 0.01, "loss": 2.0063, "step": 31101 }, { "epoch": 3.1960542540073984, "grad_norm": 0.12900890409946442, "learning_rate": 0.01, "loss": 1.9952, "step": 31104 }, { "epoch": 3.19636251541307, "grad_norm": 0.08060257881879807, "learning_rate": 0.01, "loss": 2.0247, "step": 31107 }, { "epoch": 3.1966707768187423, "grad_norm": 0.07051622122526169, "learning_rate": 0.01, "loss": 1.9965, "step": 31110 }, { "epoch": 3.1969790382244145, "grad_norm": 0.0567597970366478, "learning_rate": 0.01, "loss": 2.0072, "step": 31113 }, { "epoch": 3.1972872996300863, "grad_norm": 0.053274448961019516, "learning_rate": 0.01, "loss": 1.9992, "step": 31116 }, { "epoch": 3.1975955610357585, "grad_norm": 0.0634993240237236, "learning_rate": 0.01, "loss": 2.0111, "step": 31119 }, { "epoch": 3.1979038224414302, "grad_norm": 0.060209862887859344, "learning_rate": 0.01, "loss": 2.0062, "step": 31122 }, { "epoch": 3.1982120838471024, "grad_norm": 0.05891990661621094, "learning_rate": 0.01, "loss": 2.0394, "step": 31125 }, { "epoch": 3.198520345252774, "grad_norm": 0.03743661195039749, "learning_rate": 0.01, "loss": 1.993, "step": 31128 }, { "epoch": 3.1988286066584464, "grad_norm": 0.047772981226444244, "learning_rate": 0.01, "loss": 1.9922, "step": 31131 }, { "epoch": 3.1991368680641186, "grad_norm": 0.09134045243263245, "learning_rate": 0.01, "loss": 2.0073, "step": 31134 }, { "epoch": 3.1994451294697903, "grad_norm": 0.040920648723840714, "learning_rate": 0.01, "loss": 1.9876, "step": 31137 }, { "epoch": 3.1997533908754625, "grad_norm": 0.09245988726615906, "learning_rate": 0.01, "loss": 1.993, "step": 31140 }, { "epoch": 3.2000616522811343, "grad_norm": 0.06823042035102844, "learning_rate": 0.01, "loss": 1.9983, "step": 31143 }, { "epoch": 3.2003699136868065, "grad_norm": 0.10324928909540176, "learning_rate": 0.01, "loss": 2.0429, "step": 31146 }, { "epoch": 3.200678175092478, "grad_norm": 0.09294021129608154, "learning_rate": 0.01, "loss": 2.0278, "step": 31149 }, { "epoch": 3.2009864364981504, "grad_norm": 0.07790663093328476, "learning_rate": 0.01, "loss": 2.0043, "step": 31152 }, { "epoch": 3.2012946979038226, "grad_norm": 0.041240107268095016, "learning_rate": 0.01, "loss": 2.0223, "step": 31155 }, { "epoch": 3.2016029593094943, "grad_norm": 0.042019765824079514, "learning_rate": 0.01, "loss": 2.0019, "step": 31158 }, { "epoch": 3.2019112207151665, "grad_norm": 0.03546491637825966, "learning_rate": 0.01, "loss": 1.9972, "step": 31161 }, { "epoch": 3.2022194821208383, "grad_norm": 0.08740135282278061, "learning_rate": 0.01, "loss": 2.0364, "step": 31164 }, { "epoch": 3.2025277435265105, "grad_norm": 0.06613611429929733, "learning_rate": 0.01, "loss": 2.0345, "step": 31167 }, { "epoch": 3.2028360049321827, "grad_norm": 0.1089286357164383, "learning_rate": 0.01, "loss": 2.0027, "step": 31170 }, { "epoch": 3.2031442663378544, "grad_norm": 0.037778157740831375, "learning_rate": 0.01, "loss": 2.0168, "step": 31173 }, { "epoch": 3.2034525277435266, "grad_norm": 0.07214018702507019, "learning_rate": 0.01, "loss": 2.0162, "step": 31176 }, { "epoch": 3.2037607891491984, "grad_norm": 0.05182633548974991, "learning_rate": 0.01, "loss": 2.0078, "step": 31179 }, { "epoch": 3.2040690505548706, "grad_norm": 0.043808627873659134, "learning_rate": 0.01, "loss": 2.0266, "step": 31182 }, { "epoch": 3.2043773119605428, "grad_norm": 0.03699186071753502, "learning_rate": 0.01, "loss": 2.0391, "step": 31185 }, { "epoch": 3.2046855733662145, "grad_norm": 0.10249976068735123, "learning_rate": 0.01, "loss": 1.9939, "step": 31188 }, { "epoch": 3.2049938347718867, "grad_norm": 0.05326079949736595, "learning_rate": 0.01, "loss": 2.0082, "step": 31191 }, { "epoch": 3.2053020961775585, "grad_norm": 0.09879110008478165, "learning_rate": 0.01, "loss": 2.0149, "step": 31194 }, { "epoch": 3.2056103575832307, "grad_norm": 0.042158786207437515, "learning_rate": 0.01, "loss": 1.9861, "step": 31197 }, { "epoch": 3.2059186189889024, "grad_norm": 0.13437750935554504, "learning_rate": 0.01, "loss": 2.0179, "step": 31200 }, { "epoch": 3.2062268803945746, "grad_norm": 0.07030022144317627, "learning_rate": 0.01, "loss": 2.0499, "step": 31203 }, { "epoch": 3.206535141800247, "grad_norm": 0.044758979231119156, "learning_rate": 0.01, "loss": 1.9956, "step": 31206 }, { "epoch": 3.2068434032059185, "grad_norm": 0.030563069507479668, "learning_rate": 0.01, "loss": 1.9987, "step": 31209 }, { "epoch": 3.2071516646115907, "grad_norm": 0.047487739473581314, "learning_rate": 0.01, "loss": 1.9763, "step": 31212 }, { "epoch": 3.2074599260172625, "grad_norm": 0.044461995363235474, "learning_rate": 0.01, "loss": 2.015, "step": 31215 }, { "epoch": 3.2077681874229347, "grad_norm": 0.16204911470413208, "learning_rate": 0.01, "loss": 2.0259, "step": 31218 }, { "epoch": 3.2080764488286064, "grad_norm": 0.0470188669860363, "learning_rate": 0.01, "loss": 2.0244, "step": 31221 }, { "epoch": 3.2083847102342786, "grad_norm": 0.04323815181851387, "learning_rate": 0.01, "loss": 2.0158, "step": 31224 }, { "epoch": 3.208692971639951, "grad_norm": 0.04388147220015526, "learning_rate": 0.01, "loss": 1.9996, "step": 31227 }, { "epoch": 3.2090012330456226, "grad_norm": 0.06811438500881195, "learning_rate": 0.01, "loss": 2.0196, "step": 31230 }, { "epoch": 3.2093094944512948, "grad_norm": 0.09423845261335373, "learning_rate": 0.01, "loss": 1.99, "step": 31233 }, { "epoch": 3.2096177558569665, "grad_norm": 0.05107983946800232, "learning_rate": 0.01, "loss": 2.0203, "step": 31236 }, { "epoch": 3.2099260172626387, "grad_norm": 0.06188793480396271, "learning_rate": 0.01, "loss": 2.0283, "step": 31239 }, { "epoch": 3.210234278668311, "grad_norm": 0.04658438265323639, "learning_rate": 0.01, "loss": 1.9986, "step": 31242 }, { "epoch": 3.2105425400739827, "grad_norm": 0.05382300913333893, "learning_rate": 0.01, "loss": 2.0242, "step": 31245 }, { "epoch": 3.210850801479655, "grad_norm": 0.04271601140499115, "learning_rate": 0.01, "loss": 1.9952, "step": 31248 }, { "epoch": 3.2111590628853266, "grad_norm": 0.06256501376628876, "learning_rate": 0.01, "loss": 2.0109, "step": 31251 }, { "epoch": 3.211467324290999, "grad_norm": 0.03523874282836914, "learning_rate": 0.01, "loss": 2.0107, "step": 31254 }, { "epoch": 3.211775585696671, "grad_norm": 0.18781809508800507, "learning_rate": 0.01, "loss": 2.0421, "step": 31257 }, { "epoch": 3.2120838471023427, "grad_norm": 0.11113902926445007, "learning_rate": 0.01, "loss": 2.0158, "step": 31260 }, { "epoch": 3.212392108508015, "grad_norm": 0.11578498035669327, "learning_rate": 0.01, "loss": 1.9993, "step": 31263 }, { "epoch": 3.2127003699136867, "grad_norm": 0.06620439887046814, "learning_rate": 0.01, "loss": 1.9711, "step": 31266 }, { "epoch": 3.213008631319359, "grad_norm": 0.04588307812809944, "learning_rate": 0.01, "loss": 2.0262, "step": 31269 }, { "epoch": 3.2133168927250306, "grad_norm": 0.06933780014514923, "learning_rate": 0.01, "loss": 2.0117, "step": 31272 }, { "epoch": 3.213625154130703, "grad_norm": 0.03479130566120148, "learning_rate": 0.01, "loss": 1.996, "step": 31275 }, { "epoch": 3.213933415536375, "grad_norm": 0.05823906138539314, "learning_rate": 0.01, "loss": 1.9988, "step": 31278 }, { "epoch": 3.2142416769420468, "grad_norm": 0.044943809509277344, "learning_rate": 0.01, "loss": 2.0121, "step": 31281 }, { "epoch": 3.214549938347719, "grad_norm": 0.05428524687886238, "learning_rate": 0.01, "loss": 1.9761, "step": 31284 }, { "epoch": 3.2148581997533907, "grad_norm": 0.07103761285543442, "learning_rate": 0.01, "loss": 2.0401, "step": 31287 }, { "epoch": 3.215166461159063, "grad_norm": 0.1237725242972374, "learning_rate": 0.01, "loss": 2.023, "step": 31290 }, { "epoch": 3.215474722564735, "grad_norm": 0.03638492897152901, "learning_rate": 0.01, "loss": 2.0036, "step": 31293 }, { "epoch": 3.215782983970407, "grad_norm": 0.05026063695549965, "learning_rate": 0.01, "loss": 1.9929, "step": 31296 }, { "epoch": 3.216091245376079, "grad_norm": 0.09602409601211548, "learning_rate": 0.01, "loss": 2.011, "step": 31299 }, { "epoch": 3.216399506781751, "grad_norm": 0.12711849808692932, "learning_rate": 0.01, "loss": 1.9912, "step": 31302 }, { "epoch": 3.216707768187423, "grad_norm": 0.051612287759780884, "learning_rate": 0.01, "loss": 2.0397, "step": 31305 }, { "epoch": 3.2170160295930947, "grad_norm": 0.055910855531692505, "learning_rate": 0.01, "loss": 2.0282, "step": 31308 }, { "epoch": 3.217324290998767, "grad_norm": 0.053995631635189056, "learning_rate": 0.01, "loss": 2.0034, "step": 31311 }, { "epoch": 3.217632552404439, "grad_norm": 0.03721768036484718, "learning_rate": 0.01, "loss": 1.9957, "step": 31314 }, { "epoch": 3.217940813810111, "grad_norm": 0.04018721356987953, "learning_rate": 0.01, "loss": 2.0215, "step": 31317 }, { "epoch": 3.218249075215783, "grad_norm": 0.06476118415594101, "learning_rate": 0.01, "loss": 2.0138, "step": 31320 }, { "epoch": 3.218557336621455, "grad_norm": 0.09766072034835815, "learning_rate": 0.01, "loss": 2.0343, "step": 31323 }, { "epoch": 3.218865598027127, "grad_norm": 0.0436365082859993, "learning_rate": 0.01, "loss": 2.0081, "step": 31326 }, { "epoch": 3.219173859432799, "grad_norm": 0.04617559164762497, "learning_rate": 0.01, "loss": 1.9809, "step": 31329 }, { "epoch": 3.219482120838471, "grad_norm": 0.0487680621445179, "learning_rate": 0.01, "loss": 2.0244, "step": 31332 }, { "epoch": 3.219790382244143, "grad_norm": 0.036623213440179825, "learning_rate": 0.01, "loss": 2.0159, "step": 31335 }, { "epoch": 3.220098643649815, "grad_norm": 0.04713229089975357, "learning_rate": 0.01, "loss": 2.01, "step": 31338 }, { "epoch": 3.220406905055487, "grad_norm": 0.03848060593008995, "learning_rate": 0.01, "loss": 1.9975, "step": 31341 }, { "epoch": 3.220715166461159, "grad_norm": 0.03674410656094551, "learning_rate": 0.01, "loss": 2.031, "step": 31344 }, { "epoch": 3.221023427866831, "grad_norm": 0.09635547548532486, "learning_rate": 0.01, "loss": 2.0069, "step": 31347 }, { "epoch": 3.2213316892725032, "grad_norm": 0.06311628222465515, "learning_rate": 0.01, "loss": 1.9864, "step": 31350 }, { "epoch": 3.221639950678175, "grad_norm": 0.05867060646414757, "learning_rate": 0.01, "loss": 2.0282, "step": 31353 }, { "epoch": 3.221948212083847, "grad_norm": 0.0816822350025177, "learning_rate": 0.01, "loss": 2.0192, "step": 31356 }, { "epoch": 3.222256473489519, "grad_norm": 0.0825878456234932, "learning_rate": 0.01, "loss": 2.0045, "step": 31359 }, { "epoch": 3.222564734895191, "grad_norm": 0.06253322958946228, "learning_rate": 0.01, "loss": 2.0461, "step": 31362 }, { "epoch": 3.2228729963008633, "grad_norm": 0.07637360692024231, "learning_rate": 0.01, "loss": 2.0028, "step": 31365 }, { "epoch": 3.223181257706535, "grad_norm": 0.081746406853199, "learning_rate": 0.01, "loss": 2.0226, "step": 31368 }, { "epoch": 3.2234895191122073, "grad_norm": 0.11683212220668793, "learning_rate": 0.01, "loss": 2.0075, "step": 31371 }, { "epoch": 3.223797780517879, "grad_norm": 0.07254528999328613, "learning_rate": 0.01, "loss": 2.0392, "step": 31374 }, { "epoch": 3.224106041923551, "grad_norm": 0.05394696444272995, "learning_rate": 0.01, "loss": 2.0199, "step": 31377 }, { "epoch": 3.2244143033292234, "grad_norm": 0.044905390590429306, "learning_rate": 0.01, "loss": 2.0156, "step": 31380 }, { "epoch": 3.224722564734895, "grad_norm": 0.07633423805236816, "learning_rate": 0.01, "loss": 2.0096, "step": 31383 }, { "epoch": 3.2250308261405674, "grad_norm": 0.03892616555094719, "learning_rate": 0.01, "loss": 2.014, "step": 31386 }, { "epoch": 3.225339087546239, "grad_norm": 0.03514908254146576, "learning_rate": 0.01, "loss": 2.0173, "step": 31389 }, { "epoch": 3.2256473489519113, "grad_norm": 0.07658538222312927, "learning_rate": 0.01, "loss": 2.0052, "step": 31392 }, { "epoch": 3.225955610357583, "grad_norm": 0.1004643440246582, "learning_rate": 0.01, "loss": 2.01, "step": 31395 }, { "epoch": 3.2262638717632552, "grad_norm": 0.08478312194347382, "learning_rate": 0.01, "loss": 2.0184, "step": 31398 }, { "epoch": 3.2265721331689274, "grad_norm": 0.05344710871577263, "learning_rate": 0.01, "loss": 1.9995, "step": 31401 }, { "epoch": 3.226880394574599, "grad_norm": 0.05862101912498474, "learning_rate": 0.01, "loss": 1.9907, "step": 31404 }, { "epoch": 3.2271886559802714, "grad_norm": 0.03430565074086189, "learning_rate": 0.01, "loss": 1.9976, "step": 31407 }, { "epoch": 3.227496917385943, "grad_norm": 0.03822310268878937, "learning_rate": 0.01, "loss": 2.0081, "step": 31410 }, { "epoch": 3.2278051787916153, "grad_norm": 0.04391561448574066, "learning_rate": 0.01, "loss": 2.0239, "step": 31413 }, { "epoch": 3.228113440197287, "grad_norm": 0.053595174103975296, "learning_rate": 0.01, "loss": 1.994, "step": 31416 }, { "epoch": 3.2284217016029593, "grad_norm": 0.06710030138492584, "learning_rate": 0.01, "loss": 2.0249, "step": 31419 }, { "epoch": 3.2287299630086315, "grad_norm": 0.19077260792255402, "learning_rate": 0.01, "loss": 2.0214, "step": 31422 }, { "epoch": 3.229038224414303, "grad_norm": 0.050228483974933624, "learning_rate": 0.01, "loss": 1.9957, "step": 31425 }, { "epoch": 3.2293464858199754, "grad_norm": 0.047246966511011124, "learning_rate": 0.01, "loss": 1.9984, "step": 31428 }, { "epoch": 3.229654747225647, "grad_norm": 0.03976801037788391, "learning_rate": 0.01, "loss": 2.0247, "step": 31431 }, { "epoch": 3.2299630086313194, "grad_norm": 0.052110929042100906, "learning_rate": 0.01, "loss": 2.0176, "step": 31434 }, { "epoch": 3.2302712700369915, "grad_norm": 0.1090363934636116, "learning_rate": 0.01, "loss": 2.011, "step": 31437 }, { "epoch": 3.2305795314426633, "grad_norm": 0.13336141407489777, "learning_rate": 0.01, "loss": 2.0023, "step": 31440 }, { "epoch": 3.2308877928483355, "grad_norm": 0.11287815868854523, "learning_rate": 0.01, "loss": 2.0384, "step": 31443 }, { "epoch": 3.2311960542540072, "grad_norm": 0.05775724723935127, "learning_rate": 0.01, "loss": 2.0001, "step": 31446 }, { "epoch": 3.2315043156596794, "grad_norm": 0.0489221066236496, "learning_rate": 0.01, "loss": 1.9857, "step": 31449 }, { "epoch": 3.2318125770653516, "grad_norm": 0.10708906501531601, "learning_rate": 0.01, "loss": 2.0214, "step": 31452 }, { "epoch": 3.2321208384710234, "grad_norm": 0.04961223527789116, "learning_rate": 0.01, "loss": 2.0244, "step": 31455 }, { "epoch": 3.2324290998766956, "grad_norm": 0.04106178134679794, "learning_rate": 0.01, "loss": 2.027, "step": 31458 }, { "epoch": 3.2327373612823673, "grad_norm": 0.051446568220853806, "learning_rate": 0.01, "loss": 2.0078, "step": 31461 }, { "epoch": 3.2330456226880395, "grad_norm": 0.046735066920518875, "learning_rate": 0.01, "loss": 2.0196, "step": 31464 }, { "epoch": 3.2333538840937113, "grad_norm": 0.04740822687745094, "learning_rate": 0.01, "loss": 1.9849, "step": 31467 }, { "epoch": 3.2336621454993835, "grad_norm": 0.03593340888619423, "learning_rate": 0.01, "loss": 2.0055, "step": 31470 }, { "epoch": 3.2339704069050557, "grad_norm": 0.058127082884311676, "learning_rate": 0.01, "loss": 2.0318, "step": 31473 }, { "epoch": 3.2342786683107274, "grad_norm": 0.05138585716485977, "learning_rate": 0.01, "loss": 2.0052, "step": 31476 }, { "epoch": 3.2345869297163996, "grad_norm": 0.04029039293527603, "learning_rate": 0.01, "loss": 2.0272, "step": 31479 }, { "epoch": 3.2348951911220714, "grad_norm": 0.05703110247850418, "learning_rate": 0.01, "loss": 2.0115, "step": 31482 }, { "epoch": 3.2352034525277436, "grad_norm": 0.06218143180012703, "learning_rate": 0.01, "loss": 1.9987, "step": 31485 }, { "epoch": 3.2355117139334153, "grad_norm": 0.07277306169271469, "learning_rate": 0.01, "loss": 2.0413, "step": 31488 }, { "epoch": 3.2358199753390875, "grad_norm": 0.08520001918077469, "learning_rate": 0.01, "loss": 2.0058, "step": 31491 }, { "epoch": 3.2361282367447597, "grad_norm": 0.062044426798820496, "learning_rate": 0.01, "loss": 2.0055, "step": 31494 }, { "epoch": 3.2364364981504314, "grad_norm": 0.062475502490997314, "learning_rate": 0.01, "loss": 2.0124, "step": 31497 }, { "epoch": 3.2367447595561036, "grad_norm": 0.05683014541864395, "learning_rate": 0.01, "loss": 2.0253, "step": 31500 }, { "epoch": 3.2370530209617754, "grad_norm": 0.0409809909760952, "learning_rate": 0.01, "loss": 2.0286, "step": 31503 }, { "epoch": 3.2373612823674476, "grad_norm": 0.05398055911064148, "learning_rate": 0.01, "loss": 2.0239, "step": 31506 }, { "epoch": 3.2376695437731198, "grad_norm": 0.09552880376577377, "learning_rate": 0.01, "loss": 2.0232, "step": 31509 }, { "epoch": 3.2379778051787915, "grad_norm": 0.042877551168203354, "learning_rate": 0.01, "loss": 2.007, "step": 31512 }, { "epoch": 3.2382860665844637, "grad_norm": 0.03086467832326889, "learning_rate": 0.01, "loss": 1.9831, "step": 31515 }, { "epoch": 3.2385943279901355, "grad_norm": 0.03575371578335762, "learning_rate": 0.01, "loss": 1.9614, "step": 31518 }, { "epoch": 3.2389025893958077, "grad_norm": 0.04962699115276337, "learning_rate": 0.01, "loss": 2.0084, "step": 31521 }, { "epoch": 3.23921085080148, "grad_norm": 0.07324356585741043, "learning_rate": 0.01, "loss": 2.0213, "step": 31524 }, { "epoch": 3.2395191122071516, "grad_norm": 0.08169112354516983, "learning_rate": 0.01, "loss": 2.0061, "step": 31527 }, { "epoch": 3.239827373612824, "grad_norm": 0.04129362106323242, "learning_rate": 0.01, "loss": 2.0143, "step": 31530 }, { "epoch": 3.2401356350184956, "grad_norm": 0.04921339079737663, "learning_rate": 0.01, "loss": 2.0248, "step": 31533 }, { "epoch": 3.2404438964241677, "grad_norm": 0.048255033791065216, "learning_rate": 0.01, "loss": 2.0486, "step": 31536 }, { "epoch": 3.2407521578298395, "grad_norm": 0.0420132540166378, "learning_rate": 0.01, "loss": 2.0254, "step": 31539 }, { "epoch": 3.2410604192355117, "grad_norm": 0.037761542946100235, "learning_rate": 0.01, "loss": 2.0105, "step": 31542 }, { "epoch": 3.241368680641184, "grad_norm": 0.059902340173721313, "learning_rate": 0.01, "loss": 2.0371, "step": 31545 }, { "epoch": 3.2416769420468556, "grad_norm": 0.07208621501922607, "learning_rate": 0.01, "loss": 2.014, "step": 31548 }, { "epoch": 3.241985203452528, "grad_norm": 0.039220135658979416, "learning_rate": 0.01, "loss": 2.0208, "step": 31551 }, { "epoch": 3.2422934648581996, "grad_norm": 0.09032812714576721, "learning_rate": 0.01, "loss": 1.9956, "step": 31554 }, { "epoch": 3.2426017262638718, "grad_norm": 0.04668070003390312, "learning_rate": 0.01, "loss": 2.0204, "step": 31557 }, { "epoch": 3.242909987669544, "grad_norm": 0.07167590409517288, "learning_rate": 0.01, "loss": 1.9882, "step": 31560 }, { "epoch": 3.2432182490752157, "grad_norm": 0.09092319756746292, "learning_rate": 0.01, "loss": 2.0257, "step": 31563 }, { "epoch": 3.243526510480888, "grad_norm": 0.05797998234629631, "learning_rate": 0.01, "loss": 1.9923, "step": 31566 }, { "epoch": 3.2438347718865597, "grad_norm": 0.05728116258978844, "learning_rate": 0.01, "loss": 2.0134, "step": 31569 }, { "epoch": 3.244143033292232, "grad_norm": 0.10221456736326218, "learning_rate": 0.01, "loss": 2.0452, "step": 31572 }, { "epoch": 3.244451294697904, "grad_norm": 0.0785583034157753, "learning_rate": 0.01, "loss": 1.9956, "step": 31575 }, { "epoch": 3.244759556103576, "grad_norm": 0.06105490028858185, "learning_rate": 0.01, "loss": 2.035, "step": 31578 }, { "epoch": 3.245067817509248, "grad_norm": 0.031959742307662964, "learning_rate": 0.01, "loss": 2.0186, "step": 31581 }, { "epoch": 3.2453760789149197, "grad_norm": 0.07373019307851791, "learning_rate": 0.01, "loss": 1.9778, "step": 31584 }, { "epoch": 3.245684340320592, "grad_norm": 0.05989330634474754, "learning_rate": 0.01, "loss": 2.0059, "step": 31587 }, { "epoch": 3.2459926017262637, "grad_norm": 0.0927957072854042, "learning_rate": 0.01, "loss": 1.9979, "step": 31590 }, { "epoch": 3.246300863131936, "grad_norm": 0.048555102199316025, "learning_rate": 0.01, "loss": 2.0111, "step": 31593 }, { "epoch": 3.246609124537608, "grad_norm": 0.05515364184975624, "learning_rate": 0.01, "loss": 2.0052, "step": 31596 }, { "epoch": 3.24691738594328, "grad_norm": 0.04753278195858002, "learning_rate": 0.01, "loss": 2.0171, "step": 31599 }, { "epoch": 3.247225647348952, "grad_norm": 0.06210014224052429, "learning_rate": 0.01, "loss": 1.9689, "step": 31602 }, { "epoch": 3.2475339087546238, "grad_norm": 0.060913342982530594, "learning_rate": 0.01, "loss": 2.0183, "step": 31605 }, { "epoch": 3.247842170160296, "grad_norm": 0.08301867544651031, "learning_rate": 0.01, "loss": 2.0182, "step": 31608 }, { "epoch": 3.2481504315659677, "grad_norm": 0.06884342432022095, "learning_rate": 0.01, "loss": 2.0332, "step": 31611 }, { "epoch": 3.24845869297164, "grad_norm": 0.046835094690322876, "learning_rate": 0.01, "loss": 2.0099, "step": 31614 }, { "epoch": 3.248766954377312, "grad_norm": 0.11094243824481964, "learning_rate": 0.01, "loss": 2.0319, "step": 31617 }, { "epoch": 3.249075215782984, "grad_norm": 0.0603213869035244, "learning_rate": 0.01, "loss": 2.0181, "step": 31620 }, { "epoch": 3.249383477188656, "grad_norm": 0.07283317297697067, "learning_rate": 0.01, "loss": 2.0205, "step": 31623 }, { "epoch": 3.249691738594328, "grad_norm": 0.05507282167673111, "learning_rate": 0.01, "loss": 2.0082, "step": 31626 }, { "epoch": 3.25, "grad_norm": 0.039191924035549164, "learning_rate": 0.01, "loss": 1.9936, "step": 31629 }, { "epoch": 3.250308261405672, "grad_norm": 0.1467886120080948, "learning_rate": 0.01, "loss": 2.0235, "step": 31632 }, { "epoch": 3.250616522811344, "grad_norm": 0.06955720484256744, "learning_rate": 0.01, "loss": 2.0168, "step": 31635 }, { "epoch": 3.250924784217016, "grad_norm": 0.0634884312748909, "learning_rate": 0.01, "loss": 2.0123, "step": 31638 }, { "epoch": 3.251233045622688, "grad_norm": 0.035188958048820496, "learning_rate": 0.01, "loss": 2.0061, "step": 31641 }, { "epoch": 3.25154130702836, "grad_norm": 0.07601841539144516, "learning_rate": 0.01, "loss": 2.0046, "step": 31644 }, { "epoch": 3.2518495684340323, "grad_norm": 0.06929823011159897, "learning_rate": 0.01, "loss": 2.0213, "step": 31647 }, { "epoch": 3.252157829839704, "grad_norm": 0.06300003081560135, "learning_rate": 0.01, "loss": 2.0111, "step": 31650 }, { "epoch": 3.2524660912453762, "grad_norm": 0.04881738871335983, "learning_rate": 0.01, "loss": 2.0423, "step": 31653 }, { "epoch": 3.252774352651048, "grad_norm": 0.04084230959415436, "learning_rate": 0.01, "loss": 2.0284, "step": 31656 }, { "epoch": 3.25308261405672, "grad_norm": 0.10158465802669525, "learning_rate": 0.01, "loss": 2.0084, "step": 31659 }, { "epoch": 3.253390875462392, "grad_norm": 0.04703471064567566, "learning_rate": 0.01, "loss": 1.9824, "step": 31662 }, { "epoch": 3.253699136868064, "grad_norm": 0.11601495742797852, "learning_rate": 0.01, "loss": 2.0249, "step": 31665 }, { "epoch": 3.2540073982737363, "grad_norm": 0.037081558257341385, "learning_rate": 0.01, "loss": 2.0104, "step": 31668 }, { "epoch": 3.254315659679408, "grad_norm": 0.03952954337000847, "learning_rate": 0.01, "loss": 2.023, "step": 31671 }, { "epoch": 3.2546239210850803, "grad_norm": 0.09437094628810883, "learning_rate": 0.01, "loss": 2.0528, "step": 31674 }, { "epoch": 3.254932182490752, "grad_norm": 0.04648669436573982, "learning_rate": 0.01, "loss": 1.9822, "step": 31677 }, { "epoch": 3.255240443896424, "grad_norm": 0.06830704212188721, "learning_rate": 0.01, "loss": 2.0029, "step": 31680 }, { "epoch": 3.255548705302096, "grad_norm": 0.039575349539518356, "learning_rate": 0.01, "loss": 2.0177, "step": 31683 }, { "epoch": 3.255856966707768, "grad_norm": 0.046931225806474686, "learning_rate": 0.01, "loss": 2.0081, "step": 31686 }, { "epoch": 3.2561652281134403, "grad_norm": 0.039921220391988754, "learning_rate": 0.01, "loss": 1.9982, "step": 31689 }, { "epoch": 3.256473489519112, "grad_norm": 0.1725001186132431, "learning_rate": 0.01, "loss": 1.9933, "step": 31692 }, { "epoch": 3.2567817509247843, "grad_norm": 0.10149878263473511, "learning_rate": 0.01, "loss": 2.0031, "step": 31695 }, { "epoch": 3.2570900123304565, "grad_norm": 0.0641789436340332, "learning_rate": 0.01, "loss": 2.0298, "step": 31698 }, { "epoch": 3.2573982737361282, "grad_norm": 0.09631607681512833, "learning_rate": 0.01, "loss": 2.0369, "step": 31701 }, { "epoch": 3.2577065351418004, "grad_norm": 0.04941624775528908, "learning_rate": 0.01, "loss": 2.0078, "step": 31704 }, { "epoch": 3.258014796547472, "grad_norm": 0.03704220801591873, "learning_rate": 0.01, "loss": 2.0192, "step": 31707 }, { "epoch": 3.2583230579531444, "grad_norm": 0.05716734007000923, "learning_rate": 0.01, "loss": 2.0117, "step": 31710 }, { "epoch": 3.258631319358816, "grad_norm": 0.04187513142824173, "learning_rate": 0.01, "loss": 2.0298, "step": 31713 }, { "epoch": 3.2589395807644883, "grad_norm": 0.04240023344755173, "learning_rate": 0.01, "loss": 2.0227, "step": 31716 }, { "epoch": 3.2592478421701605, "grad_norm": 0.0434168316423893, "learning_rate": 0.01, "loss": 2.0293, "step": 31719 }, { "epoch": 3.2595561035758323, "grad_norm": 0.10827293992042542, "learning_rate": 0.01, "loss": 2.019, "step": 31722 }, { "epoch": 3.2598643649815044, "grad_norm": 0.04294963926076889, "learning_rate": 0.01, "loss": 2.0289, "step": 31725 }, { "epoch": 3.260172626387176, "grad_norm": 0.11746285110712051, "learning_rate": 0.01, "loss": 2.0168, "step": 31728 }, { "epoch": 3.2604808877928484, "grad_norm": 0.13203038275241852, "learning_rate": 0.01, "loss": 2.0092, "step": 31731 }, { "epoch": 3.26078914919852, "grad_norm": 0.12324203550815582, "learning_rate": 0.01, "loss": 2.0158, "step": 31734 }, { "epoch": 3.2610974106041923, "grad_norm": 0.09455161541700363, "learning_rate": 0.01, "loss": 2.0113, "step": 31737 }, { "epoch": 3.2614056720098645, "grad_norm": 0.0551525354385376, "learning_rate": 0.01, "loss": 2.0042, "step": 31740 }, { "epoch": 3.2617139334155363, "grad_norm": 0.04528261721134186, "learning_rate": 0.01, "loss": 2.0083, "step": 31743 }, { "epoch": 3.2620221948212085, "grad_norm": 0.049497511237859726, "learning_rate": 0.01, "loss": 1.9953, "step": 31746 }, { "epoch": 3.2623304562268802, "grad_norm": 0.04797462373971939, "learning_rate": 0.01, "loss": 1.9974, "step": 31749 }, { "epoch": 3.2626387176325524, "grad_norm": 0.047107502818107605, "learning_rate": 0.01, "loss": 1.99, "step": 31752 }, { "epoch": 3.262946979038224, "grad_norm": 0.03374217078089714, "learning_rate": 0.01, "loss": 1.98, "step": 31755 }, { "epoch": 3.2632552404438964, "grad_norm": 0.05429157614707947, "learning_rate": 0.01, "loss": 2.0089, "step": 31758 }, { "epoch": 3.2635635018495686, "grad_norm": 0.12066765129566193, "learning_rate": 0.01, "loss": 2.0365, "step": 31761 }, { "epoch": 3.2638717632552403, "grad_norm": 0.15858452022075653, "learning_rate": 0.01, "loss": 2.021, "step": 31764 }, { "epoch": 3.2641800246609125, "grad_norm": 0.05770573392510414, "learning_rate": 0.01, "loss": 2.0513, "step": 31767 }, { "epoch": 3.2644882860665847, "grad_norm": 0.053706735372543335, "learning_rate": 0.01, "loss": 1.9939, "step": 31770 }, { "epoch": 3.2647965474722564, "grad_norm": 0.03511708602309227, "learning_rate": 0.01, "loss": 2.0108, "step": 31773 }, { "epoch": 3.2651048088779286, "grad_norm": 0.036902979016304016, "learning_rate": 0.01, "loss": 2.0264, "step": 31776 }, { "epoch": 3.2654130702836004, "grad_norm": 0.0450621172785759, "learning_rate": 0.01, "loss": 2.0203, "step": 31779 }, { "epoch": 3.2657213316892726, "grad_norm": 0.07691927254199982, "learning_rate": 0.01, "loss": 2.0167, "step": 31782 }, { "epoch": 3.2660295930949443, "grad_norm": 0.09160054475069046, "learning_rate": 0.01, "loss": 2.0221, "step": 31785 }, { "epoch": 3.2663378545006165, "grad_norm": 0.09052203595638275, "learning_rate": 0.01, "loss": 2.0185, "step": 31788 }, { "epoch": 3.2666461159062887, "grad_norm": 0.08806884288787842, "learning_rate": 0.01, "loss": 2.0103, "step": 31791 }, { "epoch": 3.2669543773119605, "grad_norm": 0.05619393661618233, "learning_rate": 0.01, "loss": 1.9998, "step": 31794 }, { "epoch": 3.2672626387176327, "grad_norm": 0.05563674122095108, "learning_rate": 0.01, "loss": 2.015, "step": 31797 }, { "epoch": 3.2675709001233044, "grad_norm": 0.058199040591716766, "learning_rate": 0.01, "loss": 2.014, "step": 31800 }, { "epoch": 3.2678791615289766, "grad_norm": 0.06011686101555824, "learning_rate": 0.01, "loss": 2.0103, "step": 31803 }, { "epoch": 3.2681874229346484, "grad_norm": 0.05391063541173935, "learning_rate": 0.01, "loss": 2.0047, "step": 31806 }, { "epoch": 3.2684956843403206, "grad_norm": 0.1030382513999939, "learning_rate": 0.01, "loss": 2.0132, "step": 31809 }, { "epoch": 3.2688039457459928, "grad_norm": 0.03978987783193588, "learning_rate": 0.01, "loss": 2.0099, "step": 31812 }, { "epoch": 3.2691122071516645, "grad_norm": 0.08974360674619675, "learning_rate": 0.01, "loss": 1.9975, "step": 31815 }, { "epoch": 3.2694204685573367, "grad_norm": 0.07530324161052704, "learning_rate": 0.01, "loss": 2.0224, "step": 31818 }, { "epoch": 3.2697287299630085, "grad_norm": 0.04715275764465332, "learning_rate": 0.01, "loss": 2.0011, "step": 31821 }, { "epoch": 3.2700369913686806, "grad_norm": 0.050625238567590714, "learning_rate": 0.01, "loss": 1.9988, "step": 31824 }, { "epoch": 3.270345252774353, "grad_norm": 0.05567210912704468, "learning_rate": 0.01, "loss": 2.0554, "step": 31827 }, { "epoch": 3.2706535141800246, "grad_norm": 0.09813915193080902, "learning_rate": 0.01, "loss": 1.9994, "step": 31830 }, { "epoch": 3.270961775585697, "grad_norm": 0.08543343842029572, "learning_rate": 0.01, "loss": 2.0405, "step": 31833 }, { "epoch": 3.2712700369913685, "grad_norm": 0.03340763971209526, "learning_rate": 0.01, "loss": 2.0302, "step": 31836 }, { "epoch": 3.2715782983970407, "grad_norm": 0.0335637666285038, "learning_rate": 0.01, "loss": 2.0286, "step": 31839 }, { "epoch": 3.271886559802713, "grad_norm": 0.06983290612697601, "learning_rate": 0.01, "loss": 2.0155, "step": 31842 }, { "epoch": 3.2721948212083847, "grad_norm": 0.1157732903957367, "learning_rate": 0.01, "loss": 2.0133, "step": 31845 }, { "epoch": 3.272503082614057, "grad_norm": 0.11864369362592697, "learning_rate": 0.01, "loss": 2.0367, "step": 31848 }, { "epoch": 3.2728113440197286, "grad_norm": 0.09083148092031479, "learning_rate": 0.01, "loss": 2.025, "step": 31851 }, { "epoch": 3.273119605425401, "grad_norm": 0.06563573330640793, "learning_rate": 0.01, "loss": 2.0175, "step": 31854 }, { "epoch": 3.2734278668310726, "grad_norm": 0.043879635632038116, "learning_rate": 0.01, "loss": 2.0288, "step": 31857 }, { "epoch": 3.2737361282367448, "grad_norm": 0.03548846393823624, "learning_rate": 0.01, "loss": 2.0246, "step": 31860 }, { "epoch": 3.274044389642417, "grad_norm": 0.036084435880184174, "learning_rate": 0.01, "loss": 2.0007, "step": 31863 }, { "epoch": 3.2743526510480887, "grad_norm": 0.03813619166612625, "learning_rate": 0.01, "loss": 2.0024, "step": 31866 }, { "epoch": 3.274660912453761, "grad_norm": 0.05276734009385109, "learning_rate": 0.01, "loss": 1.9945, "step": 31869 }, { "epoch": 3.2749691738594326, "grad_norm": 0.07557803392410278, "learning_rate": 0.01, "loss": 2.0038, "step": 31872 }, { "epoch": 3.275277435265105, "grad_norm": 0.060760460793972015, "learning_rate": 0.01, "loss": 2.0283, "step": 31875 }, { "epoch": 3.2755856966707766, "grad_norm": 0.11498884111642838, "learning_rate": 0.01, "loss": 2.0256, "step": 31878 }, { "epoch": 3.275893958076449, "grad_norm": 0.12193593382835388, "learning_rate": 0.01, "loss": 1.9791, "step": 31881 }, { "epoch": 3.276202219482121, "grad_norm": 0.10637890547513962, "learning_rate": 0.01, "loss": 2.0055, "step": 31884 }, { "epoch": 3.2765104808877927, "grad_norm": 0.08633279800415039, "learning_rate": 0.01, "loss": 2.0151, "step": 31887 }, { "epoch": 3.276818742293465, "grad_norm": 0.08105628192424774, "learning_rate": 0.01, "loss": 2.0124, "step": 31890 }, { "epoch": 3.2771270036991367, "grad_norm": 0.03692932799458504, "learning_rate": 0.01, "loss": 2.0134, "step": 31893 }, { "epoch": 3.277435265104809, "grad_norm": 0.03064770996570587, "learning_rate": 0.01, "loss": 2.0088, "step": 31896 }, { "epoch": 3.277743526510481, "grad_norm": 0.038218267261981964, "learning_rate": 0.01, "loss": 2.0045, "step": 31899 }, { "epoch": 3.278051787916153, "grad_norm": 0.05573924258351326, "learning_rate": 0.01, "loss": 2.0289, "step": 31902 }, { "epoch": 3.278360049321825, "grad_norm": 0.14293062686920166, "learning_rate": 0.01, "loss": 2.0147, "step": 31905 }, { "epoch": 3.2786683107274968, "grad_norm": 0.082049660384655, "learning_rate": 0.01, "loss": 2.0159, "step": 31908 }, { "epoch": 3.278976572133169, "grad_norm": 0.08122528344392776, "learning_rate": 0.01, "loss": 2.0043, "step": 31911 }, { "epoch": 3.279284833538841, "grad_norm": 0.054671067744493484, "learning_rate": 0.01, "loss": 2.0054, "step": 31914 }, { "epoch": 3.279593094944513, "grad_norm": 0.11112997680902481, "learning_rate": 0.01, "loss": 1.9826, "step": 31917 }, { "epoch": 3.279901356350185, "grad_norm": 0.12811101973056793, "learning_rate": 0.01, "loss": 2.0242, "step": 31920 }, { "epoch": 3.280209617755857, "grad_norm": 0.07637016475200653, "learning_rate": 0.01, "loss": 1.9879, "step": 31923 }, { "epoch": 3.280517879161529, "grad_norm": 0.03716239705681801, "learning_rate": 0.01, "loss": 2.0055, "step": 31926 }, { "epoch": 3.280826140567201, "grad_norm": 0.03950963541865349, "learning_rate": 0.01, "loss": 1.9905, "step": 31929 }, { "epoch": 3.281134401972873, "grad_norm": 0.09012940526008606, "learning_rate": 0.01, "loss": 2.0422, "step": 31932 }, { "epoch": 3.281442663378545, "grad_norm": 0.04000500217080116, "learning_rate": 0.01, "loss": 2.0054, "step": 31935 }, { "epoch": 3.281750924784217, "grad_norm": 0.06817129254341125, "learning_rate": 0.01, "loss": 2.0131, "step": 31938 }, { "epoch": 3.282059186189889, "grad_norm": 0.14285409450531006, "learning_rate": 0.01, "loss": 2.0235, "step": 31941 }, { "epoch": 3.282367447595561, "grad_norm": 0.03885696083307266, "learning_rate": 0.01, "loss": 2.007, "step": 31944 }, { "epoch": 3.282675709001233, "grad_norm": 0.047379713505506516, "learning_rate": 0.01, "loss": 1.9929, "step": 31947 }, { "epoch": 3.282983970406905, "grad_norm": 0.05476104095578194, "learning_rate": 0.01, "loss": 2.0199, "step": 31950 }, { "epoch": 3.283292231812577, "grad_norm": 0.03158089146018028, "learning_rate": 0.01, "loss": 2.0053, "step": 31953 }, { "epoch": 3.283600493218249, "grad_norm": 0.04537857696413994, "learning_rate": 0.01, "loss": 1.991, "step": 31956 }, { "epoch": 3.283908754623921, "grad_norm": 0.1525343656539917, "learning_rate": 0.01, "loss": 1.9972, "step": 31959 }, { "epoch": 3.284217016029593, "grad_norm": 0.11916167289018631, "learning_rate": 0.01, "loss": 2.0089, "step": 31962 }, { "epoch": 3.2845252774352653, "grad_norm": 0.09215250611305237, "learning_rate": 0.01, "loss": 1.9746, "step": 31965 }, { "epoch": 3.284833538840937, "grad_norm": 0.053620483726263046, "learning_rate": 0.01, "loss": 2.0148, "step": 31968 }, { "epoch": 3.2851418002466093, "grad_norm": 0.03575912117958069, "learning_rate": 0.01, "loss": 2.0237, "step": 31971 }, { "epoch": 3.285450061652281, "grad_norm": 0.09148744493722916, "learning_rate": 0.01, "loss": 2.0179, "step": 31974 }, { "epoch": 3.2857583230579532, "grad_norm": 0.06433127075433731, "learning_rate": 0.01, "loss": 2.0172, "step": 31977 }, { "epoch": 3.286066584463625, "grad_norm": 0.04605916887521744, "learning_rate": 0.01, "loss": 1.9956, "step": 31980 }, { "epoch": 3.286374845869297, "grad_norm": 0.07150716334581375, "learning_rate": 0.01, "loss": 2.0251, "step": 31983 }, { "epoch": 3.2866831072749694, "grad_norm": 0.04914524033665657, "learning_rate": 0.01, "loss": 2.0002, "step": 31986 }, { "epoch": 3.286991368680641, "grad_norm": 0.10281821340322495, "learning_rate": 0.01, "loss": 2.0137, "step": 31989 }, { "epoch": 3.2872996300863133, "grad_norm": 0.04685597866773605, "learning_rate": 0.01, "loss": 2.0283, "step": 31992 }, { "epoch": 3.287607891491985, "grad_norm": 0.04735150560736656, "learning_rate": 0.01, "loss": 2.005, "step": 31995 }, { "epoch": 3.2879161528976573, "grad_norm": 0.07897822558879852, "learning_rate": 0.01, "loss": 2.0123, "step": 31998 }, { "epoch": 3.288224414303329, "grad_norm": 0.08091110736131668, "learning_rate": 0.01, "loss": 2.0397, "step": 32001 }, { "epoch": 3.288532675709001, "grad_norm": 0.038828156888484955, "learning_rate": 0.01, "loss": 2.0245, "step": 32004 }, { "epoch": 3.2888409371146734, "grad_norm": 0.11410044133663177, "learning_rate": 0.01, "loss": 2.0081, "step": 32007 }, { "epoch": 3.289149198520345, "grad_norm": 0.11741339415311813, "learning_rate": 0.01, "loss": 2.0055, "step": 32010 }, { "epoch": 3.2894574599260173, "grad_norm": 0.054089032113552094, "learning_rate": 0.01, "loss": 2.0179, "step": 32013 }, { "epoch": 3.289765721331689, "grad_norm": 0.034770797938108444, "learning_rate": 0.01, "loss": 2.0148, "step": 32016 }, { "epoch": 3.2900739827373613, "grad_norm": 0.06313812732696533, "learning_rate": 0.01, "loss": 2.0294, "step": 32019 }, { "epoch": 3.2903822441430335, "grad_norm": 0.05844837799668312, "learning_rate": 0.01, "loss": 2.0227, "step": 32022 }, { "epoch": 3.2906905055487052, "grad_norm": 0.0374666191637516, "learning_rate": 0.01, "loss": 2.0342, "step": 32025 }, { "epoch": 3.2909987669543774, "grad_norm": 0.05456427484750748, "learning_rate": 0.01, "loss": 1.9794, "step": 32028 }, { "epoch": 3.291307028360049, "grad_norm": 0.11318197846412659, "learning_rate": 0.01, "loss": 1.9995, "step": 32031 }, { "epoch": 3.2916152897657214, "grad_norm": 0.04832073673605919, "learning_rate": 0.01, "loss": 2.0108, "step": 32034 }, { "epoch": 3.2919235511713936, "grad_norm": 0.07790713757276535, "learning_rate": 0.01, "loss": 1.9893, "step": 32037 }, { "epoch": 3.2922318125770653, "grad_norm": 0.05794338509440422, "learning_rate": 0.01, "loss": 2.0029, "step": 32040 }, { "epoch": 3.2925400739827375, "grad_norm": 0.06488461047410965, "learning_rate": 0.01, "loss": 1.9965, "step": 32043 }, { "epoch": 3.2928483353884093, "grad_norm": 0.10804028809070587, "learning_rate": 0.01, "loss": 1.9938, "step": 32046 }, { "epoch": 3.2931565967940815, "grad_norm": 0.04927225038409233, "learning_rate": 0.01, "loss": 2.0111, "step": 32049 }, { "epoch": 3.293464858199753, "grad_norm": 0.039539139717817307, "learning_rate": 0.01, "loss": 2.0129, "step": 32052 }, { "epoch": 3.2937731196054254, "grad_norm": 0.046698734164237976, "learning_rate": 0.01, "loss": 2.0149, "step": 32055 }, { "epoch": 3.2940813810110976, "grad_norm": 0.07978003472089767, "learning_rate": 0.01, "loss": 2.0001, "step": 32058 }, { "epoch": 3.2943896424167693, "grad_norm": 0.05909251049160957, "learning_rate": 0.01, "loss": 2.018, "step": 32061 }, { "epoch": 3.2946979038224415, "grad_norm": 0.09814689308404922, "learning_rate": 0.01, "loss": 1.9935, "step": 32064 }, { "epoch": 3.2950061652281133, "grad_norm": 0.07647134363651276, "learning_rate": 0.01, "loss": 2.0316, "step": 32067 }, { "epoch": 3.2953144266337855, "grad_norm": 0.09203072637319565, "learning_rate": 0.01, "loss": 2.0075, "step": 32070 }, { "epoch": 3.2956226880394572, "grad_norm": 0.14764010906219482, "learning_rate": 0.01, "loss": 2.0052, "step": 32073 }, { "epoch": 3.2959309494451294, "grad_norm": 0.07483326643705368, "learning_rate": 0.01, "loss": 2.0065, "step": 32076 }, { "epoch": 3.2962392108508016, "grad_norm": 0.03683464601635933, "learning_rate": 0.01, "loss": 2.0092, "step": 32079 }, { "epoch": 3.2965474722564734, "grad_norm": 0.0658450499176979, "learning_rate": 0.01, "loss": 2.0134, "step": 32082 }, { "epoch": 3.2968557336621456, "grad_norm": 0.05505736172199249, "learning_rate": 0.01, "loss": 1.9978, "step": 32085 }, { "epoch": 3.2971639950678173, "grad_norm": 0.07295443117618561, "learning_rate": 0.01, "loss": 2.0444, "step": 32088 }, { "epoch": 3.2974722564734895, "grad_norm": 0.03865521401166916, "learning_rate": 0.01, "loss": 2.0084, "step": 32091 }, { "epoch": 3.2977805178791617, "grad_norm": 0.12185568362474442, "learning_rate": 0.01, "loss": 2.0011, "step": 32094 }, { "epoch": 3.2980887792848335, "grad_norm": 0.04646170511841774, "learning_rate": 0.01, "loss": 1.9925, "step": 32097 }, { "epoch": 3.2983970406905057, "grad_norm": 0.10177022218704224, "learning_rate": 0.01, "loss": 2.0001, "step": 32100 }, { "epoch": 3.2987053020961774, "grad_norm": 0.04585393890738487, "learning_rate": 0.01, "loss": 2.0032, "step": 32103 }, { "epoch": 3.2990135635018496, "grad_norm": 0.1089714989066124, "learning_rate": 0.01, "loss": 1.9943, "step": 32106 }, { "epoch": 3.299321824907522, "grad_norm": 0.04579438269138336, "learning_rate": 0.01, "loss": 2.0241, "step": 32109 }, { "epoch": 3.2996300863131935, "grad_norm": 0.0699036568403244, "learning_rate": 0.01, "loss": 2.025, "step": 32112 }, { "epoch": 3.2999383477188657, "grad_norm": 0.10070015490055084, "learning_rate": 0.01, "loss": 2.0037, "step": 32115 }, { "epoch": 3.3002466091245375, "grad_norm": 0.08273176848888397, "learning_rate": 0.01, "loss": 2.0368, "step": 32118 }, { "epoch": 3.3005548705302097, "grad_norm": 0.08648907393217087, "learning_rate": 0.01, "loss": 2.0069, "step": 32121 }, { "epoch": 3.3008631319358814, "grad_norm": 0.059946708381175995, "learning_rate": 0.01, "loss": 2.0023, "step": 32124 }, { "epoch": 3.3011713933415536, "grad_norm": 0.034971099346876144, "learning_rate": 0.01, "loss": 1.9999, "step": 32127 }, { "epoch": 3.301479654747226, "grad_norm": 0.12256456911563873, "learning_rate": 0.01, "loss": 1.9763, "step": 32130 }, { "epoch": 3.3017879161528976, "grad_norm": 0.041138794273138046, "learning_rate": 0.01, "loss": 2.0417, "step": 32133 }, { "epoch": 3.3020961775585698, "grad_norm": 0.04145865887403488, "learning_rate": 0.01, "loss": 1.9986, "step": 32136 }, { "epoch": 3.3024044389642415, "grad_norm": 0.07052136212587357, "learning_rate": 0.01, "loss": 1.9986, "step": 32139 }, { "epoch": 3.3027127003699137, "grad_norm": 0.05553466081619263, "learning_rate": 0.01, "loss": 1.9964, "step": 32142 }, { "epoch": 3.3030209617755855, "grad_norm": 0.038209814578294754, "learning_rate": 0.01, "loss": 2.0056, "step": 32145 }, { "epoch": 3.3033292231812577, "grad_norm": 0.04882222041487694, "learning_rate": 0.01, "loss": 2.019, "step": 32148 }, { "epoch": 3.30363748458693, "grad_norm": 0.11157884448766708, "learning_rate": 0.01, "loss": 2.0012, "step": 32151 }, { "epoch": 3.3039457459926016, "grad_norm": 0.09312507510185242, "learning_rate": 0.01, "loss": 2.0221, "step": 32154 }, { "epoch": 3.304254007398274, "grad_norm": 0.05357594043016434, "learning_rate": 0.01, "loss": 1.9986, "step": 32157 }, { "epoch": 3.304562268803946, "grad_norm": 0.07024755328893661, "learning_rate": 0.01, "loss": 2.0032, "step": 32160 }, { "epoch": 3.3048705302096177, "grad_norm": 0.0773010402917862, "learning_rate": 0.01, "loss": 2.0273, "step": 32163 }, { "epoch": 3.30517879161529, "grad_norm": 0.04807543754577637, "learning_rate": 0.01, "loss": 2.0101, "step": 32166 }, { "epoch": 3.3054870530209617, "grad_norm": 0.03652816265821457, "learning_rate": 0.01, "loss": 1.9997, "step": 32169 }, { "epoch": 3.305795314426634, "grad_norm": 0.06250651925802231, "learning_rate": 0.01, "loss": 2.0255, "step": 32172 }, { "epoch": 3.3061035758323056, "grad_norm": 0.08029799908399582, "learning_rate": 0.01, "loss": 2.0066, "step": 32175 }, { "epoch": 3.306411837237978, "grad_norm": 0.10732737928628922, "learning_rate": 0.01, "loss": 1.9864, "step": 32178 }, { "epoch": 3.30672009864365, "grad_norm": 0.07458434998989105, "learning_rate": 0.01, "loss": 2.0043, "step": 32181 }, { "epoch": 3.3070283600493218, "grad_norm": 0.0794452428817749, "learning_rate": 0.01, "loss": 2.0239, "step": 32184 }, { "epoch": 3.307336621454994, "grad_norm": 0.06960796564817429, "learning_rate": 0.01, "loss": 2.0533, "step": 32187 }, { "epoch": 3.3076448828606657, "grad_norm": 0.09482841193675995, "learning_rate": 0.01, "loss": 1.9819, "step": 32190 }, { "epoch": 3.307953144266338, "grad_norm": 0.08358022570610046, "learning_rate": 0.01, "loss": 1.9936, "step": 32193 }, { "epoch": 3.3082614056720097, "grad_norm": 0.03823438659310341, "learning_rate": 0.01, "loss": 1.9934, "step": 32196 }, { "epoch": 3.308569667077682, "grad_norm": 0.07413027435541153, "learning_rate": 0.01, "loss": 2.0097, "step": 32199 }, { "epoch": 3.308877928483354, "grad_norm": 0.03165535256266594, "learning_rate": 0.01, "loss": 2.0157, "step": 32202 }, { "epoch": 3.309186189889026, "grad_norm": 0.1173541322350502, "learning_rate": 0.01, "loss": 2.0229, "step": 32205 }, { "epoch": 3.309494451294698, "grad_norm": 0.07542740553617477, "learning_rate": 0.01, "loss": 1.9974, "step": 32208 }, { "epoch": 3.3098027127003697, "grad_norm": 0.104609914124012, "learning_rate": 0.01, "loss": 2.0312, "step": 32211 }, { "epoch": 3.310110974106042, "grad_norm": 0.0530179999768734, "learning_rate": 0.01, "loss": 1.9999, "step": 32214 }, { "epoch": 3.3104192355117137, "grad_norm": 0.070836141705513, "learning_rate": 0.01, "loss": 1.992, "step": 32217 }, { "epoch": 3.310727496917386, "grad_norm": 0.04056829586625099, "learning_rate": 0.01, "loss": 2.0202, "step": 32220 }, { "epoch": 3.311035758323058, "grad_norm": 0.10000187903642654, "learning_rate": 0.01, "loss": 2.0125, "step": 32223 }, { "epoch": 3.31134401972873, "grad_norm": 0.04653482139110565, "learning_rate": 0.01, "loss": 1.9988, "step": 32226 }, { "epoch": 3.311652281134402, "grad_norm": 0.10693076252937317, "learning_rate": 0.01, "loss": 1.9757, "step": 32229 }, { "epoch": 3.311960542540074, "grad_norm": 0.055378034710884094, "learning_rate": 0.01, "loss": 2.021, "step": 32232 }, { "epoch": 3.312268803945746, "grad_norm": 0.037242140620946884, "learning_rate": 0.01, "loss": 2.0367, "step": 32235 }, { "epoch": 3.312577065351418, "grad_norm": 0.04497090354561806, "learning_rate": 0.01, "loss": 1.997, "step": 32238 }, { "epoch": 3.31288532675709, "grad_norm": 0.048384904861450195, "learning_rate": 0.01, "loss": 1.9945, "step": 32241 }, { "epoch": 3.313193588162762, "grad_norm": 0.05588208884000778, "learning_rate": 0.01, "loss": 2.0375, "step": 32244 }, { "epoch": 3.313501849568434, "grad_norm": 0.05643755942583084, "learning_rate": 0.01, "loss": 2.0384, "step": 32247 }, { "epoch": 3.313810110974106, "grad_norm": 0.03415621444582939, "learning_rate": 0.01, "loss": 2.0217, "step": 32250 }, { "epoch": 3.3141183723797782, "grad_norm": 0.03537356108427048, "learning_rate": 0.01, "loss": 2.0046, "step": 32253 }, { "epoch": 3.31442663378545, "grad_norm": 0.1299961805343628, "learning_rate": 0.01, "loss": 1.9806, "step": 32256 }, { "epoch": 3.314734895191122, "grad_norm": 0.05058746412396431, "learning_rate": 0.01, "loss": 2.0076, "step": 32259 }, { "epoch": 3.315043156596794, "grad_norm": 0.06381676346063614, "learning_rate": 0.01, "loss": 2.0156, "step": 32262 }, { "epoch": 3.315351418002466, "grad_norm": 0.039552103728055954, "learning_rate": 0.01, "loss": 1.9853, "step": 32265 }, { "epoch": 3.315659679408138, "grad_norm": 0.03872091323137283, "learning_rate": 0.01, "loss": 2.0174, "step": 32268 }, { "epoch": 3.31596794081381, "grad_norm": 0.04546678811311722, "learning_rate": 0.01, "loss": 2.0176, "step": 32271 }, { "epoch": 3.3162762022194823, "grad_norm": 0.04541923850774765, "learning_rate": 0.01, "loss": 2.023, "step": 32274 }, { "epoch": 3.316584463625154, "grad_norm": 0.04341261461377144, "learning_rate": 0.01, "loss": 2.0179, "step": 32277 }, { "epoch": 3.316892725030826, "grad_norm": 0.10473504662513733, "learning_rate": 0.01, "loss": 2.0263, "step": 32280 }, { "epoch": 3.317200986436498, "grad_norm": 0.0668938085436821, "learning_rate": 0.01, "loss": 2.0417, "step": 32283 }, { "epoch": 3.31750924784217, "grad_norm": 0.12829263508319855, "learning_rate": 0.01, "loss": 2.022, "step": 32286 }, { "epoch": 3.3178175092478424, "grad_norm": 0.05443095043301582, "learning_rate": 0.01, "loss": 2.0101, "step": 32289 }, { "epoch": 3.318125770653514, "grad_norm": 0.06268401443958282, "learning_rate": 0.01, "loss": 2.0178, "step": 32292 }, { "epoch": 3.3184340320591863, "grad_norm": 0.05700231343507767, "learning_rate": 0.01, "loss": 2.0203, "step": 32295 }, { "epoch": 3.318742293464858, "grad_norm": 0.07467647641897202, "learning_rate": 0.01, "loss": 2.0031, "step": 32298 }, { "epoch": 3.3190505548705302, "grad_norm": 0.0675196647644043, "learning_rate": 0.01, "loss": 2.013, "step": 32301 }, { "epoch": 3.3193588162762024, "grad_norm": 0.044399552047252655, "learning_rate": 0.01, "loss": 1.993, "step": 32304 }, { "epoch": 3.319667077681874, "grad_norm": 0.0451500304043293, "learning_rate": 0.01, "loss": 2.0158, "step": 32307 }, { "epoch": 3.3199753390875464, "grad_norm": 0.041543807834386826, "learning_rate": 0.01, "loss": 2.0157, "step": 32310 }, { "epoch": 3.320283600493218, "grad_norm": 0.08818163722753525, "learning_rate": 0.01, "loss": 1.9928, "step": 32313 }, { "epoch": 3.3205918618988903, "grad_norm": 0.05897468701004982, "learning_rate": 0.01, "loss": 1.999, "step": 32316 }, { "epoch": 3.320900123304562, "grad_norm": 0.10485360026359558, "learning_rate": 0.01, "loss": 2.0178, "step": 32319 }, { "epoch": 3.3212083847102343, "grad_norm": 0.048387110233306885, "learning_rate": 0.01, "loss": 2.021, "step": 32322 }, { "epoch": 3.3215166461159065, "grad_norm": 0.07535526156425476, "learning_rate": 0.01, "loss": 2.0228, "step": 32325 }, { "epoch": 3.321824907521578, "grad_norm": 0.04594804719090462, "learning_rate": 0.01, "loss": 2.0271, "step": 32328 }, { "epoch": 3.3221331689272504, "grad_norm": 0.03757678344845772, "learning_rate": 0.01, "loss": 2.0177, "step": 32331 }, { "epoch": 3.322441430332922, "grad_norm": 0.046382974833250046, "learning_rate": 0.01, "loss": 2.021, "step": 32334 }, { "epoch": 3.3227496917385944, "grad_norm": 0.03983695060014725, "learning_rate": 0.01, "loss": 1.9877, "step": 32337 }, { "epoch": 3.323057953144266, "grad_norm": 0.0615588016808033, "learning_rate": 0.01, "loss": 2.0374, "step": 32340 }, { "epoch": 3.3233662145499383, "grad_norm": 0.10143701732158661, "learning_rate": 0.01, "loss": 2.0262, "step": 32343 }, { "epoch": 3.3236744759556105, "grad_norm": 0.06885727494955063, "learning_rate": 0.01, "loss": 2.0055, "step": 32346 }, { "epoch": 3.3239827373612822, "grad_norm": 0.10029948502779007, "learning_rate": 0.01, "loss": 2.0224, "step": 32349 }, { "epoch": 3.3242909987669544, "grad_norm": 0.08583710342645645, "learning_rate": 0.01, "loss": 1.988, "step": 32352 }, { "epoch": 3.3245992601726266, "grad_norm": 0.08596043288707733, "learning_rate": 0.01, "loss": 2.0065, "step": 32355 }, { "epoch": 3.3249075215782984, "grad_norm": 0.0431043840944767, "learning_rate": 0.01, "loss": 2.0107, "step": 32358 }, { "epoch": 3.3252157829839706, "grad_norm": 0.044695861637592316, "learning_rate": 0.01, "loss": 1.9898, "step": 32361 }, { "epoch": 3.3255240443896423, "grad_norm": 0.03210937976837158, "learning_rate": 0.01, "loss": 2.0058, "step": 32364 }, { "epoch": 3.3258323057953145, "grad_norm": 0.03838266804814339, "learning_rate": 0.01, "loss": 2.0145, "step": 32367 }, { "epoch": 3.3261405672009863, "grad_norm": 0.05611315369606018, "learning_rate": 0.01, "loss": 2.0045, "step": 32370 }, { "epoch": 3.3264488286066585, "grad_norm": 0.10025975853204727, "learning_rate": 0.01, "loss": 2.0047, "step": 32373 }, { "epoch": 3.3267570900123307, "grad_norm": 0.10004039853811264, "learning_rate": 0.01, "loss": 2.0171, "step": 32376 }, { "epoch": 3.3270653514180024, "grad_norm": 0.0655856728553772, "learning_rate": 0.01, "loss": 2.0079, "step": 32379 }, { "epoch": 3.3273736128236746, "grad_norm": 0.09608997404575348, "learning_rate": 0.01, "loss": 2.0069, "step": 32382 }, { "epoch": 3.3276818742293464, "grad_norm": 0.14821460843086243, "learning_rate": 0.01, "loss": 2.0107, "step": 32385 }, { "epoch": 3.3279901356350186, "grad_norm": 0.05276164412498474, "learning_rate": 0.01, "loss": 2.0369, "step": 32388 }, { "epoch": 3.3282983970406903, "grad_norm": 0.049289003014564514, "learning_rate": 0.01, "loss": 1.9977, "step": 32391 }, { "epoch": 3.3286066584463625, "grad_norm": 0.05115703493356705, "learning_rate": 0.01, "loss": 2.004, "step": 32394 }, { "epoch": 3.3289149198520347, "grad_norm": 0.04041285440325737, "learning_rate": 0.01, "loss": 2.0262, "step": 32397 }, { "epoch": 3.3292231812577064, "grad_norm": 0.04861520603299141, "learning_rate": 0.01, "loss": 1.9955, "step": 32400 }, { "epoch": 3.3295314426633786, "grad_norm": 0.1111973226070404, "learning_rate": 0.01, "loss": 2.0101, "step": 32403 }, { "epoch": 3.3298397040690504, "grad_norm": 0.061960045248270035, "learning_rate": 0.01, "loss": 1.9887, "step": 32406 }, { "epoch": 3.3301479654747226, "grad_norm": 0.1108783558011055, "learning_rate": 0.01, "loss": 2.0207, "step": 32409 }, { "epoch": 3.3304562268803943, "grad_norm": 0.06897444278001785, "learning_rate": 0.01, "loss": 2.0321, "step": 32412 }, { "epoch": 3.3307644882860665, "grad_norm": 0.046277862042188644, "learning_rate": 0.01, "loss": 2.0018, "step": 32415 }, { "epoch": 3.3310727496917387, "grad_norm": 0.09898782521486282, "learning_rate": 0.01, "loss": 2.0146, "step": 32418 }, { "epoch": 3.3313810110974105, "grad_norm": 0.059529174119234085, "learning_rate": 0.01, "loss": 1.9783, "step": 32421 }, { "epoch": 3.3316892725030827, "grad_norm": 0.08007462322711945, "learning_rate": 0.01, "loss": 2.0324, "step": 32424 }, { "epoch": 3.331997533908755, "grad_norm": 0.07130561769008636, "learning_rate": 0.01, "loss": 2.0061, "step": 32427 }, { "epoch": 3.3323057953144266, "grad_norm": 0.04967787116765976, "learning_rate": 0.01, "loss": 2.0259, "step": 32430 }, { "epoch": 3.332614056720099, "grad_norm": 0.08194708079099655, "learning_rate": 0.01, "loss": 2.0137, "step": 32433 }, { "epoch": 3.3329223181257706, "grad_norm": 0.056519269943237305, "learning_rate": 0.01, "loss": 1.9917, "step": 32436 }, { "epoch": 3.3332305795314427, "grad_norm": 0.08086001873016357, "learning_rate": 0.01, "loss": 2.021, "step": 32439 }, { "epoch": 3.3335388409371145, "grad_norm": 0.04036881402134895, "learning_rate": 0.01, "loss": 2.0205, "step": 32442 }, { "epoch": 3.3338471023427867, "grad_norm": 0.04373360425233841, "learning_rate": 0.01, "loss": 2.0214, "step": 32445 }, { "epoch": 3.334155363748459, "grad_norm": 0.04562424495816231, "learning_rate": 0.01, "loss": 2.0093, "step": 32448 }, { "epoch": 3.3344636251541306, "grad_norm": 0.2069234549999237, "learning_rate": 0.01, "loss": 1.9904, "step": 32451 }, { "epoch": 3.334771886559803, "grad_norm": 0.08092590421438217, "learning_rate": 0.01, "loss": 2.0097, "step": 32454 }, { "epoch": 3.3350801479654746, "grad_norm": 0.059557970613241196, "learning_rate": 0.01, "loss": 2.0523, "step": 32457 }, { "epoch": 3.335388409371147, "grad_norm": 0.039045874029397964, "learning_rate": 0.01, "loss": 1.9891, "step": 32460 }, { "epoch": 3.3356966707768185, "grad_norm": 0.03718112036585808, "learning_rate": 0.01, "loss": 2.0099, "step": 32463 }, { "epoch": 3.3360049321824907, "grad_norm": 0.05162828043103218, "learning_rate": 0.01, "loss": 2.0151, "step": 32466 }, { "epoch": 3.336313193588163, "grad_norm": 0.05825696140527725, "learning_rate": 0.01, "loss": 2.0252, "step": 32469 }, { "epoch": 3.3366214549938347, "grad_norm": 0.0458202064037323, "learning_rate": 0.01, "loss": 2.0273, "step": 32472 }, { "epoch": 3.336929716399507, "grad_norm": 0.07931084930896759, "learning_rate": 0.01, "loss": 2.0026, "step": 32475 }, { "epoch": 3.3372379778051786, "grad_norm": 0.05673946067690849, "learning_rate": 0.01, "loss": 1.998, "step": 32478 }, { "epoch": 3.337546239210851, "grad_norm": 0.05536726489663124, "learning_rate": 0.01, "loss": 1.9994, "step": 32481 }, { "epoch": 3.337854500616523, "grad_norm": 0.053581688553094864, "learning_rate": 0.01, "loss": 1.9913, "step": 32484 }, { "epoch": 3.3381627620221948, "grad_norm": 0.057449061423540115, "learning_rate": 0.01, "loss": 1.9973, "step": 32487 }, { "epoch": 3.338471023427867, "grad_norm": 0.0768120214343071, "learning_rate": 0.01, "loss": 2.0294, "step": 32490 }, { "epoch": 3.3387792848335387, "grad_norm": 0.1322673261165619, "learning_rate": 0.01, "loss": 1.9827, "step": 32493 }, { "epoch": 3.339087546239211, "grad_norm": 0.10641443729400635, "learning_rate": 0.01, "loss": 1.9858, "step": 32496 }, { "epoch": 3.339395807644883, "grad_norm": 0.13880647718906403, "learning_rate": 0.01, "loss": 1.979, "step": 32499 }, { "epoch": 3.339704069050555, "grad_norm": 0.04653307795524597, "learning_rate": 0.01, "loss": 2.0206, "step": 32502 }, { "epoch": 3.340012330456227, "grad_norm": 0.05031618848443031, "learning_rate": 0.01, "loss": 2.0221, "step": 32505 }, { "epoch": 3.340320591861899, "grad_norm": 0.042637672275304794, "learning_rate": 0.01, "loss": 1.9986, "step": 32508 }, { "epoch": 3.340628853267571, "grad_norm": 0.03557129204273224, "learning_rate": 0.01, "loss": 2.0186, "step": 32511 }, { "epoch": 3.3409371146732427, "grad_norm": 0.04271169379353523, "learning_rate": 0.01, "loss": 1.9907, "step": 32514 }, { "epoch": 3.341245376078915, "grad_norm": 0.06750103831291199, "learning_rate": 0.01, "loss": 2.0013, "step": 32517 }, { "epoch": 3.341553637484587, "grad_norm": 0.05727256461977959, "learning_rate": 0.01, "loss": 2.0192, "step": 32520 }, { "epoch": 3.341861898890259, "grad_norm": 0.05737854540348053, "learning_rate": 0.01, "loss": 2.0194, "step": 32523 }, { "epoch": 3.342170160295931, "grad_norm": 0.04525689780712128, "learning_rate": 0.01, "loss": 1.9733, "step": 32526 }, { "epoch": 3.342478421701603, "grad_norm": 0.038834329694509506, "learning_rate": 0.01, "loss": 2.0287, "step": 32529 }, { "epoch": 3.342786683107275, "grad_norm": 0.14812909066677094, "learning_rate": 0.01, "loss": 1.9926, "step": 32532 }, { "epoch": 3.3430949445129468, "grad_norm": 0.09346423298120499, "learning_rate": 0.01, "loss": 1.9866, "step": 32535 }, { "epoch": 3.343403205918619, "grad_norm": 0.03221321851015091, "learning_rate": 0.01, "loss": 2.0114, "step": 32538 }, { "epoch": 3.343711467324291, "grad_norm": 0.05457564815878868, "learning_rate": 0.01, "loss": 2.0044, "step": 32541 }, { "epoch": 3.344019728729963, "grad_norm": 0.05203322321176529, "learning_rate": 0.01, "loss": 1.9944, "step": 32544 }, { "epoch": 3.344327990135635, "grad_norm": 0.05318872258067131, "learning_rate": 0.01, "loss": 2.0199, "step": 32547 }, { "epoch": 3.344636251541307, "grad_norm": 0.034635335206985474, "learning_rate": 0.01, "loss": 1.9841, "step": 32550 }, { "epoch": 3.344944512946979, "grad_norm": 0.0421120747923851, "learning_rate": 0.01, "loss": 1.9923, "step": 32553 }, { "epoch": 3.3452527743526512, "grad_norm": 0.08523180335760117, "learning_rate": 0.01, "loss": 2.0162, "step": 32556 }, { "epoch": 3.345561035758323, "grad_norm": 0.11694061011075974, "learning_rate": 0.01, "loss": 1.9921, "step": 32559 }, { "epoch": 3.345869297163995, "grad_norm": 0.05000199005007744, "learning_rate": 0.01, "loss": 2.0159, "step": 32562 }, { "epoch": 3.346177558569667, "grad_norm": 0.0399484746158123, "learning_rate": 0.01, "loss": 2.0316, "step": 32565 }, { "epoch": 3.346485819975339, "grad_norm": 0.0491316057741642, "learning_rate": 0.01, "loss": 2.0184, "step": 32568 }, { "epoch": 3.3467940813810113, "grad_norm": 0.042924270033836365, "learning_rate": 0.01, "loss": 1.9983, "step": 32571 }, { "epoch": 3.347102342786683, "grad_norm": 0.03486446663737297, "learning_rate": 0.01, "loss": 2.023, "step": 32574 }, { "epoch": 3.3474106041923553, "grad_norm": 0.031064294278621674, "learning_rate": 0.01, "loss": 2.0249, "step": 32577 }, { "epoch": 3.347718865598027, "grad_norm": 0.05951589718461037, "learning_rate": 0.01, "loss": 1.9989, "step": 32580 }, { "epoch": 3.348027127003699, "grad_norm": 0.04387381300330162, "learning_rate": 0.01, "loss": 2.0158, "step": 32583 }, { "epoch": 3.348335388409371, "grad_norm": 0.05328337103128433, "learning_rate": 0.01, "loss": 1.9941, "step": 32586 }, { "epoch": 3.348643649815043, "grad_norm": 0.04561325162649155, "learning_rate": 0.01, "loss": 2.0039, "step": 32589 }, { "epoch": 3.3489519112207153, "grad_norm": 0.047260623425245285, "learning_rate": 0.01, "loss": 2.0387, "step": 32592 }, { "epoch": 3.349260172626387, "grad_norm": 0.21082252264022827, "learning_rate": 0.01, "loss": 2.024, "step": 32595 }, { "epoch": 3.3495684340320593, "grad_norm": 0.08391027897596359, "learning_rate": 0.01, "loss": 1.9919, "step": 32598 }, { "epoch": 3.349876695437731, "grad_norm": 0.07036472856998444, "learning_rate": 0.01, "loss": 2.0218, "step": 32601 }, { "epoch": 3.3501849568434032, "grad_norm": 0.03812922164797783, "learning_rate": 0.01, "loss": 2.014, "step": 32604 }, { "epoch": 3.350493218249075, "grad_norm": 0.041235774755477905, "learning_rate": 0.01, "loss": 2.0135, "step": 32607 }, { "epoch": 3.350801479654747, "grad_norm": 0.05174950137734413, "learning_rate": 0.01, "loss": 2.026, "step": 32610 }, { "epoch": 3.3511097410604194, "grad_norm": 0.058260124176740646, "learning_rate": 0.01, "loss": 2.0177, "step": 32613 }, { "epoch": 3.351418002466091, "grad_norm": 0.045651067048311234, "learning_rate": 0.01, "loss": 2.0199, "step": 32616 }, { "epoch": 3.3517262638717633, "grad_norm": 0.043610829859972, "learning_rate": 0.01, "loss": 2.0253, "step": 32619 }, { "epoch": 3.3520345252774355, "grad_norm": 0.04924603924155235, "learning_rate": 0.01, "loss": 2.0141, "step": 32622 }, { "epoch": 3.3523427866831073, "grad_norm": 0.04765019193291664, "learning_rate": 0.01, "loss": 2.0398, "step": 32625 }, { "epoch": 3.3526510480887795, "grad_norm": 0.04744412377476692, "learning_rate": 0.01, "loss": 2.0159, "step": 32628 }, { "epoch": 3.352959309494451, "grad_norm": 0.09456950426101685, "learning_rate": 0.01, "loss": 2.0084, "step": 32631 }, { "epoch": 3.3532675709001234, "grad_norm": 0.17356513440608978, "learning_rate": 0.01, "loss": 2.0138, "step": 32634 }, { "epoch": 3.353575832305795, "grad_norm": 0.08420834690332413, "learning_rate": 0.01, "loss": 2.0004, "step": 32637 }, { "epoch": 3.3538840937114673, "grad_norm": 0.09453277289867401, "learning_rate": 0.01, "loss": 2.021, "step": 32640 }, { "epoch": 3.3541923551171395, "grad_norm": 0.05444180220365524, "learning_rate": 0.01, "loss": 2.0335, "step": 32643 }, { "epoch": 3.3545006165228113, "grad_norm": 0.04824339225888252, "learning_rate": 0.01, "loss": 1.9976, "step": 32646 }, { "epoch": 3.3548088779284835, "grad_norm": 0.06650727242231369, "learning_rate": 0.01, "loss": 2.0021, "step": 32649 }, { "epoch": 3.3551171393341552, "grad_norm": 0.05119656026363373, "learning_rate": 0.01, "loss": 2.0415, "step": 32652 }, { "epoch": 3.3554254007398274, "grad_norm": 0.044617343693971634, "learning_rate": 0.01, "loss": 1.9992, "step": 32655 }, { "epoch": 3.355733662145499, "grad_norm": 0.035579435527324677, "learning_rate": 0.01, "loss": 1.9993, "step": 32658 }, { "epoch": 3.3560419235511714, "grad_norm": 0.05802566558122635, "learning_rate": 0.01, "loss": 2.0112, "step": 32661 }, { "epoch": 3.3563501849568436, "grad_norm": 0.050934724509716034, "learning_rate": 0.01, "loss": 2.0039, "step": 32664 }, { "epoch": 3.3566584463625153, "grad_norm": 0.055400047451257706, "learning_rate": 0.01, "loss": 2.0179, "step": 32667 }, { "epoch": 3.3569667077681875, "grad_norm": 0.1315484195947647, "learning_rate": 0.01, "loss": 1.9883, "step": 32670 }, { "epoch": 3.3572749691738593, "grad_norm": 0.13136912882328033, "learning_rate": 0.01, "loss": 2.0123, "step": 32673 }, { "epoch": 3.3575832305795315, "grad_norm": 0.08574076741933823, "learning_rate": 0.01, "loss": 1.9902, "step": 32676 }, { "epoch": 3.357891491985203, "grad_norm": 0.04678389057517052, "learning_rate": 0.01, "loss": 2.0363, "step": 32679 }, { "epoch": 3.3581997533908754, "grad_norm": 0.03356343135237694, "learning_rate": 0.01, "loss": 2.0031, "step": 32682 }, { "epoch": 3.3585080147965476, "grad_norm": 0.046139661222696304, "learning_rate": 0.01, "loss": 2.0255, "step": 32685 }, { "epoch": 3.3588162762022193, "grad_norm": 0.03130761533975601, "learning_rate": 0.01, "loss": 2.0107, "step": 32688 }, { "epoch": 3.3591245376078915, "grad_norm": 0.17764367163181305, "learning_rate": 0.01, "loss": 1.9817, "step": 32691 }, { "epoch": 3.3594327990135637, "grad_norm": 0.04135056957602501, "learning_rate": 0.01, "loss": 2.0032, "step": 32694 }, { "epoch": 3.3597410604192355, "grad_norm": 0.11181548237800598, "learning_rate": 0.01, "loss": 2.048, "step": 32697 }, { "epoch": 3.3600493218249077, "grad_norm": 0.07631994783878326, "learning_rate": 0.01, "loss": 2.0222, "step": 32700 }, { "epoch": 3.3603575832305794, "grad_norm": 0.03839050978422165, "learning_rate": 0.01, "loss": 1.9715, "step": 32703 }, { "epoch": 3.3606658446362516, "grad_norm": 0.03893091529607773, "learning_rate": 0.01, "loss": 2.0204, "step": 32706 }, { "epoch": 3.3609741060419234, "grad_norm": 0.15776588022708893, "learning_rate": 0.01, "loss": 1.9747, "step": 32709 }, { "epoch": 3.3612823674475956, "grad_norm": 0.125548854470253, "learning_rate": 0.01, "loss": 2.0066, "step": 32712 }, { "epoch": 3.3615906288532678, "grad_norm": 0.06952936947345734, "learning_rate": 0.01, "loss": 2.0353, "step": 32715 }, { "epoch": 3.3618988902589395, "grad_norm": 0.03826635703444481, "learning_rate": 0.01, "loss": 2.0186, "step": 32718 }, { "epoch": 3.3622071516646117, "grad_norm": 0.03977655619382858, "learning_rate": 0.01, "loss": 2.0205, "step": 32721 }, { "epoch": 3.3625154130702835, "grad_norm": 0.02882550098001957, "learning_rate": 0.01, "loss": 1.9914, "step": 32724 }, { "epoch": 3.3628236744759556, "grad_norm": 0.03502441197633743, "learning_rate": 0.01, "loss": 1.9925, "step": 32727 }, { "epoch": 3.3631319358816274, "grad_norm": 0.04370797425508499, "learning_rate": 0.01, "loss": 2.0154, "step": 32730 }, { "epoch": 3.3634401972872996, "grad_norm": 0.03528802841901779, "learning_rate": 0.01, "loss": 2.0014, "step": 32733 }, { "epoch": 3.363748458692972, "grad_norm": 0.08671889454126358, "learning_rate": 0.01, "loss": 2.0048, "step": 32736 }, { "epoch": 3.3640567200986435, "grad_norm": 0.1123836413025856, "learning_rate": 0.01, "loss": 2.0119, "step": 32739 }, { "epoch": 3.3643649815043157, "grad_norm": 0.061064526438713074, "learning_rate": 0.01, "loss": 2.0054, "step": 32742 }, { "epoch": 3.3646732429099875, "grad_norm": 0.05037948489189148, "learning_rate": 0.01, "loss": 2.0282, "step": 32745 }, { "epoch": 3.3649815043156597, "grad_norm": 0.052206844091415405, "learning_rate": 0.01, "loss": 2.0226, "step": 32748 }, { "epoch": 3.365289765721332, "grad_norm": 0.05795833095908165, "learning_rate": 0.01, "loss": 2.0038, "step": 32751 }, { "epoch": 3.3655980271270036, "grad_norm": 0.030604414641857147, "learning_rate": 0.01, "loss": 2.0104, "step": 32754 }, { "epoch": 3.365906288532676, "grad_norm": 0.05441366508603096, "learning_rate": 0.01, "loss": 1.9975, "step": 32757 }, { "epoch": 3.3662145499383476, "grad_norm": 0.07509131729602814, "learning_rate": 0.01, "loss": 2.0117, "step": 32760 }, { "epoch": 3.3665228113440198, "grad_norm": 0.046888504177331924, "learning_rate": 0.01, "loss": 2.0158, "step": 32763 }, { "epoch": 3.366831072749692, "grad_norm": 0.05030560493469238, "learning_rate": 0.01, "loss": 2.0139, "step": 32766 }, { "epoch": 3.3671393341553637, "grad_norm": 0.0426168255507946, "learning_rate": 0.01, "loss": 2.0118, "step": 32769 }, { "epoch": 3.367447595561036, "grad_norm": 0.10896468907594681, "learning_rate": 0.01, "loss": 1.9939, "step": 32772 }, { "epoch": 3.3677558569667077, "grad_norm": 0.11696910113096237, "learning_rate": 0.01, "loss": 2.0353, "step": 32775 }, { "epoch": 3.36806411837238, "grad_norm": 0.07340724021196365, "learning_rate": 0.01, "loss": 2.0062, "step": 32778 }, { "epoch": 3.3683723797780516, "grad_norm": 0.037968121469020844, "learning_rate": 0.01, "loss": 2.0191, "step": 32781 }, { "epoch": 3.368680641183724, "grad_norm": 0.044434670358896255, "learning_rate": 0.01, "loss": 1.9993, "step": 32784 }, { "epoch": 3.368988902589396, "grad_norm": 0.03823886066675186, "learning_rate": 0.01, "loss": 2.0402, "step": 32787 }, { "epoch": 3.3692971639950677, "grad_norm": 0.06556801497936249, "learning_rate": 0.01, "loss": 2.0097, "step": 32790 }, { "epoch": 3.36960542540074, "grad_norm": 0.06128913164138794, "learning_rate": 0.01, "loss": 2.032, "step": 32793 }, { "epoch": 3.3699136868064117, "grad_norm": 0.08499012142419815, "learning_rate": 0.01, "loss": 2.0395, "step": 32796 }, { "epoch": 3.370221948212084, "grad_norm": 0.03410051763057709, "learning_rate": 0.01, "loss": 2.0055, "step": 32799 }, { "epoch": 3.3705302096177556, "grad_norm": 0.08818015456199646, "learning_rate": 0.01, "loss": 2.0034, "step": 32802 }, { "epoch": 3.370838471023428, "grad_norm": 0.045091863721609116, "learning_rate": 0.01, "loss": 2.0252, "step": 32805 }, { "epoch": 3.3711467324291, "grad_norm": 0.10982260853052139, "learning_rate": 0.01, "loss": 1.9912, "step": 32808 }, { "epoch": 3.3714549938347718, "grad_norm": 0.04633982852101326, "learning_rate": 0.01, "loss": 2.0256, "step": 32811 }, { "epoch": 3.371763255240444, "grad_norm": 0.04701898992061615, "learning_rate": 0.01, "loss": 2.0098, "step": 32814 }, { "epoch": 3.372071516646116, "grad_norm": 0.03449505567550659, "learning_rate": 0.01, "loss": 2.0046, "step": 32817 }, { "epoch": 3.372379778051788, "grad_norm": 0.03621023893356323, "learning_rate": 0.01, "loss": 1.9677, "step": 32820 }, { "epoch": 3.37268803945746, "grad_norm": 0.04743462800979614, "learning_rate": 0.01, "loss": 2.0308, "step": 32823 }, { "epoch": 3.372996300863132, "grad_norm": 0.04240218549966812, "learning_rate": 0.01, "loss": 2.0152, "step": 32826 }, { "epoch": 3.373304562268804, "grad_norm": 0.09400332719087601, "learning_rate": 0.01, "loss": 2.0098, "step": 32829 }, { "epoch": 3.373612823674476, "grad_norm": 0.07313279062509537, "learning_rate": 0.01, "loss": 2.0366, "step": 32832 }, { "epoch": 3.373921085080148, "grad_norm": 0.07604516297578812, "learning_rate": 0.01, "loss": 2.0013, "step": 32835 }, { "epoch": 3.37422934648582, "grad_norm": 0.044236812740564346, "learning_rate": 0.01, "loss": 2.0005, "step": 32838 }, { "epoch": 3.374537607891492, "grad_norm": 0.051601652055978775, "learning_rate": 0.01, "loss": 1.9981, "step": 32841 }, { "epoch": 3.374845869297164, "grad_norm": 0.10818912088871002, "learning_rate": 0.01, "loss": 2.001, "step": 32844 }, { "epoch": 3.375154130702836, "grad_norm": 0.04563935101032257, "learning_rate": 0.01, "loss": 2.0138, "step": 32847 }, { "epoch": 3.375462392108508, "grad_norm": 0.053665511310100555, "learning_rate": 0.01, "loss": 2.0193, "step": 32850 }, { "epoch": 3.37577065351418, "grad_norm": 0.08934652805328369, "learning_rate": 0.01, "loss": 2.0243, "step": 32853 }, { "epoch": 3.376078914919852, "grad_norm": 0.0752192884683609, "learning_rate": 0.01, "loss": 2.0429, "step": 32856 }, { "epoch": 3.376387176325524, "grad_norm": 0.05763142928481102, "learning_rate": 0.01, "loss": 2.0022, "step": 32859 }, { "epoch": 3.376695437731196, "grad_norm": 0.03926476463675499, "learning_rate": 0.01, "loss": 2.0132, "step": 32862 }, { "epoch": 3.377003699136868, "grad_norm": 0.05735384672880173, "learning_rate": 0.01, "loss": 2.0144, "step": 32865 }, { "epoch": 3.37731196054254, "grad_norm": 0.18665596842765808, "learning_rate": 0.01, "loss": 2.0028, "step": 32868 }, { "epoch": 3.377620221948212, "grad_norm": 0.06702150404453278, "learning_rate": 0.01, "loss": 2.0254, "step": 32871 }, { "epoch": 3.377928483353884, "grad_norm": 0.051258910447359085, "learning_rate": 0.01, "loss": 2.0135, "step": 32874 }, { "epoch": 3.378236744759556, "grad_norm": 0.05804390087723732, "learning_rate": 0.01, "loss": 2.0006, "step": 32877 }, { "epoch": 3.3785450061652282, "grad_norm": 0.04688677936792374, "learning_rate": 0.01, "loss": 2.0244, "step": 32880 }, { "epoch": 3.3788532675709, "grad_norm": 0.057768989354372025, "learning_rate": 0.01, "loss": 2.0082, "step": 32883 }, { "epoch": 3.379161528976572, "grad_norm": 0.05571329593658447, "learning_rate": 0.01, "loss": 2.0364, "step": 32886 }, { "epoch": 3.3794697903822444, "grad_norm": 0.04497957229614258, "learning_rate": 0.01, "loss": 1.9896, "step": 32889 }, { "epoch": 3.379778051787916, "grad_norm": 0.043453045189380646, "learning_rate": 0.01, "loss": 2.0196, "step": 32892 }, { "epoch": 3.3800863131935883, "grad_norm": 0.045709915459156036, "learning_rate": 0.01, "loss": 1.9904, "step": 32895 }, { "epoch": 3.38039457459926, "grad_norm": 0.05974254012107849, "learning_rate": 0.01, "loss": 2.0022, "step": 32898 }, { "epoch": 3.3807028360049323, "grad_norm": 0.16592206060886383, "learning_rate": 0.01, "loss": 2.0035, "step": 32901 }, { "epoch": 3.381011097410604, "grad_norm": 0.04671747609972954, "learning_rate": 0.01, "loss": 1.9948, "step": 32904 }, { "epoch": 3.381319358816276, "grad_norm": 0.07180918008089066, "learning_rate": 0.01, "loss": 2.0097, "step": 32907 }, { "epoch": 3.3816276202219484, "grad_norm": 0.06775587797164917, "learning_rate": 0.01, "loss": 2.0101, "step": 32910 }, { "epoch": 3.38193588162762, "grad_norm": 0.04381205141544342, "learning_rate": 0.01, "loss": 2.0053, "step": 32913 }, { "epoch": 3.3822441430332923, "grad_norm": 0.0345197468996048, "learning_rate": 0.01, "loss": 1.9829, "step": 32916 }, { "epoch": 3.382552404438964, "grad_norm": 0.04965033382177353, "learning_rate": 0.01, "loss": 2.0111, "step": 32919 }, { "epoch": 3.3828606658446363, "grad_norm": 0.20276527106761932, "learning_rate": 0.01, "loss": 2.0342, "step": 32922 }, { "epoch": 3.383168927250308, "grad_norm": 0.0702800378203392, "learning_rate": 0.01, "loss": 1.9971, "step": 32925 }, { "epoch": 3.3834771886559802, "grad_norm": 0.05775219202041626, "learning_rate": 0.01, "loss": 2.0062, "step": 32928 }, { "epoch": 3.3837854500616524, "grad_norm": 0.0662347599864006, "learning_rate": 0.01, "loss": 1.9962, "step": 32931 }, { "epoch": 3.384093711467324, "grad_norm": 0.05067736655473709, "learning_rate": 0.01, "loss": 1.9963, "step": 32934 }, { "epoch": 3.3844019728729964, "grad_norm": 0.057027652859687805, "learning_rate": 0.01, "loss": 2.0261, "step": 32937 }, { "epoch": 3.384710234278668, "grad_norm": 0.0408274307847023, "learning_rate": 0.01, "loss": 2.0139, "step": 32940 }, { "epoch": 3.3850184956843403, "grad_norm": 0.049467723816633224, "learning_rate": 0.01, "loss": 2.0133, "step": 32943 }, { "epoch": 3.3853267570900125, "grad_norm": 0.07573775947093964, "learning_rate": 0.01, "loss": 2.0331, "step": 32946 }, { "epoch": 3.3856350184956843, "grad_norm": 0.04027678817510605, "learning_rate": 0.01, "loss": 2.0118, "step": 32949 }, { "epoch": 3.3859432799013565, "grad_norm": 0.09980335086584091, "learning_rate": 0.01, "loss": 2.0158, "step": 32952 }, { "epoch": 3.386251541307028, "grad_norm": 0.06230602413415909, "learning_rate": 0.01, "loss": 2.0235, "step": 32955 }, { "epoch": 3.3865598027127004, "grad_norm": 0.09655454754829407, "learning_rate": 0.01, "loss": 2.0003, "step": 32958 }, { "epoch": 3.3868680641183726, "grad_norm": 0.053587790578603745, "learning_rate": 0.01, "loss": 2.002, "step": 32961 }, { "epoch": 3.3871763255240444, "grad_norm": 0.08395679295063019, "learning_rate": 0.01, "loss": 1.9928, "step": 32964 }, { "epoch": 3.3874845869297165, "grad_norm": 0.08353892713785172, "learning_rate": 0.01, "loss": 2.0052, "step": 32967 }, { "epoch": 3.3877928483353883, "grad_norm": 0.11298651993274689, "learning_rate": 0.01, "loss": 2.0039, "step": 32970 }, { "epoch": 3.3881011097410605, "grad_norm": 0.08317071199417114, "learning_rate": 0.01, "loss": 2.0095, "step": 32973 }, { "epoch": 3.3884093711467322, "grad_norm": 0.07725278288125992, "learning_rate": 0.01, "loss": 2.0255, "step": 32976 }, { "epoch": 3.3887176325524044, "grad_norm": 0.06264784932136536, "learning_rate": 0.01, "loss": 2.017, "step": 32979 }, { "epoch": 3.3890258939580766, "grad_norm": 0.0588025264441967, "learning_rate": 0.01, "loss": 2.0138, "step": 32982 }, { "epoch": 3.3893341553637484, "grad_norm": 0.033383727073669434, "learning_rate": 0.01, "loss": 2.0105, "step": 32985 }, { "epoch": 3.3896424167694206, "grad_norm": 0.04963357746601105, "learning_rate": 0.01, "loss": 2.0332, "step": 32988 }, { "epoch": 3.3899506781750923, "grad_norm": 0.03166192024946213, "learning_rate": 0.01, "loss": 1.9884, "step": 32991 }, { "epoch": 3.3902589395807645, "grad_norm": 0.0424019880592823, "learning_rate": 0.01, "loss": 1.9727, "step": 32994 }, { "epoch": 3.3905672009864363, "grad_norm": 0.0549466572701931, "learning_rate": 0.01, "loss": 1.9858, "step": 32997 }, { "epoch": 3.3908754623921085, "grad_norm": 0.06859169900417328, "learning_rate": 0.01, "loss": 1.9981, "step": 33000 }, { "epoch": 3.3911837237977807, "grad_norm": 0.05035685375332832, "learning_rate": 0.01, "loss": 2.0422, "step": 33003 }, { "epoch": 3.3914919852034524, "grad_norm": 0.10227832943201065, "learning_rate": 0.01, "loss": 1.9835, "step": 33006 }, { "epoch": 3.3918002466091246, "grad_norm": 0.052029043436050415, "learning_rate": 0.01, "loss": 2.0341, "step": 33009 }, { "epoch": 3.392108508014797, "grad_norm": 0.03569505736231804, "learning_rate": 0.01, "loss": 2.0065, "step": 33012 }, { "epoch": 3.3924167694204685, "grad_norm": 0.05492673069238663, "learning_rate": 0.01, "loss": 1.9839, "step": 33015 }, { "epoch": 3.3927250308261407, "grad_norm": 0.10698610544204712, "learning_rate": 0.01, "loss": 1.98, "step": 33018 }, { "epoch": 3.3930332922318125, "grad_norm": 0.051218993961811066, "learning_rate": 0.01, "loss": 1.9778, "step": 33021 }, { "epoch": 3.3933415536374847, "grad_norm": 0.10021807998418808, "learning_rate": 0.01, "loss": 1.997, "step": 33024 }, { "epoch": 3.3936498150431564, "grad_norm": 0.043556150048971176, "learning_rate": 0.01, "loss": 2.0145, "step": 33027 }, { "epoch": 3.3939580764488286, "grad_norm": 0.052555110305547714, "learning_rate": 0.01, "loss": 2.0, "step": 33030 }, { "epoch": 3.394266337854501, "grad_norm": 0.1656499058008194, "learning_rate": 0.01, "loss": 1.9747, "step": 33033 }, { "epoch": 3.3945745992601726, "grad_norm": 0.08107822388410568, "learning_rate": 0.01, "loss": 1.9894, "step": 33036 }, { "epoch": 3.3948828606658448, "grad_norm": 0.05703389272093773, "learning_rate": 0.01, "loss": 1.9935, "step": 33039 }, { "epoch": 3.3951911220715165, "grad_norm": 0.060602329671382904, "learning_rate": 0.01, "loss": 2.01, "step": 33042 }, { "epoch": 3.3954993834771887, "grad_norm": 0.05680840089917183, "learning_rate": 0.01, "loss": 2.0178, "step": 33045 }, { "epoch": 3.3958076448828605, "grad_norm": 0.053718626499176025, "learning_rate": 0.01, "loss": 2.0021, "step": 33048 }, { "epoch": 3.3961159062885327, "grad_norm": 0.04885102063417435, "learning_rate": 0.01, "loss": 2.0215, "step": 33051 }, { "epoch": 3.396424167694205, "grad_norm": 0.046444397419691086, "learning_rate": 0.01, "loss": 2.0117, "step": 33054 }, { "epoch": 3.3967324290998766, "grad_norm": 0.03031921572983265, "learning_rate": 0.01, "loss": 1.9973, "step": 33057 }, { "epoch": 3.397040690505549, "grad_norm": 0.048908550292253494, "learning_rate": 0.01, "loss": 2.0008, "step": 33060 }, { "epoch": 3.3973489519112205, "grad_norm": 0.06003925949335098, "learning_rate": 0.01, "loss": 1.9769, "step": 33063 }, { "epoch": 3.3976572133168927, "grad_norm": 0.10929730534553528, "learning_rate": 0.01, "loss": 1.998, "step": 33066 }, { "epoch": 3.3979654747225645, "grad_norm": 0.09032581746578217, "learning_rate": 0.01, "loss": 2.0331, "step": 33069 }, { "epoch": 3.3982737361282367, "grad_norm": 0.043940551578998566, "learning_rate": 0.01, "loss": 2.0076, "step": 33072 }, { "epoch": 3.398581997533909, "grad_norm": 0.10148674994707108, "learning_rate": 0.01, "loss": 1.9994, "step": 33075 }, { "epoch": 3.3988902589395806, "grad_norm": 0.04877715930342674, "learning_rate": 0.01, "loss": 2.0257, "step": 33078 }, { "epoch": 3.399198520345253, "grad_norm": 0.10485149919986725, "learning_rate": 0.01, "loss": 2.0297, "step": 33081 }, { "epoch": 3.399506781750925, "grad_norm": 0.12222550064325333, "learning_rate": 0.01, "loss": 2.0118, "step": 33084 }, { "epoch": 3.3998150431565968, "grad_norm": 0.0744134709239006, "learning_rate": 0.01, "loss": 2.0157, "step": 33087 }, { "epoch": 3.400123304562269, "grad_norm": 0.08050314337015152, "learning_rate": 0.01, "loss": 1.9932, "step": 33090 }, { "epoch": 3.4004315659679407, "grad_norm": 0.0806950256228447, "learning_rate": 0.01, "loss": 1.997, "step": 33093 }, { "epoch": 3.400739827373613, "grad_norm": 0.06369089335203171, "learning_rate": 0.01, "loss": 2.0101, "step": 33096 }, { "epoch": 3.4010480887792847, "grad_norm": 0.06041014939546585, "learning_rate": 0.01, "loss": 2.0408, "step": 33099 }, { "epoch": 3.401356350184957, "grad_norm": 0.0476149246096611, "learning_rate": 0.01, "loss": 1.988, "step": 33102 }, { "epoch": 3.401664611590629, "grad_norm": 0.05710010603070259, "learning_rate": 0.01, "loss": 2.0152, "step": 33105 }, { "epoch": 3.401972872996301, "grad_norm": 0.04351675137877464, "learning_rate": 0.01, "loss": 1.9752, "step": 33108 }, { "epoch": 3.402281134401973, "grad_norm": 0.04341613128781319, "learning_rate": 0.01, "loss": 2.0149, "step": 33111 }, { "epoch": 3.4025893958076447, "grad_norm": 0.04619600623846054, "learning_rate": 0.01, "loss": 2.0107, "step": 33114 }, { "epoch": 3.402897657213317, "grad_norm": 0.07674260437488556, "learning_rate": 0.01, "loss": 2.0243, "step": 33117 }, { "epoch": 3.4032059186189887, "grad_norm": 0.03869005665183067, "learning_rate": 0.01, "loss": 1.9774, "step": 33120 }, { "epoch": 3.403514180024661, "grad_norm": 0.08710911124944687, "learning_rate": 0.01, "loss": 1.995, "step": 33123 }, { "epoch": 3.403822441430333, "grad_norm": 0.09343576431274414, "learning_rate": 0.01, "loss": 1.9881, "step": 33126 }, { "epoch": 3.404130702836005, "grad_norm": 0.048723649233579636, "learning_rate": 0.01, "loss": 2.0273, "step": 33129 }, { "epoch": 3.404438964241677, "grad_norm": 0.07655228674411774, "learning_rate": 0.01, "loss": 2.0235, "step": 33132 }, { "epoch": 3.4047472256473488, "grad_norm": 0.08873139321804047, "learning_rate": 0.01, "loss": 1.9975, "step": 33135 }, { "epoch": 3.405055487053021, "grad_norm": 0.06514773517847061, "learning_rate": 0.01, "loss": 2.0167, "step": 33138 }, { "epoch": 3.405363748458693, "grad_norm": 0.06446196138858795, "learning_rate": 0.01, "loss": 2.0051, "step": 33141 }, { "epoch": 3.405672009864365, "grad_norm": 0.12146038562059402, "learning_rate": 0.01, "loss": 2.0171, "step": 33144 }, { "epoch": 3.405980271270037, "grad_norm": 0.06639200448989868, "learning_rate": 0.01, "loss": 2.0049, "step": 33147 }, { "epoch": 3.406288532675709, "grad_norm": 0.05644892901182175, "learning_rate": 0.01, "loss": 2.0172, "step": 33150 }, { "epoch": 3.406596794081381, "grad_norm": 0.037636831402778625, "learning_rate": 0.01, "loss": 2.008, "step": 33153 }, { "epoch": 3.4069050554870532, "grad_norm": 0.031332679092884064, "learning_rate": 0.01, "loss": 1.9821, "step": 33156 }, { "epoch": 3.407213316892725, "grad_norm": 0.051785338670015335, "learning_rate": 0.01, "loss": 2.011, "step": 33159 }, { "epoch": 3.407521578298397, "grad_norm": 0.07761172205209732, "learning_rate": 0.01, "loss": 2.0277, "step": 33162 }, { "epoch": 3.407829839704069, "grad_norm": 0.09536123275756836, "learning_rate": 0.01, "loss": 2.0222, "step": 33165 }, { "epoch": 3.408138101109741, "grad_norm": 0.04122615605592728, "learning_rate": 0.01, "loss": 2.0065, "step": 33168 }, { "epoch": 3.408446362515413, "grad_norm": 0.0360184982419014, "learning_rate": 0.01, "loss": 2.002, "step": 33171 }, { "epoch": 3.408754623921085, "grad_norm": 0.03222360834479332, "learning_rate": 0.01, "loss": 2.0083, "step": 33174 }, { "epoch": 3.4090628853267573, "grad_norm": 0.043042391538619995, "learning_rate": 0.01, "loss": 1.989, "step": 33177 }, { "epoch": 3.409371146732429, "grad_norm": 0.06593729555606842, "learning_rate": 0.01, "loss": 2.0115, "step": 33180 }, { "epoch": 3.409679408138101, "grad_norm": 0.09074060618877411, "learning_rate": 0.01, "loss": 2.0168, "step": 33183 }, { "epoch": 3.409987669543773, "grad_norm": 0.06270799785852432, "learning_rate": 0.01, "loss": 1.9973, "step": 33186 }, { "epoch": 3.410295930949445, "grad_norm": 0.08982829004526138, "learning_rate": 0.01, "loss": 1.9966, "step": 33189 }, { "epoch": 3.410604192355117, "grad_norm": 0.04844099283218384, "learning_rate": 0.01, "loss": 2.0223, "step": 33192 }, { "epoch": 3.410912453760789, "grad_norm": 0.07782240957021713, "learning_rate": 0.01, "loss": 2.025, "step": 33195 }, { "epoch": 3.4112207151664613, "grad_norm": 0.07867666333913803, "learning_rate": 0.01, "loss": 2.0076, "step": 33198 }, { "epoch": 3.411528976572133, "grad_norm": 0.07254987210035324, "learning_rate": 0.01, "loss": 2.0129, "step": 33201 }, { "epoch": 3.4118372379778052, "grad_norm": 0.04480341821908951, "learning_rate": 0.01, "loss": 1.9914, "step": 33204 }, { "epoch": 3.412145499383477, "grad_norm": 0.03986749053001404, "learning_rate": 0.01, "loss": 1.9867, "step": 33207 }, { "epoch": 3.412453760789149, "grad_norm": 0.04472361132502556, "learning_rate": 0.01, "loss": 2.0184, "step": 33210 }, { "epoch": 3.4127620221948214, "grad_norm": 0.07955579459667206, "learning_rate": 0.01, "loss": 2.01, "step": 33213 }, { "epoch": 3.413070283600493, "grad_norm": 0.056707967072725296, "learning_rate": 0.01, "loss": 2.0105, "step": 33216 }, { "epoch": 3.4133785450061653, "grad_norm": 0.04070746898651123, "learning_rate": 0.01, "loss": 2.0226, "step": 33219 }, { "epoch": 3.413686806411837, "grad_norm": 0.03773896023631096, "learning_rate": 0.01, "loss": 2.015, "step": 33222 }, { "epoch": 3.4139950678175093, "grad_norm": 0.10554299503564835, "learning_rate": 0.01, "loss": 2.0331, "step": 33225 }, { "epoch": 3.4143033292231815, "grad_norm": 0.0555710643529892, "learning_rate": 0.01, "loss": 1.996, "step": 33228 }, { "epoch": 3.414611590628853, "grad_norm": 0.1139519140124321, "learning_rate": 0.01, "loss": 1.9821, "step": 33231 }, { "epoch": 3.4149198520345254, "grad_norm": 0.042904384434223175, "learning_rate": 0.01, "loss": 2.0057, "step": 33234 }, { "epoch": 3.415228113440197, "grad_norm": 0.10528502613306046, "learning_rate": 0.01, "loss": 1.9781, "step": 33237 }, { "epoch": 3.4155363748458694, "grad_norm": 0.03914659097790718, "learning_rate": 0.01, "loss": 2.0029, "step": 33240 }, { "epoch": 3.415844636251541, "grad_norm": 0.11122586578130722, "learning_rate": 0.01, "loss": 1.9968, "step": 33243 }, { "epoch": 3.4161528976572133, "grad_norm": 0.06572670489549637, "learning_rate": 0.01, "loss": 2.0077, "step": 33246 }, { "epoch": 3.4164611590628855, "grad_norm": 0.05022534728050232, "learning_rate": 0.01, "loss": 2.0224, "step": 33249 }, { "epoch": 3.4167694204685573, "grad_norm": 0.08149400353431702, "learning_rate": 0.01, "loss": 2.0326, "step": 33252 }, { "epoch": 3.4170776818742294, "grad_norm": 0.04350002855062485, "learning_rate": 0.01, "loss": 1.9947, "step": 33255 }, { "epoch": 3.417385943279901, "grad_norm": 0.0445462241768837, "learning_rate": 0.01, "loss": 2.0372, "step": 33258 }, { "epoch": 3.4176942046855734, "grad_norm": 0.04082934185862541, "learning_rate": 0.01, "loss": 2.0199, "step": 33261 }, { "epoch": 3.418002466091245, "grad_norm": 0.060355301946401596, "learning_rate": 0.01, "loss": 2.0204, "step": 33264 }, { "epoch": 3.4183107274969173, "grad_norm": 0.04439264163374901, "learning_rate": 0.01, "loss": 2.0216, "step": 33267 }, { "epoch": 3.4186189889025895, "grad_norm": 0.11760549992322922, "learning_rate": 0.01, "loss": 2.0284, "step": 33270 }, { "epoch": 3.4189272503082613, "grad_norm": 0.08547448366880417, "learning_rate": 0.01, "loss": 2.0307, "step": 33273 }, { "epoch": 3.4192355117139335, "grad_norm": 0.06072860211133957, "learning_rate": 0.01, "loss": 1.992, "step": 33276 }, { "epoch": 3.4195437731196057, "grad_norm": 0.03637344762682915, "learning_rate": 0.01, "loss": 2.02, "step": 33279 }, { "epoch": 3.4198520345252774, "grad_norm": 0.0440024733543396, "learning_rate": 0.01, "loss": 2.007, "step": 33282 }, { "epoch": 3.4201602959309496, "grad_norm": 0.040933944284915924, "learning_rate": 0.01, "loss": 2.0253, "step": 33285 }, { "epoch": 3.4204685573366214, "grad_norm": 0.10233576595783234, "learning_rate": 0.01, "loss": 2.0433, "step": 33288 }, { "epoch": 3.4207768187422936, "grad_norm": 0.10494884103536606, "learning_rate": 0.01, "loss": 2.0007, "step": 33291 }, { "epoch": 3.4210850801479653, "grad_norm": 0.04247460886836052, "learning_rate": 0.01, "loss": 2.0119, "step": 33294 }, { "epoch": 3.4213933415536375, "grad_norm": 0.07929468154907227, "learning_rate": 0.01, "loss": 2.0067, "step": 33297 }, { "epoch": 3.4217016029593097, "grad_norm": 0.04947086423635483, "learning_rate": 0.01, "loss": 2.0049, "step": 33300 }, { "epoch": 3.4220098643649814, "grad_norm": 0.05473649874329567, "learning_rate": 0.01, "loss": 1.9872, "step": 33303 }, { "epoch": 3.4223181257706536, "grad_norm": 0.06789970397949219, "learning_rate": 0.01, "loss": 2.0173, "step": 33306 }, { "epoch": 3.4226263871763254, "grad_norm": 0.044122181832790375, "learning_rate": 0.01, "loss": 1.9957, "step": 33309 }, { "epoch": 3.4229346485819976, "grad_norm": 0.0713338777422905, "learning_rate": 0.01, "loss": 1.9912, "step": 33312 }, { "epoch": 3.4232429099876693, "grad_norm": 0.09774953871965408, "learning_rate": 0.01, "loss": 1.999, "step": 33315 }, { "epoch": 3.4235511713933415, "grad_norm": 0.049434032291173935, "learning_rate": 0.01, "loss": 2.0194, "step": 33318 }, { "epoch": 3.4238594327990137, "grad_norm": 0.06290262192487717, "learning_rate": 0.01, "loss": 2.0014, "step": 33321 }, { "epoch": 3.4241676942046855, "grad_norm": 0.042732108384370804, "learning_rate": 0.01, "loss": 1.9909, "step": 33324 }, { "epoch": 3.4244759556103577, "grad_norm": 0.03461041674017906, "learning_rate": 0.01, "loss": 1.9881, "step": 33327 }, { "epoch": 3.4247842170160294, "grad_norm": 0.04503572732210159, "learning_rate": 0.01, "loss": 1.9981, "step": 33330 }, { "epoch": 3.4250924784217016, "grad_norm": 0.04774646461009979, "learning_rate": 0.01, "loss": 2.0216, "step": 33333 }, { "epoch": 3.4254007398273734, "grad_norm": 0.08576779067516327, "learning_rate": 0.01, "loss": 2.0124, "step": 33336 }, { "epoch": 3.4257090012330456, "grad_norm": 0.13729599118232727, "learning_rate": 0.01, "loss": 2.0126, "step": 33339 }, { "epoch": 3.4260172626387178, "grad_norm": 0.060716260224580765, "learning_rate": 0.01, "loss": 2.0153, "step": 33342 }, { "epoch": 3.4263255240443895, "grad_norm": 0.037777479737997055, "learning_rate": 0.01, "loss": 2.0181, "step": 33345 }, { "epoch": 3.4266337854500617, "grad_norm": 0.042688485234975815, "learning_rate": 0.01, "loss": 1.9792, "step": 33348 }, { "epoch": 3.426942046855734, "grad_norm": 0.07009312510490417, "learning_rate": 0.01, "loss": 2.0143, "step": 33351 }, { "epoch": 3.4272503082614056, "grad_norm": 0.10260313749313354, "learning_rate": 0.01, "loss": 1.9985, "step": 33354 }, { "epoch": 3.427558569667078, "grad_norm": 0.08466068655252457, "learning_rate": 0.01, "loss": 1.9981, "step": 33357 }, { "epoch": 3.4278668310727496, "grad_norm": 0.061912521719932556, "learning_rate": 0.01, "loss": 2.0144, "step": 33360 }, { "epoch": 3.428175092478422, "grad_norm": 0.0470789298415184, "learning_rate": 0.01, "loss": 1.9863, "step": 33363 }, { "epoch": 3.4284833538840935, "grad_norm": 0.0477573424577713, "learning_rate": 0.01, "loss": 2.0189, "step": 33366 }, { "epoch": 3.4287916152897657, "grad_norm": 0.03324504569172859, "learning_rate": 0.01, "loss": 2.0107, "step": 33369 }, { "epoch": 3.429099876695438, "grad_norm": 0.07741666585206985, "learning_rate": 0.01, "loss": 2.0103, "step": 33372 }, { "epoch": 3.4294081381011097, "grad_norm": 0.05770926922559738, "learning_rate": 0.01, "loss": 2.0398, "step": 33375 }, { "epoch": 3.429716399506782, "grad_norm": 0.08471731096506119, "learning_rate": 0.01, "loss": 2.0451, "step": 33378 }, { "epoch": 3.4300246609124536, "grad_norm": 0.04667286202311516, "learning_rate": 0.01, "loss": 2.0136, "step": 33381 }, { "epoch": 3.430332922318126, "grad_norm": 0.0683809220790863, "learning_rate": 0.01, "loss": 2.0032, "step": 33384 }, { "epoch": 3.4306411837237976, "grad_norm": 0.07834406197071075, "learning_rate": 0.01, "loss": 2.0172, "step": 33387 }, { "epoch": 3.4309494451294698, "grad_norm": 0.04956913739442825, "learning_rate": 0.01, "loss": 2.0226, "step": 33390 }, { "epoch": 3.431257706535142, "grad_norm": 0.05492135509848595, "learning_rate": 0.01, "loss": 2.0153, "step": 33393 }, { "epoch": 3.4315659679408137, "grad_norm": 0.05343586578965187, "learning_rate": 0.01, "loss": 1.9995, "step": 33396 }, { "epoch": 3.431874229346486, "grad_norm": 0.04083942621946335, "learning_rate": 0.01, "loss": 2.005, "step": 33399 }, { "epoch": 3.4321824907521576, "grad_norm": 0.06474661827087402, "learning_rate": 0.01, "loss": 2.0238, "step": 33402 }, { "epoch": 3.43249075215783, "grad_norm": 0.09690015017986298, "learning_rate": 0.01, "loss": 1.9986, "step": 33405 }, { "epoch": 3.432799013563502, "grad_norm": 0.17796829342842102, "learning_rate": 0.01, "loss": 2.0288, "step": 33408 }, { "epoch": 3.433107274969174, "grad_norm": 0.11173928529024124, "learning_rate": 0.01, "loss": 2.0249, "step": 33411 }, { "epoch": 3.433415536374846, "grad_norm": 0.045607730746269226, "learning_rate": 0.01, "loss": 1.9694, "step": 33414 }, { "epoch": 3.4337237977805177, "grad_norm": 0.03982311487197876, "learning_rate": 0.01, "loss": 1.9895, "step": 33417 }, { "epoch": 3.43403205918619, "grad_norm": 0.03420604392886162, "learning_rate": 0.01, "loss": 1.9957, "step": 33420 }, { "epoch": 3.434340320591862, "grad_norm": 0.03757992014288902, "learning_rate": 0.01, "loss": 2.026, "step": 33423 }, { "epoch": 3.434648581997534, "grad_norm": 0.05664653331041336, "learning_rate": 0.01, "loss": 1.9884, "step": 33426 }, { "epoch": 3.434956843403206, "grad_norm": 0.05265260115265846, "learning_rate": 0.01, "loss": 2.0409, "step": 33429 }, { "epoch": 3.435265104808878, "grad_norm": 0.0430876798927784, "learning_rate": 0.01, "loss": 2.0004, "step": 33432 }, { "epoch": 3.43557336621455, "grad_norm": 0.04210485517978668, "learning_rate": 0.01, "loss": 2.0004, "step": 33435 }, { "epoch": 3.4358816276202218, "grad_norm": 0.049002841114997864, "learning_rate": 0.01, "loss": 2.0129, "step": 33438 }, { "epoch": 3.436189889025894, "grad_norm": 0.0786898285150528, "learning_rate": 0.01, "loss": 1.9915, "step": 33441 }, { "epoch": 3.436498150431566, "grad_norm": 0.04218638688325882, "learning_rate": 0.01, "loss": 2.0079, "step": 33444 }, { "epoch": 3.436806411837238, "grad_norm": 0.13452041149139404, "learning_rate": 0.01, "loss": 2.0105, "step": 33447 }, { "epoch": 3.43711467324291, "grad_norm": 0.06728319823741913, "learning_rate": 0.01, "loss": 2.0053, "step": 33450 }, { "epoch": 3.437422934648582, "grad_norm": 0.09041707217693329, "learning_rate": 0.01, "loss": 2.0059, "step": 33453 }, { "epoch": 3.437731196054254, "grad_norm": 0.04817497730255127, "learning_rate": 0.01, "loss": 2.0024, "step": 33456 }, { "epoch": 3.438039457459926, "grad_norm": 0.05033170431852341, "learning_rate": 0.01, "loss": 2.016, "step": 33459 }, { "epoch": 3.438347718865598, "grad_norm": 0.12856252491474152, "learning_rate": 0.01, "loss": 2.0202, "step": 33462 }, { "epoch": 3.43865598027127, "grad_norm": 0.03690528869628906, "learning_rate": 0.01, "loss": 2.0131, "step": 33465 }, { "epoch": 3.438964241676942, "grad_norm": 0.09053459018468857, "learning_rate": 0.01, "loss": 2.0057, "step": 33468 }, { "epoch": 3.439272503082614, "grad_norm": 0.11929309368133545, "learning_rate": 0.01, "loss": 1.9995, "step": 33471 }, { "epoch": 3.4395807644882863, "grad_norm": 0.06987284123897552, "learning_rate": 0.01, "loss": 1.9871, "step": 33474 }, { "epoch": 3.439889025893958, "grad_norm": 0.06181707605719566, "learning_rate": 0.01, "loss": 2.0167, "step": 33477 }, { "epoch": 3.4401972872996303, "grad_norm": 0.045914020389318466, "learning_rate": 0.01, "loss": 2.0067, "step": 33480 }, { "epoch": 3.440505548705302, "grad_norm": 0.04277556762099266, "learning_rate": 0.01, "loss": 2.0084, "step": 33483 }, { "epoch": 3.440813810110974, "grad_norm": 0.045943450182676315, "learning_rate": 0.01, "loss": 2.0117, "step": 33486 }, { "epoch": 3.441122071516646, "grad_norm": 0.04440785571932793, "learning_rate": 0.01, "loss": 1.9753, "step": 33489 }, { "epoch": 3.441430332922318, "grad_norm": 0.12033234536647797, "learning_rate": 0.01, "loss": 2.0216, "step": 33492 }, { "epoch": 3.4417385943279903, "grad_norm": 0.06069677323102951, "learning_rate": 0.01, "loss": 2.0139, "step": 33495 }, { "epoch": 3.442046855733662, "grad_norm": 0.08571046590805054, "learning_rate": 0.01, "loss": 2.0121, "step": 33498 }, { "epoch": 3.4423551171393343, "grad_norm": 0.05251142010092735, "learning_rate": 0.01, "loss": 2.0234, "step": 33501 }, { "epoch": 3.442663378545006, "grad_norm": 0.09658701717853546, "learning_rate": 0.01, "loss": 2.023, "step": 33504 }, { "epoch": 3.4429716399506782, "grad_norm": 0.10625968873500824, "learning_rate": 0.01, "loss": 2.0158, "step": 33507 }, { "epoch": 3.44327990135635, "grad_norm": 0.061645977199077606, "learning_rate": 0.01, "loss": 1.987, "step": 33510 }, { "epoch": 3.443588162762022, "grad_norm": 0.06879527121782303, "learning_rate": 0.01, "loss": 1.9988, "step": 33513 }, { "epoch": 3.4438964241676944, "grad_norm": 0.07986783981323242, "learning_rate": 0.01, "loss": 2.0087, "step": 33516 }, { "epoch": 3.444204685573366, "grad_norm": 0.06323929876089096, "learning_rate": 0.01, "loss": 2.0047, "step": 33519 }, { "epoch": 3.4445129469790383, "grad_norm": 0.08186205476522446, "learning_rate": 0.01, "loss": 2.0312, "step": 33522 }, { "epoch": 3.44482120838471, "grad_norm": 0.1049259677529335, "learning_rate": 0.01, "loss": 1.9993, "step": 33525 }, { "epoch": 3.4451294697903823, "grad_norm": 0.12427592277526855, "learning_rate": 0.01, "loss": 1.9984, "step": 33528 }, { "epoch": 3.445437731196054, "grad_norm": 0.04911283776164055, "learning_rate": 0.01, "loss": 1.9979, "step": 33531 }, { "epoch": 3.445745992601726, "grad_norm": 0.07451221346855164, "learning_rate": 0.01, "loss": 1.9814, "step": 33534 }, { "epoch": 3.4460542540073984, "grad_norm": 0.04838255047798157, "learning_rate": 0.01, "loss": 2.0044, "step": 33537 }, { "epoch": 3.44636251541307, "grad_norm": 0.0435669869184494, "learning_rate": 0.01, "loss": 2.0134, "step": 33540 }, { "epoch": 3.4466707768187423, "grad_norm": 0.12010036408901215, "learning_rate": 0.01, "loss": 1.965, "step": 33543 }, { "epoch": 3.4469790382244145, "grad_norm": 0.04258548840880394, "learning_rate": 0.01, "loss": 1.9871, "step": 33546 }, { "epoch": 3.4472872996300863, "grad_norm": 0.04736476391553879, "learning_rate": 0.01, "loss": 1.983, "step": 33549 }, { "epoch": 3.4475955610357585, "grad_norm": 0.04423545300960541, "learning_rate": 0.01, "loss": 2.001, "step": 33552 }, { "epoch": 3.4479038224414302, "grad_norm": 0.07585839927196503, "learning_rate": 0.01, "loss": 2.0094, "step": 33555 }, { "epoch": 3.4482120838471024, "grad_norm": 0.03559425473213196, "learning_rate": 0.01, "loss": 1.9835, "step": 33558 }, { "epoch": 3.448520345252774, "grad_norm": 0.058567702770233154, "learning_rate": 0.01, "loss": 1.9996, "step": 33561 }, { "epoch": 3.4488286066584464, "grad_norm": 0.05344400927424431, "learning_rate": 0.01, "loss": 2.0128, "step": 33564 }, { "epoch": 3.4491368680641186, "grad_norm": 0.0584418885409832, "learning_rate": 0.01, "loss": 2.0334, "step": 33567 }, { "epoch": 3.4494451294697903, "grad_norm": 0.14322839677333832, "learning_rate": 0.01, "loss": 2.0362, "step": 33570 }, { "epoch": 3.4497533908754625, "grad_norm": 0.039136361330747604, "learning_rate": 0.01, "loss": 1.9792, "step": 33573 }, { "epoch": 3.4500616522811343, "grad_norm": 0.0871317982673645, "learning_rate": 0.01, "loss": 2.03, "step": 33576 }, { "epoch": 3.4503699136868065, "grad_norm": 0.07295375317335129, "learning_rate": 0.01, "loss": 1.9906, "step": 33579 }, { "epoch": 3.450678175092478, "grad_norm": 0.04469291865825653, "learning_rate": 0.01, "loss": 2.0245, "step": 33582 }, { "epoch": 3.4509864364981504, "grad_norm": 0.06063535064458847, "learning_rate": 0.01, "loss": 2.0083, "step": 33585 }, { "epoch": 3.4512946979038226, "grad_norm": 0.07924182713031769, "learning_rate": 0.01, "loss": 2.0115, "step": 33588 }, { "epoch": 3.4516029593094943, "grad_norm": 0.089077427983284, "learning_rate": 0.01, "loss": 2.017, "step": 33591 }, { "epoch": 3.4519112207151665, "grad_norm": 0.08197617530822754, "learning_rate": 0.01, "loss": 1.9987, "step": 33594 }, { "epoch": 3.4522194821208383, "grad_norm": 0.039551399648189545, "learning_rate": 0.01, "loss": 2.0211, "step": 33597 }, { "epoch": 3.4525277435265105, "grad_norm": 0.08920145779848099, "learning_rate": 0.01, "loss": 1.9952, "step": 33600 }, { "epoch": 3.4528360049321827, "grad_norm": 0.04301462695002556, "learning_rate": 0.01, "loss": 1.9898, "step": 33603 }, { "epoch": 3.4531442663378544, "grad_norm": 0.05201060697436333, "learning_rate": 0.01, "loss": 2.0022, "step": 33606 }, { "epoch": 3.4534525277435266, "grad_norm": 0.05899956077337265, "learning_rate": 0.01, "loss": 2.0318, "step": 33609 }, { "epoch": 3.4537607891491984, "grad_norm": 0.09653299301862717, "learning_rate": 0.01, "loss": 1.9955, "step": 33612 }, { "epoch": 3.4540690505548706, "grad_norm": 0.06416913866996765, "learning_rate": 0.01, "loss": 1.9858, "step": 33615 }, { "epoch": 3.4543773119605428, "grad_norm": 0.07932529598474503, "learning_rate": 0.01, "loss": 1.9941, "step": 33618 }, { "epoch": 3.4546855733662145, "grad_norm": 0.06251846253871918, "learning_rate": 0.01, "loss": 1.984, "step": 33621 }, { "epoch": 3.4549938347718867, "grad_norm": 0.07022767513990402, "learning_rate": 0.01, "loss": 2.0032, "step": 33624 }, { "epoch": 3.4553020961775585, "grad_norm": 0.08116226643323898, "learning_rate": 0.01, "loss": 2.0229, "step": 33627 }, { "epoch": 3.4556103575832307, "grad_norm": 0.07052188366651535, "learning_rate": 0.01, "loss": 1.986, "step": 33630 }, { "epoch": 3.4559186189889024, "grad_norm": 0.055427566170692444, "learning_rate": 0.01, "loss": 1.9792, "step": 33633 }, { "epoch": 3.4562268803945746, "grad_norm": 0.049462925642728806, "learning_rate": 0.01, "loss": 1.9937, "step": 33636 }, { "epoch": 3.456535141800247, "grad_norm": 0.05117397755384445, "learning_rate": 0.01, "loss": 2.0058, "step": 33639 }, { "epoch": 3.4568434032059185, "grad_norm": 0.14244894683361053, "learning_rate": 0.01, "loss": 1.9981, "step": 33642 }, { "epoch": 3.4571516646115907, "grad_norm": 0.040531981736421585, "learning_rate": 0.01, "loss": 2.0291, "step": 33645 }, { "epoch": 3.4574599260172625, "grad_norm": 0.0898265540599823, "learning_rate": 0.01, "loss": 2.0103, "step": 33648 }, { "epoch": 3.4577681874229347, "grad_norm": 0.06414288282394409, "learning_rate": 0.01, "loss": 1.9944, "step": 33651 }, { "epoch": 3.4580764488286064, "grad_norm": 0.06362918019294739, "learning_rate": 0.01, "loss": 2.031, "step": 33654 }, { "epoch": 3.4583847102342786, "grad_norm": 0.0881992056965828, "learning_rate": 0.01, "loss": 2.0363, "step": 33657 }, { "epoch": 3.458692971639951, "grad_norm": 0.03909778594970703, "learning_rate": 0.01, "loss": 1.9831, "step": 33660 }, { "epoch": 3.4590012330456226, "grad_norm": 0.06597696989774704, "learning_rate": 0.01, "loss": 2.0153, "step": 33663 }, { "epoch": 3.4593094944512948, "grad_norm": 0.05599299073219299, "learning_rate": 0.01, "loss": 2.0203, "step": 33666 }, { "epoch": 3.4596177558569665, "grad_norm": 0.0734795406460762, "learning_rate": 0.01, "loss": 2.0126, "step": 33669 }, { "epoch": 3.4599260172626387, "grad_norm": 0.07773378491401672, "learning_rate": 0.01, "loss": 1.9858, "step": 33672 }, { "epoch": 3.460234278668311, "grad_norm": 0.04273884743452072, "learning_rate": 0.01, "loss": 2.0054, "step": 33675 }, { "epoch": 3.4605425400739827, "grad_norm": 0.08914119005203247, "learning_rate": 0.01, "loss": 2.0455, "step": 33678 }, { "epoch": 3.460850801479655, "grad_norm": 0.059121765196323395, "learning_rate": 0.01, "loss": 1.9822, "step": 33681 }, { "epoch": 3.4611590628853266, "grad_norm": 0.0828641727566719, "learning_rate": 0.01, "loss": 1.9946, "step": 33684 }, { "epoch": 3.461467324290999, "grad_norm": 0.07057880610227585, "learning_rate": 0.01, "loss": 1.9918, "step": 33687 }, { "epoch": 3.461775585696671, "grad_norm": 0.0789676234126091, "learning_rate": 0.01, "loss": 2.005, "step": 33690 }, { "epoch": 3.4620838471023427, "grad_norm": 0.06654086709022522, "learning_rate": 0.01, "loss": 1.9751, "step": 33693 }, { "epoch": 3.462392108508015, "grad_norm": 0.08804110437631607, "learning_rate": 0.01, "loss": 1.9984, "step": 33696 }, { "epoch": 3.4627003699136867, "grad_norm": 0.05654985085129738, "learning_rate": 0.01, "loss": 1.9957, "step": 33699 }, { "epoch": 3.463008631319359, "grad_norm": 0.03474681079387665, "learning_rate": 0.01, "loss": 1.9757, "step": 33702 }, { "epoch": 3.4633168927250306, "grad_norm": 0.03495550900697708, "learning_rate": 0.01, "loss": 1.985, "step": 33705 }, { "epoch": 3.463625154130703, "grad_norm": 0.07207003980875015, "learning_rate": 0.01, "loss": 2.0038, "step": 33708 }, { "epoch": 3.463933415536375, "grad_norm": 0.10733482986688614, "learning_rate": 0.01, "loss": 1.998, "step": 33711 }, { "epoch": 3.4642416769420468, "grad_norm": 0.17830905318260193, "learning_rate": 0.01, "loss": 2.0263, "step": 33714 }, { "epoch": 3.464549938347719, "grad_norm": 0.10432233661413193, "learning_rate": 0.01, "loss": 2.0258, "step": 33717 }, { "epoch": 3.4648581997533907, "grad_norm": 0.08804329484701157, "learning_rate": 0.01, "loss": 2.0208, "step": 33720 }, { "epoch": 3.465166461159063, "grad_norm": 0.09405447542667389, "learning_rate": 0.01, "loss": 2.0228, "step": 33723 }, { "epoch": 3.4654747225647347, "grad_norm": 0.08153831958770752, "learning_rate": 0.01, "loss": 2.0129, "step": 33726 }, { "epoch": 3.465782983970407, "grad_norm": 0.04865524545311928, "learning_rate": 0.01, "loss": 2.0067, "step": 33729 }, { "epoch": 3.466091245376079, "grad_norm": 0.0705978274345398, "learning_rate": 0.01, "loss": 2.005, "step": 33732 }, { "epoch": 3.466399506781751, "grad_norm": 0.08283443003892899, "learning_rate": 0.01, "loss": 1.9943, "step": 33735 }, { "epoch": 3.466707768187423, "grad_norm": 0.059983205050230026, "learning_rate": 0.01, "loss": 1.9991, "step": 33738 }, { "epoch": 3.467016029593095, "grad_norm": 0.045960474759340286, "learning_rate": 0.01, "loss": 1.9823, "step": 33741 }, { "epoch": 3.467324290998767, "grad_norm": 0.03882883861660957, "learning_rate": 0.01, "loss": 2.0191, "step": 33744 }, { "epoch": 3.467632552404439, "grad_norm": 0.07004483044147491, "learning_rate": 0.01, "loss": 1.9851, "step": 33747 }, { "epoch": 3.467940813810111, "grad_norm": 0.047444459050893784, "learning_rate": 0.01, "loss": 2.0257, "step": 33750 }, { "epoch": 3.468249075215783, "grad_norm": 0.04262397438287735, "learning_rate": 0.01, "loss": 1.9981, "step": 33753 }, { "epoch": 3.468557336621455, "grad_norm": 0.05599913001060486, "learning_rate": 0.01, "loss": 2.005, "step": 33756 }, { "epoch": 3.468865598027127, "grad_norm": 0.03917532414197922, "learning_rate": 0.01, "loss": 2.012, "step": 33759 }, { "epoch": 3.469173859432799, "grad_norm": 0.14925096929073334, "learning_rate": 0.01, "loss": 1.9998, "step": 33762 }, { "epoch": 3.469482120838471, "grad_norm": 0.09570999443531036, "learning_rate": 0.01, "loss": 1.987, "step": 33765 }, { "epoch": 3.469790382244143, "grad_norm": 0.07883327454328537, "learning_rate": 0.01, "loss": 2.0051, "step": 33768 }, { "epoch": 3.470098643649815, "grad_norm": 0.06480975449085236, "learning_rate": 0.01, "loss": 2.0074, "step": 33771 }, { "epoch": 3.470406905055487, "grad_norm": 0.054496169090270996, "learning_rate": 0.01, "loss": 2.0306, "step": 33774 }, { "epoch": 3.470715166461159, "grad_norm": 0.04248659685254097, "learning_rate": 0.01, "loss": 2.0213, "step": 33777 }, { "epoch": 3.471023427866831, "grad_norm": 0.04117753356695175, "learning_rate": 0.01, "loss": 1.9845, "step": 33780 }, { "epoch": 3.4713316892725032, "grad_norm": 0.06876087188720703, "learning_rate": 0.01, "loss": 2.0142, "step": 33783 }, { "epoch": 3.471639950678175, "grad_norm": 0.08680430799722672, "learning_rate": 0.01, "loss": 2.0103, "step": 33786 }, { "epoch": 3.471948212083847, "grad_norm": 0.07227423787117004, "learning_rate": 0.01, "loss": 2.0229, "step": 33789 }, { "epoch": 3.472256473489519, "grad_norm": 0.04946593940258026, "learning_rate": 0.01, "loss": 2.0042, "step": 33792 }, { "epoch": 3.472564734895191, "grad_norm": 0.05215618014335632, "learning_rate": 0.01, "loss": 2.0202, "step": 33795 }, { "epoch": 3.4728729963008633, "grad_norm": 0.044869258999824524, "learning_rate": 0.01, "loss": 2.014, "step": 33798 }, { "epoch": 3.473181257706535, "grad_norm": 0.048952534794807434, "learning_rate": 0.01, "loss": 2.0097, "step": 33801 }, { "epoch": 3.4734895191122073, "grad_norm": 0.031407974660396576, "learning_rate": 0.01, "loss": 1.9993, "step": 33804 }, { "epoch": 3.473797780517879, "grad_norm": 0.033864185214042664, "learning_rate": 0.01, "loss": 2.0037, "step": 33807 }, { "epoch": 3.474106041923551, "grad_norm": 0.08060206472873688, "learning_rate": 0.01, "loss": 2.0053, "step": 33810 }, { "epoch": 3.4744143033292234, "grad_norm": 0.059409331530332565, "learning_rate": 0.01, "loss": 1.9876, "step": 33813 }, { "epoch": 3.474722564734895, "grad_norm": 0.04131161794066429, "learning_rate": 0.01, "loss": 1.9964, "step": 33816 }, { "epoch": 3.4750308261405674, "grad_norm": 0.04256697744131088, "learning_rate": 0.01, "loss": 2.0479, "step": 33819 }, { "epoch": 3.475339087546239, "grad_norm": 0.08916765451431274, "learning_rate": 0.01, "loss": 1.9885, "step": 33822 }, { "epoch": 3.4756473489519113, "grad_norm": 0.08474129438400269, "learning_rate": 0.01, "loss": 2.0012, "step": 33825 }, { "epoch": 3.475955610357583, "grad_norm": 0.05649823695421219, "learning_rate": 0.01, "loss": 1.9993, "step": 33828 }, { "epoch": 3.4762638717632552, "grad_norm": 0.05935325473546982, "learning_rate": 0.01, "loss": 2.0004, "step": 33831 }, { "epoch": 3.4765721331689274, "grad_norm": 0.04731081798672676, "learning_rate": 0.01, "loss": 1.9693, "step": 33834 }, { "epoch": 3.476880394574599, "grad_norm": 0.03790346160531044, "learning_rate": 0.01, "loss": 2.0295, "step": 33837 }, { "epoch": 3.4771886559802714, "grad_norm": 0.03726886212825775, "learning_rate": 0.01, "loss": 1.9737, "step": 33840 }, { "epoch": 3.477496917385943, "grad_norm": 0.07210247963666916, "learning_rate": 0.01, "loss": 1.9936, "step": 33843 }, { "epoch": 3.4778051787916153, "grad_norm": 0.07953071594238281, "learning_rate": 0.01, "loss": 2.0269, "step": 33846 }, { "epoch": 3.478113440197287, "grad_norm": 0.08004479855298996, "learning_rate": 0.01, "loss": 1.9947, "step": 33849 }, { "epoch": 3.4784217016029593, "grad_norm": 0.12617255747318268, "learning_rate": 0.01, "loss": 2.041, "step": 33852 }, { "epoch": 3.4787299630086315, "grad_norm": 0.08742248266935349, "learning_rate": 0.01, "loss": 1.9946, "step": 33855 }, { "epoch": 3.479038224414303, "grad_norm": 0.07119353115558624, "learning_rate": 0.01, "loss": 2.0041, "step": 33858 }, { "epoch": 3.4793464858199754, "grad_norm": 0.06368902325630188, "learning_rate": 0.01, "loss": 2.011, "step": 33861 }, { "epoch": 3.479654747225647, "grad_norm": 0.04121660441160202, "learning_rate": 0.01, "loss": 1.9902, "step": 33864 }, { "epoch": 3.4799630086313194, "grad_norm": 0.1237914115190506, "learning_rate": 0.01, "loss": 2.012, "step": 33867 }, { "epoch": 3.4802712700369915, "grad_norm": 0.02918284945189953, "learning_rate": 0.01, "loss": 2.0104, "step": 33870 }, { "epoch": 3.4805795314426633, "grad_norm": 0.0423772819340229, "learning_rate": 0.01, "loss": 2.0024, "step": 33873 }, { "epoch": 3.4808877928483355, "grad_norm": 0.05689895898103714, "learning_rate": 0.01, "loss": 2.0074, "step": 33876 }, { "epoch": 3.4811960542540072, "grad_norm": 0.057178955525159836, "learning_rate": 0.01, "loss": 1.9796, "step": 33879 }, { "epoch": 3.4815043156596794, "grad_norm": 0.04840772971510887, "learning_rate": 0.01, "loss": 1.9846, "step": 33882 }, { "epoch": 3.4818125770653516, "grad_norm": 0.0467451848089695, "learning_rate": 0.01, "loss": 2.0124, "step": 33885 }, { "epoch": 3.4821208384710234, "grad_norm": 0.08849336951971054, "learning_rate": 0.01, "loss": 2.0056, "step": 33888 }, { "epoch": 3.4824290998766956, "grad_norm": 0.040159258991479874, "learning_rate": 0.01, "loss": 2.0074, "step": 33891 }, { "epoch": 3.4827373612823673, "grad_norm": 0.03237557038664818, "learning_rate": 0.01, "loss": 1.9885, "step": 33894 }, { "epoch": 3.4830456226880395, "grad_norm": 0.11882402747869492, "learning_rate": 0.01, "loss": 1.988, "step": 33897 }, { "epoch": 3.4833538840937113, "grad_norm": 0.04206301271915436, "learning_rate": 0.01, "loss": 2.0138, "step": 33900 }, { "epoch": 3.4836621454993835, "grad_norm": 0.08875782787799835, "learning_rate": 0.01, "loss": 2.0009, "step": 33903 }, { "epoch": 3.4839704069050557, "grad_norm": 0.07771812379360199, "learning_rate": 0.01, "loss": 1.9787, "step": 33906 }, { "epoch": 3.4842786683107274, "grad_norm": 0.0839293822646141, "learning_rate": 0.01, "loss": 2.0191, "step": 33909 }, { "epoch": 3.4845869297163996, "grad_norm": 0.051187288016080856, "learning_rate": 0.01, "loss": 2.0112, "step": 33912 }, { "epoch": 3.4848951911220714, "grad_norm": 0.07497724145650864, "learning_rate": 0.01, "loss": 2.0153, "step": 33915 }, { "epoch": 3.4852034525277436, "grad_norm": 0.09341751039028168, "learning_rate": 0.01, "loss": 1.9815, "step": 33918 }, { "epoch": 3.4855117139334153, "grad_norm": 0.17380410432815552, "learning_rate": 0.01, "loss": 2.0015, "step": 33921 }, { "epoch": 3.4858199753390875, "grad_norm": 0.1407613456249237, "learning_rate": 0.01, "loss": 2.0066, "step": 33924 }, { "epoch": 3.4861282367447597, "grad_norm": 0.04020331799983978, "learning_rate": 0.01, "loss": 1.9915, "step": 33927 }, { "epoch": 3.4864364981504314, "grad_norm": 0.07008705288171768, "learning_rate": 0.01, "loss": 2.0045, "step": 33930 }, { "epoch": 3.4867447595561036, "grad_norm": 0.04310291260480881, "learning_rate": 0.01, "loss": 2.0103, "step": 33933 }, { "epoch": 3.487053020961776, "grad_norm": 0.03993843123316765, "learning_rate": 0.01, "loss": 2.0051, "step": 33936 }, { "epoch": 3.4873612823674476, "grad_norm": 0.04465002939105034, "learning_rate": 0.01, "loss": 2.0089, "step": 33939 }, { "epoch": 3.4876695437731198, "grad_norm": 0.03715788200497627, "learning_rate": 0.01, "loss": 1.9676, "step": 33942 }, { "epoch": 3.4879778051787915, "grad_norm": 0.06855741888284683, "learning_rate": 0.01, "loss": 2.0064, "step": 33945 }, { "epoch": 3.4882860665844637, "grad_norm": 0.05004393681883812, "learning_rate": 0.01, "loss": 2.0027, "step": 33948 }, { "epoch": 3.4885943279901355, "grad_norm": 0.11410415172576904, "learning_rate": 0.01, "loss": 1.9663, "step": 33951 }, { "epoch": 3.4889025893958077, "grad_norm": 0.09844734519720078, "learning_rate": 0.01, "loss": 1.9873, "step": 33954 }, { "epoch": 3.48921085080148, "grad_norm": 0.06708649545907974, "learning_rate": 0.01, "loss": 2.0026, "step": 33957 }, { "epoch": 3.4895191122071516, "grad_norm": 0.10117511451244354, "learning_rate": 0.01, "loss": 2.019, "step": 33960 }, { "epoch": 3.489827373612824, "grad_norm": 0.09758836030960083, "learning_rate": 0.01, "loss": 2.0272, "step": 33963 }, { "epoch": 3.4901356350184956, "grad_norm": 0.05189559981226921, "learning_rate": 0.01, "loss": 2.0107, "step": 33966 }, { "epoch": 3.4904438964241677, "grad_norm": 0.04805564507842064, "learning_rate": 0.01, "loss": 1.9984, "step": 33969 }, { "epoch": 3.4907521578298395, "grad_norm": 0.10716047137975693, "learning_rate": 0.01, "loss": 2.0004, "step": 33972 }, { "epoch": 3.4910604192355117, "grad_norm": 0.0761241689324379, "learning_rate": 0.01, "loss": 1.994, "step": 33975 }, { "epoch": 3.491368680641184, "grad_norm": 0.09611841291189194, "learning_rate": 0.01, "loss": 2.0152, "step": 33978 }, { "epoch": 3.4916769420468556, "grad_norm": 0.05685526877641678, "learning_rate": 0.01, "loss": 2.0061, "step": 33981 }, { "epoch": 3.491985203452528, "grad_norm": 0.044158194214105606, "learning_rate": 0.01, "loss": 1.9759, "step": 33984 }, { "epoch": 3.4922934648581996, "grad_norm": 0.0626639872789383, "learning_rate": 0.01, "loss": 2.0238, "step": 33987 }, { "epoch": 3.4926017262638718, "grad_norm": 0.0559966154396534, "learning_rate": 0.01, "loss": 2.0002, "step": 33990 }, { "epoch": 3.4929099876695435, "grad_norm": 0.05526448041200638, "learning_rate": 0.01, "loss": 2.0161, "step": 33993 }, { "epoch": 3.4932182490752157, "grad_norm": 0.07910171896219254, "learning_rate": 0.01, "loss": 2.0098, "step": 33996 }, { "epoch": 3.493526510480888, "grad_norm": 0.09646596759557724, "learning_rate": 0.01, "loss": 2.0161, "step": 33999 }, { "epoch": 3.4938347718865597, "grad_norm": 0.06635434925556183, "learning_rate": 0.01, "loss": 2.0271, "step": 34002 }, { "epoch": 3.494143033292232, "grad_norm": 0.04328935965895653, "learning_rate": 0.01, "loss": 2.0079, "step": 34005 }, { "epoch": 3.494451294697904, "grad_norm": 0.040060825645923615, "learning_rate": 0.01, "loss": 2.0076, "step": 34008 }, { "epoch": 3.494759556103576, "grad_norm": 0.0396769754588604, "learning_rate": 0.01, "loss": 2.0319, "step": 34011 }, { "epoch": 3.495067817509248, "grad_norm": 0.035870879888534546, "learning_rate": 0.01, "loss": 2.0013, "step": 34014 }, { "epoch": 3.4953760789149197, "grad_norm": 0.07099170982837677, "learning_rate": 0.01, "loss": 2.0024, "step": 34017 }, { "epoch": 3.495684340320592, "grad_norm": 0.06994140148162842, "learning_rate": 0.01, "loss": 2.009, "step": 34020 }, { "epoch": 3.4959926017262637, "grad_norm": 0.03673701733350754, "learning_rate": 0.01, "loss": 2.0091, "step": 34023 }, { "epoch": 3.496300863131936, "grad_norm": 0.036696165800094604, "learning_rate": 0.01, "loss": 2.0196, "step": 34026 }, { "epoch": 3.496609124537608, "grad_norm": 0.1089354008436203, "learning_rate": 0.01, "loss": 2.0083, "step": 34029 }, { "epoch": 3.49691738594328, "grad_norm": 0.04758044332265854, "learning_rate": 0.01, "loss": 1.9975, "step": 34032 }, { "epoch": 3.497225647348952, "grad_norm": 0.05122329294681549, "learning_rate": 0.01, "loss": 1.9944, "step": 34035 }, { "epoch": 3.4975339087546238, "grad_norm": 0.04168769717216492, "learning_rate": 0.01, "loss": 2.013, "step": 34038 }, { "epoch": 3.497842170160296, "grad_norm": 0.03426910564303398, "learning_rate": 0.01, "loss": 2.0217, "step": 34041 }, { "epoch": 3.4981504315659677, "grad_norm": 0.036272600293159485, "learning_rate": 0.01, "loss": 2.0134, "step": 34044 }, { "epoch": 3.49845869297164, "grad_norm": 0.11142997443675995, "learning_rate": 0.01, "loss": 2.0006, "step": 34047 }, { "epoch": 3.498766954377312, "grad_norm": 0.08410037308931351, "learning_rate": 0.01, "loss": 2.0274, "step": 34050 }, { "epoch": 3.499075215782984, "grad_norm": 0.10037896782159805, "learning_rate": 0.01, "loss": 2.0187, "step": 34053 }, { "epoch": 3.499383477188656, "grad_norm": 0.05194736644625664, "learning_rate": 0.01, "loss": 1.9862, "step": 34056 }, { "epoch": 3.499691738594328, "grad_norm": 0.037128567695617676, "learning_rate": 0.01, "loss": 1.9806, "step": 34059 }, { "epoch": 3.5, "grad_norm": 0.059042248874902725, "learning_rate": 0.01, "loss": 2.0134, "step": 34062 }, { "epoch": 3.5003082614056718, "grad_norm": 0.06282515078783035, "learning_rate": 0.01, "loss": 1.9964, "step": 34065 }, { "epoch": 3.500616522811344, "grad_norm": 0.04248201847076416, "learning_rate": 0.01, "loss": 2.0045, "step": 34068 }, { "epoch": 3.500924784217016, "grad_norm": 0.047459498047828674, "learning_rate": 0.01, "loss": 2.0171, "step": 34071 }, { "epoch": 3.501233045622688, "grad_norm": 0.04447497799992561, "learning_rate": 0.01, "loss": 2.0263, "step": 34074 }, { "epoch": 3.50154130702836, "grad_norm": 0.03569423034787178, "learning_rate": 0.01, "loss": 1.9867, "step": 34077 }, { "epoch": 3.5018495684340323, "grad_norm": 0.055864010006189346, "learning_rate": 0.01, "loss": 2.0025, "step": 34080 }, { "epoch": 3.502157829839704, "grad_norm": 0.055652111768722534, "learning_rate": 0.01, "loss": 1.9972, "step": 34083 }, { "epoch": 3.5024660912453762, "grad_norm": 0.1195710226893425, "learning_rate": 0.01, "loss": 1.9942, "step": 34086 }, { "epoch": 3.502774352651048, "grad_norm": 0.04436422884464264, "learning_rate": 0.01, "loss": 2.0008, "step": 34089 }, { "epoch": 3.50308261405672, "grad_norm": 0.047165922820568085, "learning_rate": 0.01, "loss": 2.0211, "step": 34092 }, { "epoch": 3.503390875462392, "grad_norm": 0.07143979519605637, "learning_rate": 0.01, "loss": 2.0108, "step": 34095 }, { "epoch": 3.503699136868064, "grad_norm": 0.04734091833233833, "learning_rate": 0.01, "loss": 2.0183, "step": 34098 }, { "epoch": 3.5040073982737363, "grad_norm": 0.05058762803673744, "learning_rate": 0.01, "loss": 1.9887, "step": 34101 }, { "epoch": 3.504315659679408, "grad_norm": 0.03917768597602844, "learning_rate": 0.01, "loss": 2.0052, "step": 34104 }, { "epoch": 3.5046239210850803, "grad_norm": 0.06384671479463577, "learning_rate": 0.01, "loss": 2.0188, "step": 34107 }, { "epoch": 3.504932182490752, "grad_norm": 0.046532079577445984, "learning_rate": 0.01, "loss": 1.9841, "step": 34110 }, { "epoch": 3.505240443896424, "grad_norm": 0.033960457891225815, "learning_rate": 0.01, "loss": 2.0045, "step": 34113 }, { "epoch": 3.505548705302096, "grad_norm": 0.061024975031614304, "learning_rate": 0.01, "loss": 2.0121, "step": 34116 }, { "epoch": 3.505856966707768, "grad_norm": 0.09465770423412323, "learning_rate": 0.01, "loss": 2.0043, "step": 34119 }, { "epoch": 3.5061652281134403, "grad_norm": 0.08510823547840118, "learning_rate": 0.01, "loss": 2.0116, "step": 34122 }, { "epoch": 3.506473489519112, "grad_norm": 0.12201209366321564, "learning_rate": 0.01, "loss": 2.0137, "step": 34125 }, { "epoch": 3.5067817509247843, "grad_norm": 0.08503463119268417, "learning_rate": 0.01, "loss": 2.0055, "step": 34128 }, { "epoch": 3.5070900123304565, "grad_norm": 0.07231762260198593, "learning_rate": 0.01, "loss": 1.9907, "step": 34131 }, { "epoch": 3.5073982737361282, "grad_norm": 0.042714521288871765, "learning_rate": 0.01, "loss": 2.0132, "step": 34134 }, { "epoch": 3.5077065351418, "grad_norm": 0.06119026616215706, "learning_rate": 0.01, "loss": 2.024, "step": 34137 }, { "epoch": 3.508014796547472, "grad_norm": 0.047117751091718674, "learning_rate": 0.01, "loss": 1.9938, "step": 34140 }, { "epoch": 3.5083230579531444, "grad_norm": 0.06318202614784241, "learning_rate": 0.01, "loss": 1.9826, "step": 34143 }, { "epoch": 3.508631319358816, "grad_norm": 0.05050423741340637, "learning_rate": 0.01, "loss": 1.9882, "step": 34146 }, { "epoch": 3.5089395807644883, "grad_norm": 0.06105552241206169, "learning_rate": 0.01, "loss": 1.9882, "step": 34149 }, { "epoch": 3.5092478421701605, "grad_norm": 0.05940508469939232, "learning_rate": 0.01, "loss": 2.0127, "step": 34152 }, { "epoch": 3.5095561035758323, "grad_norm": 0.05157890543341637, "learning_rate": 0.01, "loss": 1.9999, "step": 34155 }, { "epoch": 3.5098643649815044, "grad_norm": 0.06597436964511871, "learning_rate": 0.01, "loss": 2.0054, "step": 34158 }, { "epoch": 3.510172626387176, "grad_norm": 0.10424167662858963, "learning_rate": 0.01, "loss": 1.9899, "step": 34161 }, { "epoch": 3.5104808877928484, "grad_norm": 0.09572573006153107, "learning_rate": 0.01, "loss": 2.0132, "step": 34164 }, { "epoch": 3.51078914919852, "grad_norm": 0.07485774159431458, "learning_rate": 0.01, "loss": 2.0063, "step": 34167 }, { "epoch": 3.5110974106041923, "grad_norm": 0.12162437289953232, "learning_rate": 0.01, "loss": 2.0342, "step": 34170 }, { "epoch": 3.5114056720098645, "grad_norm": 0.09338829666376114, "learning_rate": 0.01, "loss": 2.0366, "step": 34173 }, { "epoch": 3.5117139334155363, "grad_norm": 0.06095868721604347, "learning_rate": 0.01, "loss": 2.0202, "step": 34176 }, { "epoch": 3.5120221948212085, "grad_norm": 0.057355985045433044, "learning_rate": 0.01, "loss": 2.0216, "step": 34179 }, { "epoch": 3.5123304562268807, "grad_norm": 0.10891444981098175, "learning_rate": 0.01, "loss": 2.0339, "step": 34182 }, { "epoch": 3.5126387176325524, "grad_norm": 0.10567829012870789, "learning_rate": 0.01, "loss": 2.0142, "step": 34185 }, { "epoch": 3.512946979038224, "grad_norm": 0.05244187265634537, "learning_rate": 0.01, "loss": 1.9864, "step": 34188 }, { "epoch": 3.5132552404438964, "grad_norm": 0.046076592057943344, "learning_rate": 0.01, "loss": 2.0013, "step": 34191 }, { "epoch": 3.5135635018495686, "grad_norm": 0.04405316337943077, "learning_rate": 0.01, "loss": 1.9941, "step": 34194 }, { "epoch": 3.5138717632552403, "grad_norm": 0.03997344523668289, "learning_rate": 0.01, "loss": 1.9915, "step": 34197 }, { "epoch": 3.5141800246609125, "grad_norm": 0.03909468650817871, "learning_rate": 0.01, "loss": 1.9767, "step": 34200 }, { "epoch": 3.5144882860665847, "grad_norm": 0.11657961457967758, "learning_rate": 0.01, "loss": 1.9941, "step": 34203 }, { "epoch": 3.5147965474722564, "grad_norm": 0.11744063347578049, "learning_rate": 0.01, "loss": 2.0029, "step": 34206 }, { "epoch": 3.5151048088779286, "grad_norm": 0.039932142943143845, "learning_rate": 0.01, "loss": 2.0168, "step": 34209 }, { "epoch": 3.5154130702836004, "grad_norm": 0.03299909457564354, "learning_rate": 0.01, "loss": 2.022, "step": 34212 }, { "epoch": 3.5157213316892726, "grad_norm": 0.1055513322353363, "learning_rate": 0.01, "loss": 2.0165, "step": 34215 }, { "epoch": 3.5160295930949443, "grad_norm": 0.05073374882340431, "learning_rate": 0.01, "loss": 1.9951, "step": 34218 }, { "epoch": 3.5163378545006165, "grad_norm": 0.05423841252923012, "learning_rate": 0.01, "loss": 2.0115, "step": 34221 }, { "epoch": 3.5166461159062887, "grad_norm": 0.05695211887359619, "learning_rate": 0.01, "loss": 2.011, "step": 34224 }, { "epoch": 3.5169543773119605, "grad_norm": 0.04092913866043091, "learning_rate": 0.01, "loss": 2.0303, "step": 34227 }, { "epoch": 3.5172626387176327, "grad_norm": 0.05083661898970604, "learning_rate": 0.01, "loss": 2.0081, "step": 34230 }, { "epoch": 3.5175709001233044, "grad_norm": 0.04499472677707672, "learning_rate": 0.01, "loss": 2.0036, "step": 34233 }, { "epoch": 3.5178791615289766, "grad_norm": 0.09770061075687408, "learning_rate": 0.01, "loss": 2.0209, "step": 34236 }, { "epoch": 3.5181874229346484, "grad_norm": 0.09951679408550262, "learning_rate": 0.01, "loss": 1.9993, "step": 34239 }, { "epoch": 3.5184956843403206, "grad_norm": 0.11089123040437698, "learning_rate": 0.01, "loss": 2.0088, "step": 34242 }, { "epoch": 3.5188039457459928, "grad_norm": 0.05511726066470146, "learning_rate": 0.01, "loss": 2.0129, "step": 34245 }, { "epoch": 3.5191122071516645, "grad_norm": 0.035139378160238266, "learning_rate": 0.01, "loss": 2.016, "step": 34248 }, { "epoch": 3.5194204685573367, "grad_norm": 0.05882834270596504, "learning_rate": 0.01, "loss": 2.0061, "step": 34251 }, { "epoch": 3.519728729963009, "grad_norm": 0.06780868023633957, "learning_rate": 0.01, "loss": 1.977, "step": 34254 }, { "epoch": 3.5200369913686806, "grad_norm": 0.053879499435424805, "learning_rate": 0.01, "loss": 2.0055, "step": 34257 }, { "epoch": 3.5203452527743524, "grad_norm": 0.045473113656044006, "learning_rate": 0.01, "loss": 2.0108, "step": 34260 }, { "epoch": 3.5206535141800246, "grad_norm": 0.050460174679756165, "learning_rate": 0.01, "loss": 2.0327, "step": 34263 }, { "epoch": 3.520961775585697, "grad_norm": 0.040959011763334274, "learning_rate": 0.01, "loss": 1.9772, "step": 34266 }, { "epoch": 3.5212700369913685, "grad_norm": 0.048418451100587845, "learning_rate": 0.01, "loss": 2.0182, "step": 34269 }, { "epoch": 3.5215782983970407, "grad_norm": 0.06287720799446106, "learning_rate": 0.01, "loss": 2.0014, "step": 34272 }, { "epoch": 3.521886559802713, "grad_norm": 0.04529783874750137, "learning_rate": 0.01, "loss": 2.0011, "step": 34275 }, { "epoch": 3.5221948212083847, "grad_norm": 0.03729906305670738, "learning_rate": 0.01, "loss": 2.0183, "step": 34278 }, { "epoch": 3.522503082614057, "grad_norm": 0.1012713834643364, "learning_rate": 0.01, "loss": 1.9841, "step": 34281 }, { "epoch": 3.5228113440197286, "grad_norm": 0.10641775280237198, "learning_rate": 0.01, "loss": 2.0122, "step": 34284 }, { "epoch": 3.523119605425401, "grad_norm": 0.09289912134408951, "learning_rate": 0.01, "loss": 2.016, "step": 34287 }, { "epoch": 3.5234278668310726, "grad_norm": 0.13354292511940002, "learning_rate": 0.01, "loss": 2.0119, "step": 34290 }, { "epoch": 3.5237361282367448, "grad_norm": 0.08027501404285431, "learning_rate": 0.01, "loss": 2.0046, "step": 34293 }, { "epoch": 3.524044389642417, "grad_norm": 0.047286976128816605, "learning_rate": 0.01, "loss": 1.9796, "step": 34296 }, { "epoch": 3.5243526510480887, "grad_norm": 0.042211420834064484, "learning_rate": 0.01, "loss": 2.0426, "step": 34299 }, { "epoch": 3.524660912453761, "grad_norm": 0.11374162137508392, "learning_rate": 0.01, "loss": 1.9954, "step": 34302 }, { "epoch": 3.5249691738594326, "grad_norm": 0.05604710429906845, "learning_rate": 0.01, "loss": 2.0039, "step": 34305 }, { "epoch": 3.525277435265105, "grad_norm": 0.09744080156087875, "learning_rate": 0.01, "loss": 1.9944, "step": 34308 }, { "epoch": 3.5255856966707766, "grad_norm": 0.08903683722019196, "learning_rate": 0.01, "loss": 1.9931, "step": 34311 }, { "epoch": 3.525893958076449, "grad_norm": 0.048648543655872345, "learning_rate": 0.01, "loss": 2.0028, "step": 34314 }, { "epoch": 3.526202219482121, "grad_norm": 0.0710827112197876, "learning_rate": 0.01, "loss": 2.0168, "step": 34317 }, { "epoch": 3.5265104808877927, "grad_norm": 0.04736728593707085, "learning_rate": 0.01, "loss": 1.9812, "step": 34320 }, { "epoch": 3.526818742293465, "grad_norm": 0.05635381117463112, "learning_rate": 0.01, "loss": 2.0202, "step": 34323 }, { "epoch": 3.527127003699137, "grad_norm": 0.06686391681432724, "learning_rate": 0.01, "loss": 2.0196, "step": 34326 }, { "epoch": 3.527435265104809, "grad_norm": 0.03842944651842117, "learning_rate": 0.01, "loss": 1.9974, "step": 34329 }, { "epoch": 3.5277435265104806, "grad_norm": 0.042534928768873215, "learning_rate": 0.01, "loss": 2.0155, "step": 34332 }, { "epoch": 3.528051787916153, "grad_norm": 0.06037287786602974, "learning_rate": 0.01, "loss": 2.0192, "step": 34335 }, { "epoch": 3.528360049321825, "grad_norm": 0.06533370167016983, "learning_rate": 0.01, "loss": 1.9873, "step": 34338 }, { "epoch": 3.5286683107274968, "grad_norm": 0.0877017006278038, "learning_rate": 0.01, "loss": 2.0081, "step": 34341 }, { "epoch": 3.528976572133169, "grad_norm": 0.09864972531795502, "learning_rate": 0.01, "loss": 1.9866, "step": 34344 }, { "epoch": 3.529284833538841, "grad_norm": 0.049320898950099945, "learning_rate": 0.01, "loss": 1.9994, "step": 34347 }, { "epoch": 3.529593094944513, "grad_norm": 0.12996013462543488, "learning_rate": 0.01, "loss": 2.0136, "step": 34350 }, { "epoch": 3.529901356350185, "grad_norm": 0.06548713147640228, "learning_rate": 0.01, "loss": 2.0221, "step": 34353 }, { "epoch": 3.530209617755857, "grad_norm": 0.045482341200113297, "learning_rate": 0.01, "loss": 2.0062, "step": 34356 }, { "epoch": 3.530517879161529, "grad_norm": 0.04433637857437134, "learning_rate": 0.01, "loss": 2.0062, "step": 34359 }, { "epoch": 3.530826140567201, "grad_norm": 0.04450944438576698, "learning_rate": 0.01, "loss": 1.983, "step": 34362 }, { "epoch": 3.531134401972873, "grad_norm": 0.07188856601715088, "learning_rate": 0.01, "loss": 2.0426, "step": 34365 }, { "epoch": 3.531442663378545, "grad_norm": 0.049853190779685974, "learning_rate": 0.01, "loss": 1.9959, "step": 34368 }, { "epoch": 3.531750924784217, "grad_norm": 0.08662360161542892, "learning_rate": 0.01, "loss": 1.9886, "step": 34371 }, { "epoch": 3.532059186189889, "grad_norm": 0.07737040519714355, "learning_rate": 0.01, "loss": 2.0064, "step": 34374 }, { "epoch": 3.532367447595561, "grad_norm": 0.0868213102221489, "learning_rate": 0.01, "loss": 1.9836, "step": 34377 }, { "epoch": 3.532675709001233, "grad_norm": 0.06288056075572968, "learning_rate": 0.01, "loss": 2.0142, "step": 34380 }, { "epoch": 3.532983970406905, "grad_norm": 0.05537475645542145, "learning_rate": 0.01, "loss": 1.9937, "step": 34383 }, { "epoch": 3.533292231812577, "grad_norm": 0.13551141321659088, "learning_rate": 0.01, "loss": 2.0162, "step": 34386 }, { "epoch": 3.533600493218249, "grad_norm": 0.040236156433820724, "learning_rate": 0.01, "loss": 2.0006, "step": 34389 }, { "epoch": 3.533908754623921, "grad_norm": 0.06904727220535278, "learning_rate": 0.01, "loss": 1.9882, "step": 34392 }, { "epoch": 3.534217016029593, "grad_norm": 0.06675262004137039, "learning_rate": 0.01, "loss": 2.0018, "step": 34395 }, { "epoch": 3.5345252774352653, "grad_norm": 0.05011274665594101, "learning_rate": 0.01, "loss": 2.0267, "step": 34398 }, { "epoch": 3.534833538840937, "grad_norm": 0.04494976997375488, "learning_rate": 0.01, "loss": 2.0243, "step": 34401 }, { "epoch": 3.5351418002466093, "grad_norm": 0.04114719480276108, "learning_rate": 0.01, "loss": 2.0161, "step": 34404 }, { "epoch": 3.535450061652281, "grad_norm": 0.03544189780950546, "learning_rate": 0.01, "loss": 1.9987, "step": 34407 }, { "epoch": 3.5357583230579532, "grad_norm": 0.10223556309938431, "learning_rate": 0.01, "loss": 2.0237, "step": 34410 }, { "epoch": 3.536066584463625, "grad_norm": 0.03573578596115112, "learning_rate": 0.01, "loss": 2.0017, "step": 34413 }, { "epoch": 3.536374845869297, "grad_norm": 0.08199316263198853, "learning_rate": 0.01, "loss": 2.007, "step": 34416 }, { "epoch": 3.5366831072749694, "grad_norm": 0.04587673768401146, "learning_rate": 0.01, "loss": 2.01, "step": 34419 }, { "epoch": 3.536991368680641, "grad_norm": 0.061022888869047165, "learning_rate": 0.01, "loss": 1.9894, "step": 34422 }, { "epoch": 3.5372996300863133, "grad_norm": 0.09616536647081375, "learning_rate": 0.01, "loss": 2.0081, "step": 34425 }, { "epoch": 3.537607891491985, "grad_norm": 0.10521430522203445, "learning_rate": 0.01, "loss": 2.0187, "step": 34428 }, { "epoch": 3.5379161528976573, "grad_norm": 0.0551154688000679, "learning_rate": 0.01, "loss": 2.0034, "step": 34431 }, { "epoch": 3.538224414303329, "grad_norm": 0.0405128113925457, "learning_rate": 0.01, "loss": 2.0152, "step": 34434 }, { "epoch": 3.538532675709001, "grad_norm": 0.035197604447603226, "learning_rate": 0.01, "loss": 2.0158, "step": 34437 }, { "epoch": 3.5388409371146734, "grad_norm": 0.026394899934530258, "learning_rate": 0.01, "loss": 1.9898, "step": 34440 }, { "epoch": 3.539149198520345, "grad_norm": 0.09831299632787704, "learning_rate": 0.01, "loss": 2.0205, "step": 34443 }, { "epoch": 3.5394574599260173, "grad_norm": 0.08986491709947586, "learning_rate": 0.01, "loss": 2.023, "step": 34446 }, { "epoch": 3.5397657213316895, "grad_norm": 0.07974385470151901, "learning_rate": 0.01, "loss": 1.9975, "step": 34449 }, { "epoch": 3.5400739827373613, "grad_norm": 0.11314170062541962, "learning_rate": 0.01, "loss": 2.0045, "step": 34452 }, { "epoch": 3.540382244143033, "grad_norm": 0.05535215139389038, "learning_rate": 0.01, "loss": 2.0142, "step": 34455 }, { "epoch": 3.5406905055487052, "grad_norm": 0.0592242069542408, "learning_rate": 0.01, "loss": 2.0243, "step": 34458 }, { "epoch": 3.5409987669543774, "grad_norm": 0.12061761319637299, "learning_rate": 0.01, "loss": 2.0125, "step": 34461 }, { "epoch": 3.541307028360049, "grad_norm": 0.11443237960338593, "learning_rate": 0.01, "loss": 2.004, "step": 34464 }, { "epoch": 3.5416152897657214, "grad_norm": 0.050069622695446014, "learning_rate": 0.01, "loss": 2.013, "step": 34467 }, { "epoch": 3.5419235511713936, "grad_norm": 0.0419803261756897, "learning_rate": 0.01, "loss": 2.0141, "step": 34470 }, { "epoch": 3.5422318125770653, "grad_norm": 0.03493333235383034, "learning_rate": 0.01, "loss": 2.0106, "step": 34473 }, { "epoch": 3.5425400739827375, "grad_norm": 0.036889005452394485, "learning_rate": 0.01, "loss": 2.0085, "step": 34476 }, { "epoch": 3.5428483353884093, "grad_norm": 0.05217687413096428, "learning_rate": 0.01, "loss": 1.9987, "step": 34479 }, { "epoch": 3.5431565967940815, "grad_norm": 0.050754569470882416, "learning_rate": 0.01, "loss": 2.0333, "step": 34482 }, { "epoch": 3.543464858199753, "grad_norm": 0.04706338793039322, "learning_rate": 0.01, "loss": 1.9973, "step": 34485 }, { "epoch": 3.5437731196054254, "grad_norm": 0.07039839029312134, "learning_rate": 0.01, "loss": 2.0104, "step": 34488 }, { "epoch": 3.5440813810110976, "grad_norm": 0.07069671154022217, "learning_rate": 0.01, "loss": 1.978, "step": 34491 }, { "epoch": 3.5443896424167693, "grad_norm": 0.09058693796396255, "learning_rate": 0.01, "loss": 2.0212, "step": 34494 }, { "epoch": 3.5446979038224415, "grad_norm": 0.037666015326976776, "learning_rate": 0.01, "loss": 2.0312, "step": 34497 }, { "epoch": 3.5450061652281133, "grad_norm": 0.04604499414563179, "learning_rate": 0.01, "loss": 2.041, "step": 34500 }, { "epoch": 3.5453144266337855, "grad_norm": 0.03853873908519745, "learning_rate": 0.01, "loss": 2.0138, "step": 34503 }, { "epoch": 3.5456226880394572, "grad_norm": 0.07123208791017532, "learning_rate": 0.01, "loss": 2.0107, "step": 34506 }, { "epoch": 3.5459309494451294, "grad_norm": 0.0936029776930809, "learning_rate": 0.01, "loss": 2.0001, "step": 34509 }, { "epoch": 3.5462392108508016, "grad_norm": 0.07412207871675491, "learning_rate": 0.01, "loss": 2.0034, "step": 34512 }, { "epoch": 3.5465474722564734, "grad_norm": 0.07681329548358917, "learning_rate": 0.01, "loss": 1.9964, "step": 34515 }, { "epoch": 3.5468557336621456, "grad_norm": 0.10403033345937729, "learning_rate": 0.01, "loss": 2.0077, "step": 34518 }, { "epoch": 3.5471639950678178, "grad_norm": 0.14240513741970062, "learning_rate": 0.01, "loss": 1.9695, "step": 34521 }, { "epoch": 3.5474722564734895, "grad_norm": 0.14203394949436188, "learning_rate": 0.01, "loss": 2.0187, "step": 34524 }, { "epoch": 3.5477805178791613, "grad_norm": 0.09036475419998169, "learning_rate": 0.01, "loss": 2.0054, "step": 34527 }, { "epoch": 3.5480887792848335, "grad_norm": 0.037448760122060776, "learning_rate": 0.01, "loss": 1.9978, "step": 34530 }, { "epoch": 3.5483970406905057, "grad_norm": 0.05539664253592491, "learning_rate": 0.01, "loss": 2.0015, "step": 34533 }, { "epoch": 3.5487053020961774, "grad_norm": 0.05857717618346214, "learning_rate": 0.01, "loss": 1.9974, "step": 34536 }, { "epoch": 3.5490135635018496, "grad_norm": 0.06406868249177933, "learning_rate": 0.01, "loss": 2.009, "step": 34539 }, { "epoch": 3.549321824907522, "grad_norm": 0.04234686121344566, "learning_rate": 0.01, "loss": 2.0273, "step": 34542 }, { "epoch": 3.5496300863131935, "grad_norm": 0.038916390389204025, "learning_rate": 0.01, "loss": 2.0059, "step": 34545 }, { "epoch": 3.5499383477188657, "grad_norm": 0.039077602326869965, "learning_rate": 0.01, "loss": 2.0196, "step": 34548 }, { "epoch": 3.5502466091245375, "grad_norm": 0.03960174322128296, "learning_rate": 0.01, "loss": 2.0167, "step": 34551 }, { "epoch": 3.5505548705302097, "grad_norm": 0.12132997810840607, "learning_rate": 0.01, "loss": 1.9902, "step": 34554 }, { "epoch": 3.5508631319358814, "grad_norm": 0.09045036137104034, "learning_rate": 0.01, "loss": 2.0178, "step": 34557 }, { "epoch": 3.5511713933415536, "grad_norm": 0.07062508165836334, "learning_rate": 0.01, "loss": 1.9798, "step": 34560 }, { "epoch": 3.551479654747226, "grad_norm": 0.08182030916213989, "learning_rate": 0.01, "loss": 1.9715, "step": 34563 }, { "epoch": 3.5517879161528976, "grad_norm": 0.05917488783597946, "learning_rate": 0.01, "loss": 2.0149, "step": 34566 }, { "epoch": 3.5520961775585698, "grad_norm": 0.08519969880580902, "learning_rate": 0.01, "loss": 2.009, "step": 34569 }, { "epoch": 3.5524044389642415, "grad_norm": 0.04442654922604561, "learning_rate": 0.01, "loss": 2.0218, "step": 34572 }, { "epoch": 3.5527127003699137, "grad_norm": 0.08265768736600876, "learning_rate": 0.01, "loss": 2.0006, "step": 34575 }, { "epoch": 3.5530209617755855, "grad_norm": 0.0891944020986557, "learning_rate": 0.01, "loss": 1.991, "step": 34578 }, { "epoch": 3.5533292231812577, "grad_norm": 0.0688168928027153, "learning_rate": 0.01, "loss": 1.995, "step": 34581 }, { "epoch": 3.55363748458693, "grad_norm": 0.0936504453420639, "learning_rate": 0.01, "loss": 1.9835, "step": 34584 }, { "epoch": 3.5539457459926016, "grad_norm": 0.10172853618860245, "learning_rate": 0.01, "loss": 2.0016, "step": 34587 }, { "epoch": 3.554254007398274, "grad_norm": 0.04822350665926933, "learning_rate": 0.01, "loss": 1.9796, "step": 34590 }, { "epoch": 3.554562268803946, "grad_norm": 0.05222393944859505, "learning_rate": 0.01, "loss": 2.0036, "step": 34593 }, { "epoch": 3.5548705302096177, "grad_norm": 0.03592358157038689, "learning_rate": 0.01, "loss": 1.9911, "step": 34596 }, { "epoch": 3.5551787916152895, "grad_norm": 0.03903461620211601, "learning_rate": 0.01, "loss": 2.0062, "step": 34599 }, { "epoch": 3.5554870530209617, "grad_norm": 0.0467611663043499, "learning_rate": 0.01, "loss": 1.995, "step": 34602 }, { "epoch": 3.555795314426634, "grad_norm": 0.12140758335590363, "learning_rate": 0.01, "loss": 2.0168, "step": 34605 }, { "epoch": 3.5561035758323056, "grad_norm": 0.04155382886528969, "learning_rate": 0.01, "loss": 2.0048, "step": 34608 }, { "epoch": 3.556411837237978, "grad_norm": 0.039924267679452896, "learning_rate": 0.01, "loss": 1.9813, "step": 34611 }, { "epoch": 3.55672009864365, "grad_norm": 0.10513463616371155, "learning_rate": 0.01, "loss": 2.0098, "step": 34614 }, { "epoch": 3.5570283600493218, "grad_norm": 0.08956603705883026, "learning_rate": 0.01, "loss": 1.9951, "step": 34617 }, { "epoch": 3.557336621454994, "grad_norm": 0.045444682240486145, "learning_rate": 0.01, "loss": 2.0041, "step": 34620 }, { "epoch": 3.5576448828606657, "grad_norm": 0.05367853119969368, "learning_rate": 0.01, "loss": 2.0154, "step": 34623 }, { "epoch": 3.557953144266338, "grad_norm": 0.0465155765414238, "learning_rate": 0.01, "loss": 2.0094, "step": 34626 }, { "epoch": 3.5582614056720097, "grad_norm": 0.06788338720798492, "learning_rate": 0.01, "loss": 2.0526, "step": 34629 }, { "epoch": 3.558569667077682, "grad_norm": 0.04508093744516373, "learning_rate": 0.01, "loss": 2.0205, "step": 34632 }, { "epoch": 3.558877928483354, "grad_norm": 0.05827740207314491, "learning_rate": 0.01, "loss": 2.0154, "step": 34635 }, { "epoch": 3.559186189889026, "grad_norm": 0.09915097057819366, "learning_rate": 0.01, "loss": 2.0322, "step": 34638 }, { "epoch": 3.559494451294698, "grad_norm": 0.08571092784404755, "learning_rate": 0.01, "loss": 2.0157, "step": 34641 }, { "epoch": 3.55980271270037, "grad_norm": 0.06480662524700165, "learning_rate": 0.01, "loss": 1.9991, "step": 34644 }, { "epoch": 3.560110974106042, "grad_norm": 0.055505797266960144, "learning_rate": 0.01, "loss": 1.9894, "step": 34647 }, { "epoch": 3.5604192355117137, "grad_norm": 0.05562606081366539, "learning_rate": 0.01, "loss": 2.0173, "step": 34650 }, { "epoch": 3.560727496917386, "grad_norm": 0.05720195546746254, "learning_rate": 0.01, "loss": 1.999, "step": 34653 }, { "epoch": 3.561035758323058, "grad_norm": 0.14654351770877838, "learning_rate": 0.01, "loss": 2.0, "step": 34656 }, { "epoch": 3.56134401972873, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 1.9979, "step": 34659 }, { "epoch": 3.561652281134402, "grad_norm": 0.052737195044755936, "learning_rate": 0.01, "loss": 2.0013, "step": 34662 }, { "epoch": 3.561960542540074, "grad_norm": 0.0722641721367836, "learning_rate": 0.01, "loss": 2.0165, "step": 34665 }, { "epoch": 3.562268803945746, "grad_norm": 0.03973691165447235, "learning_rate": 0.01, "loss": 2.0317, "step": 34668 }, { "epoch": 3.562577065351418, "grad_norm": 0.06284237653017044, "learning_rate": 0.01, "loss": 2.0112, "step": 34671 }, { "epoch": 3.56288532675709, "grad_norm": 0.0880017802119255, "learning_rate": 0.01, "loss": 2.0034, "step": 34674 }, { "epoch": 3.563193588162762, "grad_norm": 0.06454182416200638, "learning_rate": 0.01, "loss": 2.0068, "step": 34677 }, { "epoch": 3.563501849568434, "grad_norm": 0.07054916024208069, "learning_rate": 0.01, "loss": 1.9997, "step": 34680 }, { "epoch": 3.563810110974106, "grad_norm": 0.08058945089578629, "learning_rate": 0.01, "loss": 1.9834, "step": 34683 }, { "epoch": 3.5641183723797782, "grad_norm": 0.08660910278558731, "learning_rate": 0.01, "loss": 2.0159, "step": 34686 }, { "epoch": 3.56442663378545, "grad_norm": 0.050544124096632004, "learning_rate": 0.01, "loss": 1.9975, "step": 34689 }, { "epoch": 3.564734895191122, "grad_norm": 0.056985314935445786, "learning_rate": 0.01, "loss": 2.0013, "step": 34692 }, { "epoch": 3.565043156596794, "grad_norm": 0.08204400539398193, "learning_rate": 0.01, "loss": 1.993, "step": 34695 }, { "epoch": 3.565351418002466, "grad_norm": 0.09590046107769012, "learning_rate": 0.01, "loss": 1.9995, "step": 34698 }, { "epoch": 3.565659679408138, "grad_norm": 0.03453322499990463, "learning_rate": 0.01, "loss": 2.001, "step": 34701 }, { "epoch": 3.56596794081381, "grad_norm": 0.03801025450229645, "learning_rate": 0.01, "loss": 2.0096, "step": 34704 }, { "epoch": 3.5662762022194823, "grad_norm": 0.05342378839850426, "learning_rate": 0.01, "loss": 1.9924, "step": 34707 }, { "epoch": 3.566584463625154, "grad_norm": 0.06871719658374786, "learning_rate": 0.01, "loss": 2.0254, "step": 34710 }, { "epoch": 3.566892725030826, "grad_norm": 0.06653191894292831, "learning_rate": 0.01, "loss": 1.9956, "step": 34713 }, { "epoch": 3.5672009864364984, "grad_norm": 0.10531385242938995, "learning_rate": 0.01, "loss": 2.0151, "step": 34716 }, { "epoch": 3.56750924784217, "grad_norm": 0.05481969192624092, "learning_rate": 0.01, "loss": 2.003, "step": 34719 }, { "epoch": 3.567817509247842, "grad_norm": 0.04904542118310928, "learning_rate": 0.01, "loss": 1.9992, "step": 34722 }, { "epoch": 3.568125770653514, "grad_norm": 0.034647226333618164, "learning_rate": 0.01, "loss": 2.0232, "step": 34725 }, { "epoch": 3.5684340320591863, "grad_norm": 0.07234811037778854, "learning_rate": 0.01, "loss": 1.9992, "step": 34728 }, { "epoch": 3.568742293464858, "grad_norm": 0.10623595118522644, "learning_rate": 0.01, "loss": 1.9955, "step": 34731 }, { "epoch": 3.5690505548705302, "grad_norm": 0.04955185204744339, "learning_rate": 0.01, "loss": 1.9894, "step": 34734 }, { "epoch": 3.5693588162762024, "grad_norm": 0.08183744549751282, "learning_rate": 0.01, "loss": 2.0236, "step": 34737 }, { "epoch": 3.569667077681874, "grad_norm": 0.04063693434000015, "learning_rate": 0.01, "loss": 1.9739, "step": 34740 }, { "epoch": 3.5699753390875464, "grad_norm": 0.05661273002624512, "learning_rate": 0.01, "loss": 2.0019, "step": 34743 }, { "epoch": 3.570283600493218, "grad_norm": 0.03300248831510544, "learning_rate": 0.01, "loss": 1.9687, "step": 34746 }, { "epoch": 3.5705918618988903, "grad_norm": 0.09492766112089157, "learning_rate": 0.01, "loss": 2.0099, "step": 34749 }, { "epoch": 3.570900123304562, "grad_norm": 0.0505099892616272, "learning_rate": 0.01, "loss": 2.0062, "step": 34752 }, { "epoch": 3.5712083847102343, "grad_norm": 0.094505175948143, "learning_rate": 0.01, "loss": 1.9948, "step": 34755 }, { "epoch": 3.5715166461159065, "grad_norm": 0.046727851033210754, "learning_rate": 0.01, "loss": 2.0081, "step": 34758 }, { "epoch": 3.571824907521578, "grad_norm": 0.06659562885761261, "learning_rate": 0.01, "loss": 2.0026, "step": 34761 }, { "epoch": 3.5721331689272504, "grad_norm": 0.0828694999217987, "learning_rate": 0.01, "loss": 1.9923, "step": 34764 }, { "epoch": 3.572441430332922, "grad_norm": 0.12309973686933517, "learning_rate": 0.01, "loss": 2.0145, "step": 34767 }, { "epoch": 3.5727496917385944, "grad_norm": 0.03831718862056732, "learning_rate": 0.01, "loss": 2.0022, "step": 34770 }, { "epoch": 3.573057953144266, "grad_norm": 0.049565766006708145, "learning_rate": 0.01, "loss": 1.9838, "step": 34773 }, { "epoch": 3.5733662145499383, "grad_norm": 0.055259205400943756, "learning_rate": 0.01, "loss": 2.0103, "step": 34776 }, { "epoch": 3.5736744759556105, "grad_norm": 0.08979593217372894, "learning_rate": 0.01, "loss": 1.9915, "step": 34779 }, { "epoch": 3.5739827373612822, "grad_norm": 0.05166800692677498, "learning_rate": 0.01, "loss": 2.005, "step": 34782 }, { "epoch": 3.5742909987669544, "grad_norm": 0.06904742866754532, "learning_rate": 0.01, "loss": 1.9823, "step": 34785 }, { "epoch": 3.5745992601726266, "grad_norm": 0.0667278841137886, "learning_rate": 0.01, "loss": 1.9862, "step": 34788 }, { "epoch": 3.5749075215782984, "grad_norm": 0.03483438491821289, "learning_rate": 0.01, "loss": 2.0044, "step": 34791 }, { "epoch": 3.57521578298397, "grad_norm": 0.08964372426271439, "learning_rate": 0.01, "loss": 1.9828, "step": 34794 }, { "epoch": 3.5755240443896423, "grad_norm": 0.09426937997341156, "learning_rate": 0.01, "loss": 2.0093, "step": 34797 }, { "epoch": 3.5758323057953145, "grad_norm": 0.0574275478720665, "learning_rate": 0.01, "loss": 2.0269, "step": 34800 }, { "epoch": 3.5761405672009863, "grad_norm": 0.03654215484857559, "learning_rate": 0.01, "loss": 1.9876, "step": 34803 }, { "epoch": 3.5764488286066585, "grad_norm": 0.05129201337695122, "learning_rate": 0.01, "loss": 2.0072, "step": 34806 }, { "epoch": 3.5767570900123307, "grad_norm": 0.07386317104101181, "learning_rate": 0.01, "loss": 2.0074, "step": 34809 }, { "epoch": 3.5770653514180024, "grad_norm": 0.1415167599916458, "learning_rate": 0.01, "loss": 2.0036, "step": 34812 }, { "epoch": 3.5773736128236746, "grad_norm": 0.04465651139616966, "learning_rate": 0.01, "loss": 1.9881, "step": 34815 }, { "epoch": 3.5776818742293464, "grad_norm": 0.05919940024614334, "learning_rate": 0.01, "loss": 2.0257, "step": 34818 }, { "epoch": 3.5779901356350186, "grad_norm": 0.03873635083436966, "learning_rate": 0.01, "loss": 2.0034, "step": 34821 }, { "epoch": 3.5782983970406903, "grad_norm": 0.05425990745425224, "learning_rate": 0.01, "loss": 1.9911, "step": 34824 }, { "epoch": 3.5786066584463625, "grad_norm": 0.11823546886444092, "learning_rate": 0.01, "loss": 1.9999, "step": 34827 }, { "epoch": 3.5789149198520347, "grad_norm": 0.12624318897724152, "learning_rate": 0.01, "loss": 2.0124, "step": 34830 }, { "epoch": 3.5792231812577064, "grad_norm": 0.05450008437037468, "learning_rate": 0.01, "loss": 1.9941, "step": 34833 }, { "epoch": 3.5795314426633786, "grad_norm": 0.04888772964477539, "learning_rate": 0.01, "loss": 2.007, "step": 34836 }, { "epoch": 3.579839704069051, "grad_norm": 0.033485714346170425, "learning_rate": 0.01, "loss": 2.0066, "step": 34839 }, { "epoch": 3.5801479654747226, "grad_norm": 0.08570647239685059, "learning_rate": 0.01, "loss": 2.0157, "step": 34842 }, { "epoch": 3.5804562268803943, "grad_norm": 0.06110506132245064, "learning_rate": 0.01, "loss": 2.0054, "step": 34845 }, { "epoch": 3.5807644882860665, "grad_norm": 0.08815490454435349, "learning_rate": 0.01, "loss": 1.9991, "step": 34848 }, { "epoch": 3.5810727496917387, "grad_norm": 0.09731253236532211, "learning_rate": 0.01, "loss": 1.9763, "step": 34851 }, { "epoch": 3.5813810110974105, "grad_norm": 0.08022457361221313, "learning_rate": 0.01, "loss": 2.0095, "step": 34854 }, { "epoch": 3.5816892725030827, "grad_norm": 0.042772453278303146, "learning_rate": 0.01, "loss": 2.0041, "step": 34857 }, { "epoch": 3.581997533908755, "grad_norm": 0.030573785305023193, "learning_rate": 0.01, "loss": 1.9908, "step": 34860 }, { "epoch": 3.5823057953144266, "grad_norm": 0.05686675012111664, "learning_rate": 0.01, "loss": 2.0131, "step": 34863 }, { "epoch": 3.582614056720099, "grad_norm": 0.04291863366961479, "learning_rate": 0.01, "loss": 1.9925, "step": 34866 }, { "epoch": 3.5829223181257706, "grad_norm": 0.0715685561299324, "learning_rate": 0.01, "loss": 2.0052, "step": 34869 }, { "epoch": 3.5832305795314427, "grad_norm": 0.06964897364377975, "learning_rate": 0.01, "loss": 2.0019, "step": 34872 }, { "epoch": 3.5835388409371145, "grad_norm": 0.043709173798561096, "learning_rate": 0.01, "loss": 1.9957, "step": 34875 }, { "epoch": 3.5838471023427867, "grad_norm": 0.06593792140483856, "learning_rate": 0.01, "loss": 2.0034, "step": 34878 }, { "epoch": 3.584155363748459, "grad_norm": 0.04702428728342056, "learning_rate": 0.01, "loss": 2.0174, "step": 34881 }, { "epoch": 3.5844636251541306, "grad_norm": 0.08917208015918732, "learning_rate": 0.01, "loss": 1.9914, "step": 34884 }, { "epoch": 3.584771886559803, "grad_norm": 0.1167001873254776, "learning_rate": 0.01, "loss": 2.0183, "step": 34887 }, { "epoch": 3.5850801479654746, "grad_norm": 0.07775711268186569, "learning_rate": 0.01, "loss": 2.0254, "step": 34890 }, { "epoch": 3.585388409371147, "grad_norm": 0.08915986865758896, "learning_rate": 0.01, "loss": 1.9832, "step": 34893 }, { "epoch": 3.5856966707768185, "grad_norm": 0.045163143426179886, "learning_rate": 0.01, "loss": 2.0024, "step": 34896 }, { "epoch": 3.5860049321824907, "grad_norm": 0.0571911595761776, "learning_rate": 0.01, "loss": 1.9897, "step": 34899 }, { "epoch": 3.586313193588163, "grad_norm": 0.0707104504108429, "learning_rate": 0.01, "loss": 2.0291, "step": 34902 }, { "epoch": 3.5866214549938347, "grad_norm": 0.04241738095879555, "learning_rate": 0.01, "loss": 1.9858, "step": 34905 }, { "epoch": 3.586929716399507, "grad_norm": 0.045163869857788086, "learning_rate": 0.01, "loss": 2.004, "step": 34908 }, { "epoch": 3.587237977805179, "grad_norm": 0.03191215172410011, "learning_rate": 0.01, "loss": 2.017, "step": 34911 }, { "epoch": 3.587546239210851, "grad_norm": 0.033771395683288574, "learning_rate": 0.01, "loss": 2.0137, "step": 34914 }, { "epoch": 3.5878545006165226, "grad_norm": 0.06919904053211212, "learning_rate": 0.01, "loss": 1.9974, "step": 34917 }, { "epoch": 3.5881627620221948, "grad_norm": 0.05089094117283821, "learning_rate": 0.01, "loss": 2.0174, "step": 34920 }, { "epoch": 3.588471023427867, "grad_norm": 0.10625668615102768, "learning_rate": 0.01, "loss": 2.0118, "step": 34923 }, { "epoch": 3.5887792848335387, "grad_norm": 0.05255312845110893, "learning_rate": 0.01, "loss": 1.9957, "step": 34926 }, { "epoch": 3.589087546239211, "grad_norm": 0.09395511448383331, "learning_rate": 0.01, "loss": 1.986, "step": 34929 }, { "epoch": 3.589395807644883, "grad_norm": 0.04209842532873154, "learning_rate": 0.01, "loss": 1.9993, "step": 34932 }, { "epoch": 3.589704069050555, "grad_norm": 0.0340830534696579, "learning_rate": 0.01, "loss": 2.036, "step": 34935 }, { "epoch": 3.590012330456227, "grad_norm": 0.041663311421871185, "learning_rate": 0.01, "loss": 1.9921, "step": 34938 }, { "epoch": 3.590320591861899, "grad_norm": 0.044666144996881485, "learning_rate": 0.01, "loss": 2.0027, "step": 34941 }, { "epoch": 3.590628853267571, "grad_norm": 0.05369654670357704, "learning_rate": 0.01, "loss": 2.0291, "step": 34944 }, { "epoch": 3.5909371146732427, "grad_norm": 0.054408960044384, "learning_rate": 0.01, "loss": 1.9996, "step": 34947 }, { "epoch": 3.591245376078915, "grad_norm": 0.09313920885324478, "learning_rate": 0.01, "loss": 1.9848, "step": 34950 }, { "epoch": 3.591553637484587, "grad_norm": 0.05207030475139618, "learning_rate": 0.01, "loss": 1.9925, "step": 34953 }, { "epoch": 3.591861898890259, "grad_norm": 0.06822825968265533, "learning_rate": 0.01, "loss": 2.0096, "step": 34956 }, { "epoch": 3.592170160295931, "grad_norm": 0.05392748489975929, "learning_rate": 0.01, "loss": 2.0129, "step": 34959 }, { "epoch": 3.592478421701603, "grad_norm": 0.06300345808267593, "learning_rate": 0.01, "loss": 2.0259, "step": 34962 }, { "epoch": 3.592786683107275, "grad_norm": 0.09526319801807404, "learning_rate": 0.01, "loss": 1.982, "step": 34965 }, { "epoch": 3.5930949445129468, "grad_norm": 0.060867104679346085, "learning_rate": 0.01, "loss": 2.0216, "step": 34968 }, { "epoch": 3.593403205918619, "grad_norm": 0.09763433039188385, "learning_rate": 0.01, "loss": 2.0151, "step": 34971 }, { "epoch": 3.593711467324291, "grad_norm": 0.05499181151390076, "learning_rate": 0.01, "loss": 2.0221, "step": 34974 }, { "epoch": 3.594019728729963, "grad_norm": 0.18828648328781128, "learning_rate": 0.01, "loss": 2.0094, "step": 34977 }, { "epoch": 3.594327990135635, "grad_norm": 0.1367756426334381, "learning_rate": 0.01, "loss": 1.9861, "step": 34980 }, { "epoch": 3.5946362515413073, "grad_norm": 0.0752996876835823, "learning_rate": 0.01, "loss": 1.9973, "step": 34983 }, { "epoch": 3.594944512946979, "grad_norm": 0.07500961422920227, "learning_rate": 0.01, "loss": 2.0196, "step": 34986 }, { "epoch": 3.595252774352651, "grad_norm": 0.08114928752183914, "learning_rate": 0.01, "loss": 1.9768, "step": 34989 }, { "epoch": 3.595561035758323, "grad_norm": 0.0735306516289711, "learning_rate": 0.01, "loss": 2.0046, "step": 34992 }, { "epoch": 3.595869297163995, "grad_norm": 0.04386765882372856, "learning_rate": 0.01, "loss": 2.0095, "step": 34995 }, { "epoch": 3.596177558569667, "grad_norm": 0.052437882870435715, "learning_rate": 0.01, "loss": 2.0008, "step": 34998 }, { "epoch": 3.596485819975339, "grad_norm": 0.044234346598386765, "learning_rate": 0.01, "loss": 2.0145, "step": 35001 }, { "epoch": 3.5967940813810113, "grad_norm": 0.04302847385406494, "learning_rate": 0.01, "loss": 2.0284, "step": 35004 }, { "epoch": 3.597102342786683, "grad_norm": 0.047404494136571884, "learning_rate": 0.01, "loss": 1.9965, "step": 35007 }, { "epoch": 3.5974106041923553, "grad_norm": 0.04339216649532318, "learning_rate": 0.01, "loss": 1.9888, "step": 35010 }, { "epoch": 3.597718865598027, "grad_norm": 0.07121206820011139, "learning_rate": 0.01, "loss": 1.9935, "step": 35013 }, { "epoch": 3.598027127003699, "grad_norm": 0.06314744055271149, "learning_rate": 0.01, "loss": 2.0062, "step": 35016 }, { "epoch": 3.598335388409371, "grad_norm": 0.0557650588452816, "learning_rate": 0.01, "loss": 1.9947, "step": 35019 }, { "epoch": 3.598643649815043, "grad_norm": 0.043066419661045074, "learning_rate": 0.01, "loss": 2.0061, "step": 35022 }, { "epoch": 3.5989519112207153, "grad_norm": 0.0739695131778717, "learning_rate": 0.01, "loss": 2.0139, "step": 35025 }, { "epoch": 3.599260172626387, "grad_norm": 0.13933037221431732, "learning_rate": 0.01, "loss": 1.9982, "step": 35028 }, { "epoch": 3.5995684340320593, "grad_norm": 0.03341522440314293, "learning_rate": 0.01, "loss": 1.9969, "step": 35031 }, { "epoch": 3.599876695437731, "grad_norm": 0.07127437740564346, "learning_rate": 0.01, "loss": 1.9988, "step": 35034 }, { "epoch": 3.6001849568434032, "grad_norm": 0.05078571289777756, "learning_rate": 0.01, "loss": 1.9961, "step": 35037 }, { "epoch": 3.600493218249075, "grad_norm": 0.049321308732032776, "learning_rate": 0.01, "loss": 2.0083, "step": 35040 }, { "epoch": 3.600801479654747, "grad_norm": 0.04840512201189995, "learning_rate": 0.01, "loss": 2.0086, "step": 35043 }, { "epoch": 3.6011097410604194, "grad_norm": 0.05541226640343666, "learning_rate": 0.01, "loss": 1.9975, "step": 35046 }, { "epoch": 3.601418002466091, "grad_norm": 0.03456197306513786, "learning_rate": 0.01, "loss": 2.0024, "step": 35049 }, { "epoch": 3.6017262638717633, "grad_norm": 0.0342303030192852, "learning_rate": 0.01, "loss": 2.0113, "step": 35052 }, { "epoch": 3.6020345252774355, "grad_norm": 0.043709222227334976, "learning_rate": 0.01, "loss": 1.9818, "step": 35055 }, { "epoch": 3.6023427866831073, "grad_norm": 0.05025354400277138, "learning_rate": 0.01, "loss": 1.9958, "step": 35058 }, { "epoch": 3.6026510480887795, "grad_norm": 0.0903453454375267, "learning_rate": 0.01, "loss": 1.9858, "step": 35061 }, { "epoch": 3.602959309494451, "grad_norm": 0.029984181746840477, "learning_rate": 0.01, "loss": 2.0049, "step": 35064 }, { "epoch": 3.6032675709001234, "grad_norm": 0.03956552594900131, "learning_rate": 0.01, "loss": 1.9821, "step": 35067 }, { "epoch": 3.603575832305795, "grad_norm": 0.0535871721804142, "learning_rate": 0.01, "loss": 2.0002, "step": 35070 }, { "epoch": 3.6038840937114673, "grad_norm": 0.07621357589960098, "learning_rate": 0.01, "loss": 1.9978, "step": 35073 }, { "epoch": 3.6041923551171395, "grad_norm": 0.08122174441814423, "learning_rate": 0.01, "loss": 2.0012, "step": 35076 }, { "epoch": 3.6045006165228113, "grad_norm": 0.04478932544589043, "learning_rate": 0.01, "loss": 2.0016, "step": 35079 }, { "epoch": 3.6048088779284835, "grad_norm": 0.07432077080011368, "learning_rate": 0.01, "loss": 1.9881, "step": 35082 }, { "epoch": 3.6051171393341552, "grad_norm": 0.08805304020643234, "learning_rate": 0.01, "loss": 2.0014, "step": 35085 }, { "epoch": 3.6054254007398274, "grad_norm": 0.15478205680847168, "learning_rate": 0.01, "loss": 1.9907, "step": 35088 }, { "epoch": 3.605733662145499, "grad_norm": 0.11871577054262161, "learning_rate": 0.01, "loss": 2.0125, "step": 35091 }, { "epoch": 3.6060419235511714, "grad_norm": 0.06366606056690216, "learning_rate": 0.01, "loss": 2.0011, "step": 35094 }, { "epoch": 3.6063501849568436, "grad_norm": 0.10421419143676758, "learning_rate": 0.01, "loss": 1.9947, "step": 35097 }, { "epoch": 3.6066584463625153, "grad_norm": 0.08190574496984482, "learning_rate": 0.01, "loss": 1.9893, "step": 35100 }, { "epoch": 3.6069667077681875, "grad_norm": 0.03852493688464165, "learning_rate": 0.01, "loss": 2.0112, "step": 35103 }, { "epoch": 3.6072749691738597, "grad_norm": 0.04792521148920059, "learning_rate": 0.01, "loss": 2.0063, "step": 35106 }, { "epoch": 3.6075832305795315, "grad_norm": 0.04788130521774292, "learning_rate": 0.01, "loss": 2.0138, "step": 35109 }, { "epoch": 3.607891491985203, "grad_norm": 0.06820268929004669, "learning_rate": 0.01, "loss": 1.9813, "step": 35112 }, { "epoch": 3.6081997533908754, "grad_norm": 0.05702923610806465, "learning_rate": 0.01, "loss": 2.0261, "step": 35115 }, { "epoch": 3.6085080147965476, "grad_norm": 0.10051339119672775, "learning_rate": 0.01, "loss": 1.9805, "step": 35118 }, { "epoch": 3.6088162762022193, "grad_norm": 0.07067910581827164, "learning_rate": 0.01, "loss": 2.0049, "step": 35121 }, { "epoch": 3.6091245376078915, "grad_norm": 0.03547900170087814, "learning_rate": 0.01, "loss": 1.9887, "step": 35124 }, { "epoch": 3.6094327990135637, "grad_norm": 0.062315478920936584, "learning_rate": 0.01, "loss": 2.0035, "step": 35127 }, { "epoch": 3.6097410604192355, "grad_norm": 0.12956464290618896, "learning_rate": 0.01, "loss": 2.0106, "step": 35130 }, { "epoch": 3.6100493218249077, "grad_norm": 0.1570357233285904, "learning_rate": 0.01, "loss": 2.0408, "step": 35133 }, { "epoch": 3.6103575832305794, "grad_norm": 0.06165264546871185, "learning_rate": 0.01, "loss": 2.0178, "step": 35136 }, { "epoch": 3.6106658446362516, "grad_norm": 0.046166177839040756, "learning_rate": 0.01, "loss": 2.0237, "step": 35139 }, { "epoch": 3.6109741060419234, "grad_norm": 0.052293550223112106, "learning_rate": 0.01, "loss": 1.9686, "step": 35142 }, { "epoch": 3.6112823674475956, "grad_norm": 0.046433527022600174, "learning_rate": 0.01, "loss": 1.9964, "step": 35145 }, { "epoch": 3.6115906288532678, "grad_norm": 0.029988888651132584, "learning_rate": 0.01, "loss": 1.9536, "step": 35148 }, { "epoch": 3.6118988902589395, "grad_norm": 0.05726629123091698, "learning_rate": 0.01, "loss": 2.0312, "step": 35151 }, { "epoch": 3.6122071516646117, "grad_norm": 0.054002657532691956, "learning_rate": 0.01, "loss": 1.9853, "step": 35154 }, { "epoch": 3.6125154130702835, "grad_norm": 0.05368361249566078, "learning_rate": 0.01, "loss": 1.978, "step": 35157 }, { "epoch": 3.6128236744759556, "grad_norm": 0.04732634872198105, "learning_rate": 0.01, "loss": 1.9876, "step": 35160 }, { "epoch": 3.6131319358816274, "grad_norm": 0.05976017564535141, "learning_rate": 0.01, "loss": 1.9695, "step": 35163 }, { "epoch": 3.6134401972872996, "grad_norm": 0.04517058655619621, "learning_rate": 0.01, "loss": 2.0137, "step": 35166 }, { "epoch": 3.613748458692972, "grad_norm": 0.12990356981754303, "learning_rate": 0.01, "loss": 2.0157, "step": 35169 }, { "epoch": 3.6140567200986435, "grad_norm": 0.059477876871824265, "learning_rate": 0.01, "loss": 1.9885, "step": 35172 }, { "epoch": 3.6143649815043157, "grad_norm": 0.05780341103672981, "learning_rate": 0.01, "loss": 2.0084, "step": 35175 }, { "epoch": 3.614673242909988, "grad_norm": 0.03971264883875847, "learning_rate": 0.01, "loss": 2.0061, "step": 35178 }, { "epoch": 3.6149815043156597, "grad_norm": 0.04069376364350319, "learning_rate": 0.01, "loss": 2.0013, "step": 35181 }, { "epoch": 3.6152897657213314, "grad_norm": 0.049401164054870605, "learning_rate": 0.01, "loss": 1.9785, "step": 35184 }, { "epoch": 3.6155980271270036, "grad_norm": 0.058909036219120026, "learning_rate": 0.01, "loss": 1.9979, "step": 35187 }, { "epoch": 3.615906288532676, "grad_norm": 0.1364602893590927, "learning_rate": 0.01, "loss": 2.0126, "step": 35190 }, { "epoch": 3.6162145499383476, "grad_norm": 0.0892588272690773, "learning_rate": 0.01, "loss": 1.9942, "step": 35193 }, { "epoch": 3.6165228113440198, "grad_norm": 0.03974326699972153, "learning_rate": 0.01, "loss": 2.0159, "step": 35196 }, { "epoch": 3.616831072749692, "grad_norm": 0.03898739442229271, "learning_rate": 0.01, "loss": 2.0303, "step": 35199 }, { "epoch": 3.6171393341553637, "grad_norm": 0.044964905828237534, "learning_rate": 0.01, "loss": 2.015, "step": 35202 }, { "epoch": 3.617447595561036, "grad_norm": 0.11181700229644775, "learning_rate": 0.01, "loss": 1.996, "step": 35205 }, { "epoch": 3.6177558569667077, "grad_norm": 0.0697384923696518, "learning_rate": 0.01, "loss": 2.0091, "step": 35208 }, { "epoch": 3.61806411837238, "grad_norm": 0.03923085704445839, "learning_rate": 0.01, "loss": 1.9976, "step": 35211 }, { "epoch": 3.6183723797780516, "grad_norm": 0.03635834902524948, "learning_rate": 0.01, "loss": 2.0082, "step": 35214 }, { "epoch": 3.618680641183724, "grad_norm": 0.05013216286897659, "learning_rate": 0.01, "loss": 2.0005, "step": 35217 }, { "epoch": 3.618988902589396, "grad_norm": 0.03531458228826523, "learning_rate": 0.01, "loss": 2.0256, "step": 35220 }, { "epoch": 3.6192971639950677, "grad_norm": 0.09228625893592834, "learning_rate": 0.01, "loss": 1.9941, "step": 35223 }, { "epoch": 3.61960542540074, "grad_norm": 0.06587129831314087, "learning_rate": 0.01, "loss": 2.0179, "step": 35226 }, { "epoch": 3.6199136868064117, "grad_norm": 0.11610520631074905, "learning_rate": 0.01, "loss": 2.0364, "step": 35229 }, { "epoch": 3.620221948212084, "grad_norm": 0.05594666674733162, "learning_rate": 0.01, "loss": 1.9921, "step": 35232 }, { "epoch": 3.6205302096177556, "grad_norm": 0.04040682688355446, "learning_rate": 0.01, "loss": 2.0213, "step": 35235 }, { "epoch": 3.620838471023428, "grad_norm": 0.038559917360544205, "learning_rate": 0.01, "loss": 1.9874, "step": 35238 }, { "epoch": 3.6211467324291, "grad_norm": 0.05012970045208931, "learning_rate": 0.01, "loss": 1.9959, "step": 35241 }, { "epoch": 3.6214549938347718, "grad_norm": 0.09769418835639954, "learning_rate": 0.01, "loss": 2.0246, "step": 35244 }, { "epoch": 3.621763255240444, "grad_norm": 0.07791107892990112, "learning_rate": 0.01, "loss": 2.0021, "step": 35247 }, { "epoch": 3.622071516646116, "grad_norm": 0.06556175649166107, "learning_rate": 0.01, "loss": 2.0175, "step": 35250 }, { "epoch": 3.622379778051788, "grad_norm": 0.04608479142189026, "learning_rate": 0.01, "loss": 2.0082, "step": 35253 }, { "epoch": 3.6226880394574597, "grad_norm": 0.03681867569684982, "learning_rate": 0.01, "loss": 2.0005, "step": 35256 }, { "epoch": 3.622996300863132, "grad_norm": 0.05940413847565651, "learning_rate": 0.01, "loss": 1.9957, "step": 35259 }, { "epoch": 3.623304562268804, "grad_norm": 0.09911686182022095, "learning_rate": 0.01, "loss": 1.9991, "step": 35262 }, { "epoch": 3.623612823674476, "grad_norm": 0.03739270940423012, "learning_rate": 0.01, "loss": 2.0285, "step": 35265 }, { "epoch": 3.623921085080148, "grad_norm": 0.11673395335674286, "learning_rate": 0.01, "loss": 2.0093, "step": 35268 }, { "epoch": 3.62422934648582, "grad_norm": 0.0795954018831253, "learning_rate": 0.01, "loss": 1.9954, "step": 35271 }, { "epoch": 3.624537607891492, "grad_norm": 0.04180069640278816, "learning_rate": 0.01, "loss": 1.9844, "step": 35274 }, { "epoch": 3.624845869297164, "grad_norm": 0.09516190737485886, "learning_rate": 0.01, "loss": 2.0031, "step": 35277 }, { "epoch": 3.625154130702836, "grad_norm": 0.11448989808559418, "learning_rate": 0.01, "loss": 2.0236, "step": 35280 }, { "epoch": 3.625462392108508, "grad_norm": 0.06945902854204178, "learning_rate": 0.01, "loss": 2.0211, "step": 35283 }, { "epoch": 3.62577065351418, "grad_norm": 0.039453648030757904, "learning_rate": 0.01, "loss": 2.0038, "step": 35286 }, { "epoch": 3.626078914919852, "grad_norm": 0.06266641616821289, "learning_rate": 0.01, "loss": 2.0267, "step": 35289 }, { "epoch": 3.626387176325524, "grad_norm": 0.04908052831888199, "learning_rate": 0.01, "loss": 1.9993, "step": 35292 }, { "epoch": 3.626695437731196, "grad_norm": 0.06186684966087341, "learning_rate": 0.01, "loss": 1.9813, "step": 35295 }, { "epoch": 3.627003699136868, "grad_norm": 0.13145780563354492, "learning_rate": 0.01, "loss": 1.9967, "step": 35298 }, { "epoch": 3.6273119605425403, "grad_norm": 0.05850294232368469, "learning_rate": 0.01, "loss": 1.9963, "step": 35301 }, { "epoch": 3.627620221948212, "grad_norm": 0.04934421926736832, "learning_rate": 0.01, "loss": 1.9952, "step": 35304 }, { "epoch": 3.627928483353884, "grad_norm": 0.07607220858335495, "learning_rate": 0.01, "loss": 2.0091, "step": 35307 }, { "epoch": 3.628236744759556, "grad_norm": 0.046615466475486755, "learning_rate": 0.01, "loss": 2.0037, "step": 35310 }, { "epoch": 3.6285450061652282, "grad_norm": 0.11176659911870956, "learning_rate": 0.01, "loss": 2.0077, "step": 35313 }, { "epoch": 3.6288532675709, "grad_norm": 0.07844175398349762, "learning_rate": 0.01, "loss": 2.0198, "step": 35316 }, { "epoch": 3.629161528976572, "grad_norm": 0.0730755403637886, "learning_rate": 0.01, "loss": 2.0014, "step": 35319 }, { "epoch": 3.6294697903822444, "grad_norm": 0.08503863960504532, "learning_rate": 0.01, "loss": 2.005, "step": 35322 }, { "epoch": 3.629778051787916, "grad_norm": 0.03936958312988281, "learning_rate": 0.01, "loss": 2.0211, "step": 35325 }, { "epoch": 3.6300863131935883, "grad_norm": 0.11586350947618484, "learning_rate": 0.01, "loss": 2.0054, "step": 35328 }, { "epoch": 3.63039457459926, "grad_norm": 0.06128397583961487, "learning_rate": 0.01, "loss": 2.0076, "step": 35331 }, { "epoch": 3.6307028360049323, "grad_norm": 0.04872961342334747, "learning_rate": 0.01, "loss": 2.017, "step": 35334 }, { "epoch": 3.631011097410604, "grad_norm": 0.08416412770748138, "learning_rate": 0.01, "loss": 2.0144, "step": 35337 }, { "epoch": 3.631319358816276, "grad_norm": 0.05844739452004433, "learning_rate": 0.01, "loss": 2.001, "step": 35340 }, { "epoch": 3.6316276202219484, "grad_norm": 0.08564214408397675, "learning_rate": 0.01, "loss": 2.0061, "step": 35343 }, { "epoch": 3.63193588162762, "grad_norm": 0.08769746124744415, "learning_rate": 0.01, "loss": 1.9914, "step": 35346 }, { "epoch": 3.6322441430332923, "grad_norm": 0.03819827735424042, "learning_rate": 0.01, "loss": 2.0103, "step": 35349 }, { "epoch": 3.632552404438964, "grad_norm": 0.12311212718486786, "learning_rate": 0.01, "loss": 1.9921, "step": 35352 }, { "epoch": 3.6328606658446363, "grad_norm": 0.037630997598171234, "learning_rate": 0.01, "loss": 2.007, "step": 35355 }, { "epoch": 3.633168927250308, "grad_norm": 0.08739642798900604, "learning_rate": 0.01, "loss": 2.0113, "step": 35358 }, { "epoch": 3.6334771886559802, "grad_norm": 0.04889992997050285, "learning_rate": 0.01, "loss": 2.0098, "step": 35361 }, { "epoch": 3.6337854500616524, "grad_norm": 0.035435836762189865, "learning_rate": 0.01, "loss": 2.0045, "step": 35364 }, { "epoch": 3.634093711467324, "grad_norm": 0.059248197823762894, "learning_rate": 0.01, "loss": 2.0159, "step": 35367 }, { "epoch": 3.6344019728729964, "grad_norm": 0.08831764757633209, "learning_rate": 0.01, "loss": 1.992, "step": 35370 }, { "epoch": 3.6347102342786686, "grad_norm": 0.14940893650054932, "learning_rate": 0.01, "loss": 2.0051, "step": 35373 }, { "epoch": 3.6350184956843403, "grad_norm": 0.06616901606321335, "learning_rate": 0.01, "loss": 1.9825, "step": 35376 }, { "epoch": 3.635326757090012, "grad_norm": 0.06391241401433945, "learning_rate": 0.01, "loss": 1.9636, "step": 35379 }, { "epoch": 3.6356350184956843, "grad_norm": 0.0580880343914032, "learning_rate": 0.01, "loss": 1.975, "step": 35382 }, { "epoch": 3.6359432799013565, "grad_norm": 0.057876862585544586, "learning_rate": 0.01, "loss": 1.9888, "step": 35385 }, { "epoch": 3.636251541307028, "grad_norm": 0.044347915798425674, "learning_rate": 0.01, "loss": 2.0051, "step": 35388 }, { "epoch": 3.6365598027127004, "grad_norm": 0.0400017574429512, "learning_rate": 0.01, "loss": 1.9722, "step": 35391 }, { "epoch": 3.6368680641183726, "grad_norm": 0.11369085311889648, "learning_rate": 0.01, "loss": 1.9855, "step": 35394 }, { "epoch": 3.6371763255240444, "grad_norm": 0.10995481163263321, "learning_rate": 0.01, "loss": 2.0037, "step": 35397 }, { "epoch": 3.6374845869297165, "grad_norm": 0.05769447609782219, "learning_rate": 0.01, "loss": 2.0109, "step": 35400 }, { "epoch": 3.6377928483353883, "grad_norm": 0.07872482389211655, "learning_rate": 0.01, "loss": 2.0101, "step": 35403 }, { "epoch": 3.6381011097410605, "grad_norm": 0.05196002870798111, "learning_rate": 0.01, "loss": 2.0108, "step": 35406 }, { "epoch": 3.6384093711467322, "grad_norm": 0.0493265725672245, "learning_rate": 0.01, "loss": 2.0283, "step": 35409 }, { "epoch": 3.6387176325524044, "grad_norm": 0.04237900674343109, "learning_rate": 0.01, "loss": 2.0216, "step": 35412 }, { "epoch": 3.6390258939580766, "grad_norm": 0.06882897764444351, "learning_rate": 0.01, "loss": 2.0073, "step": 35415 }, { "epoch": 3.6393341553637484, "grad_norm": 0.058774422854185104, "learning_rate": 0.01, "loss": 2.0114, "step": 35418 }, { "epoch": 3.6396424167694206, "grad_norm": 0.10032794624567032, "learning_rate": 0.01, "loss": 2.0008, "step": 35421 }, { "epoch": 3.6399506781750923, "grad_norm": 0.042988426983356476, "learning_rate": 0.01, "loss": 2.0311, "step": 35424 }, { "epoch": 3.6402589395807645, "grad_norm": 0.055391326546669006, "learning_rate": 0.01, "loss": 2.0019, "step": 35427 }, { "epoch": 3.6405672009864363, "grad_norm": 0.03977194428443909, "learning_rate": 0.01, "loss": 1.9992, "step": 35430 }, { "epoch": 3.6408754623921085, "grad_norm": 0.0903034657239914, "learning_rate": 0.01, "loss": 2.0207, "step": 35433 }, { "epoch": 3.6411837237977807, "grad_norm": 0.04610143229365349, "learning_rate": 0.01, "loss": 2.0015, "step": 35436 }, { "epoch": 3.6414919852034524, "grad_norm": 0.08295582234859467, "learning_rate": 0.01, "loss": 1.9848, "step": 35439 }, { "epoch": 3.6418002466091246, "grad_norm": 0.09561655670404434, "learning_rate": 0.01, "loss": 1.9931, "step": 35442 }, { "epoch": 3.642108508014797, "grad_norm": 0.07968761771917343, "learning_rate": 0.01, "loss": 1.9925, "step": 35445 }, { "epoch": 3.6424167694204685, "grad_norm": 0.04458128660917282, "learning_rate": 0.01, "loss": 2.0112, "step": 35448 }, { "epoch": 3.6427250308261403, "grad_norm": 0.12892895936965942, "learning_rate": 0.01, "loss": 2.0004, "step": 35451 }, { "epoch": 3.6430332922318125, "grad_norm": 0.1063292846083641, "learning_rate": 0.01, "loss": 2.0165, "step": 35454 }, { "epoch": 3.6433415536374847, "grad_norm": 0.04187742993235588, "learning_rate": 0.01, "loss": 2.0146, "step": 35457 }, { "epoch": 3.6436498150431564, "grad_norm": 0.06096494942903519, "learning_rate": 0.01, "loss": 1.9935, "step": 35460 }, { "epoch": 3.6439580764488286, "grad_norm": 0.035430386662483215, "learning_rate": 0.01, "loss": 2.0034, "step": 35463 }, { "epoch": 3.644266337854501, "grad_norm": 0.039337921887636185, "learning_rate": 0.01, "loss": 2.0163, "step": 35466 }, { "epoch": 3.6445745992601726, "grad_norm": 0.04446728155016899, "learning_rate": 0.01, "loss": 1.9804, "step": 35469 }, { "epoch": 3.6448828606658448, "grad_norm": 0.06175538897514343, "learning_rate": 0.01, "loss": 1.9991, "step": 35472 }, { "epoch": 3.6451911220715165, "grad_norm": 0.1305261105298996, "learning_rate": 0.01, "loss": 2.008, "step": 35475 }, { "epoch": 3.6454993834771887, "grad_norm": 0.14111317694187164, "learning_rate": 0.01, "loss": 1.9938, "step": 35478 }, { "epoch": 3.6458076448828605, "grad_norm": 0.13947373628616333, "learning_rate": 0.01, "loss": 2.0083, "step": 35481 }, { "epoch": 3.6461159062885327, "grad_norm": 0.057133182883262634, "learning_rate": 0.01, "loss": 1.9899, "step": 35484 }, { "epoch": 3.646424167694205, "grad_norm": 0.03709038347005844, "learning_rate": 0.01, "loss": 1.9825, "step": 35487 }, { "epoch": 3.6467324290998766, "grad_norm": 0.04786526411771774, "learning_rate": 0.01, "loss": 1.993, "step": 35490 }, { "epoch": 3.647040690505549, "grad_norm": 0.03816988691687584, "learning_rate": 0.01, "loss": 1.9762, "step": 35493 }, { "epoch": 3.6473489519112205, "grad_norm": 0.05153171718120575, "learning_rate": 0.01, "loss": 2.0236, "step": 35496 }, { "epoch": 3.6476572133168927, "grad_norm": 0.09072964638471603, "learning_rate": 0.01, "loss": 2.0153, "step": 35499 }, { "epoch": 3.6479654747225645, "grad_norm": 0.06302040815353394, "learning_rate": 0.01, "loss": 1.9963, "step": 35502 }, { "epoch": 3.6482737361282367, "grad_norm": 0.04940348491072655, "learning_rate": 0.01, "loss": 2.0015, "step": 35505 }, { "epoch": 3.648581997533909, "grad_norm": 0.04952861741185188, "learning_rate": 0.01, "loss": 2.0, "step": 35508 }, { "epoch": 3.6488902589395806, "grad_norm": 0.03227638080716133, "learning_rate": 0.01, "loss": 1.9961, "step": 35511 }, { "epoch": 3.649198520345253, "grad_norm": 0.04237101599574089, "learning_rate": 0.01, "loss": 2.019, "step": 35514 }, { "epoch": 3.649506781750925, "grad_norm": 0.10213712602853775, "learning_rate": 0.01, "loss": 2.0028, "step": 35517 }, { "epoch": 3.6498150431565968, "grad_norm": 0.0747971460223198, "learning_rate": 0.01, "loss": 2.0102, "step": 35520 }, { "epoch": 3.650123304562269, "grad_norm": 0.07060317695140839, "learning_rate": 0.01, "loss": 1.9951, "step": 35523 }, { "epoch": 3.6504315659679407, "grad_norm": 0.13745035231113434, "learning_rate": 0.01, "loss": 2.0171, "step": 35526 }, { "epoch": 3.650739827373613, "grad_norm": 0.11544306576251984, "learning_rate": 0.01, "loss": 2.0374, "step": 35529 }, { "epoch": 3.6510480887792847, "grad_norm": 0.07669739425182343, "learning_rate": 0.01, "loss": 2.0227, "step": 35532 }, { "epoch": 3.651356350184957, "grad_norm": 0.04519101604819298, "learning_rate": 0.01, "loss": 2.0136, "step": 35535 }, { "epoch": 3.651664611590629, "grad_norm": 0.04446371644735336, "learning_rate": 0.01, "loss": 2.0065, "step": 35538 }, { "epoch": 3.651972872996301, "grad_norm": 0.05518916994333267, "learning_rate": 0.01, "loss": 2.0036, "step": 35541 }, { "epoch": 3.652281134401973, "grad_norm": 0.04405786469578743, "learning_rate": 0.01, "loss": 2.0106, "step": 35544 }, { "epoch": 3.6525893958076447, "grad_norm": 0.06154394894838333, "learning_rate": 0.01, "loss": 2.0076, "step": 35547 }, { "epoch": 3.652897657213317, "grad_norm": 0.07919877767562866, "learning_rate": 0.01, "loss": 1.9895, "step": 35550 }, { "epoch": 3.6532059186189887, "grad_norm": 0.0575590617954731, "learning_rate": 0.01, "loss": 2.011, "step": 35553 }, { "epoch": 3.653514180024661, "grad_norm": 0.05680471658706665, "learning_rate": 0.01, "loss": 2.0096, "step": 35556 }, { "epoch": 3.653822441430333, "grad_norm": 0.0352899394929409, "learning_rate": 0.01, "loss": 1.9937, "step": 35559 }, { "epoch": 3.654130702836005, "grad_norm": 0.034432023763656616, "learning_rate": 0.01, "loss": 2.004, "step": 35562 }, { "epoch": 3.654438964241677, "grad_norm": 0.10298652946949005, "learning_rate": 0.01, "loss": 1.9992, "step": 35565 }, { "epoch": 3.654747225647349, "grad_norm": 0.03996056690812111, "learning_rate": 0.01, "loss": 2.007, "step": 35568 }, { "epoch": 3.655055487053021, "grad_norm": 0.07411230355501175, "learning_rate": 0.01, "loss": 1.9981, "step": 35571 }, { "epoch": 3.6553637484586927, "grad_norm": 0.04124278202652931, "learning_rate": 0.01, "loss": 1.9989, "step": 35574 }, { "epoch": 3.655672009864365, "grad_norm": 0.035065338015556335, "learning_rate": 0.01, "loss": 1.9903, "step": 35577 }, { "epoch": 3.655980271270037, "grad_norm": 0.04023493826389313, "learning_rate": 0.01, "loss": 2.0126, "step": 35580 }, { "epoch": 3.656288532675709, "grad_norm": 0.0333552286028862, "learning_rate": 0.01, "loss": 2.0139, "step": 35583 }, { "epoch": 3.656596794081381, "grad_norm": 0.1286098062992096, "learning_rate": 0.01, "loss": 1.9847, "step": 35586 }, { "epoch": 3.6569050554870532, "grad_norm": 0.061940498650074005, "learning_rate": 0.01, "loss": 2.0093, "step": 35589 }, { "epoch": 3.657213316892725, "grad_norm": 0.08766448497772217, "learning_rate": 0.01, "loss": 2.0062, "step": 35592 }, { "epoch": 3.657521578298397, "grad_norm": 0.07218505442142487, "learning_rate": 0.01, "loss": 1.9914, "step": 35595 }, { "epoch": 3.657829839704069, "grad_norm": 0.11700677126646042, "learning_rate": 0.01, "loss": 2.0164, "step": 35598 }, { "epoch": 3.658138101109741, "grad_norm": 0.05746941268444061, "learning_rate": 0.01, "loss": 1.9741, "step": 35601 }, { "epoch": 3.658446362515413, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 1.9803, "step": 35604 }, { "epoch": 3.658754623921085, "grad_norm": 0.04524237662553787, "learning_rate": 0.01, "loss": 1.9912, "step": 35607 }, { "epoch": 3.6590628853267573, "grad_norm": 0.053268514573574066, "learning_rate": 0.01, "loss": 2.0099, "step": 35610 }, { "epoch": 3.659371146732429, "grad_norm": 0.06391450762748718, "learning_rate": 0.01, "loss": 1.9958, "step": 35613 }, { "epoch": 3.659679408138101, "grad_norm": 0.06404808908700943, "learning_rate": 0.01, "loss": 2.0093, "step": 35616 }, { "epoch": 3.659987669543773, "grad_norm": 0.06734498590230942, "learning_rate": 0.01, "loss": 2.0135, "step": 35619 }, { "epoch": 3.660295930949445, "grad_norm": 0.056514717638492584, "learning_rate": 0.01, "loss": 2.0195, "step": 35622 }, { "epoch": 3.660604192355117, "grad_norm": 0.04377966374158859, "learning_rate": 0.01, "loss": 1.9945, "step": 35625 }, { "epoch": 3.660912453760789, "grad_norm": 0.0527014285326004, "learning_rate": 0.01, "loss": 2.0011, "step": 35628 }, { "epoch": 3.6612207151664613, "grad_norm": 0.08603314310312271, "learning_rate": 0.01, "loss": 1.9736, "step": 35631 }, { "epoch": 3.661528976572133, "grad_norm": 0.06637904793024063, "learning_rate": 0.01, "loss": 2.0078, "step": 35634 }, { "epoch": 3.6618372379778052, "grad_norm": 0.042211759835481644, "learning_rate": 0.01, "loss": 1.9934, "step": 35637 }, { "epoch": 3.6621454993834774, "grad_norm": 0.08589443564414978, "learning_rate": 0.01, "loss": 2.0076, "step": 35640 }, { "epoch": 3.662453760789149, "grad_norm": 0.12761181592941284, "learning_rate": 0.01, "loss": 1.9956, "step": 35643 }, { "epoch": 3.662762022194821, "grad_norm": 0.08702104538679123, "learning_rate": 0.01, "loss": 2.0147, "step": 35646 }, { "epoch": 3.663070283600493, "grad_norm": 0.1481538712978363, "learning_rate": 0.01, "loss": 2.0028, "step": 35649 }, { "epoch": 3.6633785450061653, "grad_norm": 0.05475825071334839, "learning_rate": 0.01, "loss": 1.991, "step": 35652 }, { "epoch": 3.663686806411837, "grad_norm": 0.0481879860162735, "learning_rate": 0.01, "loss": 2.0244, "step": 35655 }, { "epoch": 3.6639950678175093, "grad_norm": 0.07099757343530655, "learning_rate": 0.01, "loss": 2.027, "step": 35658 }, { "epoch": 3.6643033292231815, "grad_norm": 0.09161633253097534, "learning_rate": 0.01, "loss": 2.0139, "step": 35661 }, { "epoch": 3.664611590628853, "grad_norm": 0.06675172597169876, "learning_rate": 0.01, "loss": 1.9944, "step": 35664 }, { "epoch": 3.6649198520345254, "grad_norm": 0.06166239082813263, "learning_rate": 0.01, "loss": 1.992, "step": 35667 }, { "epoch": 3.665228113440197, "grad_norm": 0.07947093993425369, "learning_rate": 0.01, "loss": 2.0233, "step": 35670 }, { "epoch": 3.6655363748458694, "grad_norm": 0.07639001309871674, "learning_rate": 0.01, "loss": 2.0246, "step": 35673 }, { "epoch": 3.665844636251541, "grad_norm": 0.11109112948179245, "learning_rate": 0.01, "loss": 2.033, "step": 35676 }, { "epoch": 3.6661528976572133, "grad_norm": 0.0517781600356102, "learning_rate": 0.01, "loss": 2.0, "step": 35679 }, { "epoch": 3.6664611590628855, "grad_norm": 0.05615885183215141, "learning_rate": 0.01, "loss": 1.9858, "step": 35682 }, { "epoch": 3.6667694204685573, "grad_norm": 0.054977256804704666, "learning_rate": 0.01, "loss": 1.983, "step": 35685 }, { "epoch": 3.6670776818742294, "grad_norm": 0.08208134025335312, "learning_rate": 0.01, "loss": 2.0038, "step": 35688 }, { "epoch": 3.667385943279901, "grad_norm": 0.053141169250011444, "learning_rate": 0.01, "loss": 2.0, "step": 35691 }, { "epoch": 3.6676942046855734, "grad_norm": 0.0867038443684578, "learning_rate": 0.01, "loss": 1.9992, "step": 35694 }, { "epoch": 3.668002466091245, "grad_norm": 0.04153072461485863, "learning_rate": 0.01, "loss": 2.0156, "step": 35697 }, { "epoch": 3.6683107274969173, "grad_norm": 0.050722386687994, "learning_rate": 0.01, "loss": 2.0029, "step": 35700 }, { "epoch": 3.6686189889025895, "grad_norm": 0.036444198340177536, "learning_rate": 0.01, "loss": 1.98, "step": 35703 }, { "epoch": 3.6689272503082613, "grad_norm": 0.06851452589035034, "learning_rate": 0.01, "loss": 2.0365, "step": 35706 }, { "epoch": 3.6692355117139335, "grad_norm": 0.05442693457007408, "learning_rate": 0.01, "loss": 1.9783, "step": 35709 }, { "epoch": 3.6695437731196057, "grad_norm": 0.04789496958255768, "learning_rate": 0.01, "loss": 1.9919, "step": 35712 }, { "epoch": 3.6698520345252774, "grad_norm": 0.05229181796312332, "learning_rate": 0.01, "loss": 2.0011, "step": 35715 }, { "epoch": 3.670160295930949, "grad_norm": 0.05596926435828209, "learning_rate": 0.01, "loss": 1.9906, "step": 35718 }, { "epoch": 3.6704685573366214, "grad_norm": 0.04481478035449982, "learning_rate": 0.01, "loss": 2.0036, "step": 35721 }, { "epoch": 3.6707768187422936, "grad_norm": 0.053352151066064835, "learning_rate": 0.01, "loss": 1.9952, "step": 35724 }, { "epoch": 3.6710850801479653, "grad_norm": 0.07115049660205841, "learning_rate": 0.01, "loss": 1.9967, "step": 35727 }, { "epoch": 3.6713933415536375, "grad_norm": 0.06887483596801758, "learning_rate": 0.01, "loss": 1.9872, "step": 35730 }, { "epoch": 3.6717016029593097, "grad_norm": 0.14638669788837433, "learning_rate": 0.01, "loss": 2.0211, "step": 35733 }, { "epoch": 3.6720098643649814, "grad_norm": 0.06305021792650223, "learning_rate": 0.01, "loss": 2.0146, "step": 35736 }, { "epoch": 3.6723181257706536, "grad_norm": 0.041157372295856476, "learning_rate": 0.01, "loss": 2.0066, "step": 35739 }, { "epoch": 3.6726263871763254, "grad_norm": 0.10821440070867538, "learning_rate": 0.01, "loss": 1.9949, "step": 35742 }, { "epoch": 3.6729346485819976, "grad_norm": 0.06525052338838577, "learning_rate": 0.01, "loss": 2.0093, "step": 35745 }, { "epoch": 3.6732429099876693, "grad_norm": 0.03519715368747711, "learning_rate": 0.01, "loss": 2.0086, "step": 35748 }, { "epoch": 3.6735511713933415, "grad_norm": 0.0548231340944767, "learning_rate": 0.01, "loss": 1.9969, "step": 35751 }, { "epoch": 3.6738594327990137, "grad_norm": 0.08685827255249023, "learning_rate": 0.01, "loss": 1.9985, "step": 35754 }, { "epoch": 3.6741676942046855, "grad_norm": 0.05920972675085068, "learning_rate": 0.01, "loss": 1.9882, "step": 35757 }, { "epoch": 3.6744759556103577, "grad_norm": 0.060545727610588074, "learning_rate": 0.01, "loss": 2.0057, "step": 35760 }, { "epoch": 3.67478421701603, "grad_norm": 0.05436963587999344, "learning_rate": 0.01, "loss": 2.0023, "step": 35763 }, { "epoch": 3.6750924784217016, "grad_norm": 0.05975078046321869, "learning_rate": 0.01, "loss": 2.0112, "step": 35766 }, { "epoch": 3.6754007398273734, "grad_norm": 0.05793645977973938, "learning_rate": 0.01, "loss": 1.9861, "step": 35769 }, { "epoch": 3.6757090012330456, "grad_norm": 0.07932164520025253, "learning_rate": 0.01, "loss": 1.9808, "step": 35772 }, { "epoch": 3.6760172626387178, "grad_norm": 0.05182117223739624, "learning_rate": 0.01, "loss": 1.9987, "step": 35775 }, { "epoch": 3.6763255240443895, "grad_norm": 0.05408613011240959, "learning_rate": 0.01, "loss": 1.9912, "step": 35778 }, { "epoch": 3.6766337854500617, "grad_norm": 0.0396132618188858, "learning_rate": 0.01, "loss": 1.9922, "step": 35781 }, { "epoch": 3.676942046855734, "grad_norm": 0.04768325388431549, "learning_rate": 0.01, "loss": 2.0055, "step": 35784 }, { "epoch": 3.6772503082614056, "grad_norm": 0.09452294558286667, "learning_rate": 0.01, "loss": 1.9927, "step": 35787 }, { "epoch": 3.677558569667078, "grad_norm": 0.09846623986959457, "learning_rate": 0.01, "loss": 2.0182, "step": 35790 }, { "epoch": 3.6778668310727496, "grad_norm": 0.1176767498254776, "learning_rate": 0.01, "loss": 1.989, "step": 35793 }, { "epoch": 3.678175092478422, "grad_norm": 0.07070811092853546, "learning_rate": 0.01, "loss": 2.0283, "step": 35796 }, { "epoch": 3.6784833538840935, "grad_norm": 0.04359079524874687, "learning_rate": 0.01, "loss": 1.9875, "step": 35799 }, { "epoch": 3.6787916152897657, "grad_norm": 0.05718059092760086, "learning_rate": 0.01, "loss": 2.0186, "step": 35802 }, { "epoch": 3.679099876695438, "grad_norm": 0.03475908935070038, "learning_rate": 0.01, "loss": 1.9996, "step": 35805 }, { "epoch": 3.6794081381011097, "grad_norm": 0.03629058972001076, "learning_rate": 0.01, "loss": 2.0111, "step": 35808 }, { "epoch": 3.679716399506782, "grad_norm": 0.11145780235528946, "learning_rate": 0.01, "loss": 1.989, "step": 35811 }, { "epoch": 3.6800246609124536, "grad_norm": 0.07155486941337585, "learning_rate": 0.01, "loss": 2.025, "step": 35814 }, { "epoch": 3.680332922318126, "grad_norm": 0.05105472728610039, "learning_rate": 0.01, "loss": 2.0109, "step": 35817 }, { "epoch": 3.6806411837237976, "grad_norm": 0.04478003829717636, "learning_rate": 0.01, "loss": 2.0011, "step": 35820 }, { "epoch": 3.6809494451294698, "grad_norm": 0.03695627674460411, "learning_rate": 0.01, "loss": 2.0145, "step": 35823 }, { "epoch": 3.681257706535142, "grad_norm": 0.04222894459962845, "learning_rate": 0.01, "loss": 1.9761, "step": 35826 }, { "epoch": 3.6815659679408137, "grad_norm": 0.04362770542502403, "learning_rate": 0.01, "loss": 2.0082, "step": 35829 }, { "epoch": 3.681874229346486, "grad_norm": 0.07785011827945709, "learning_rate": 0.01, "loss": 1.9881, "step": 35832 }, { "epoch": 3.682182490752158, "grad_norm": 0.08382470905780792, "learning_rate": 0.01, "loss": 1.9778, "step": 35835 }, { "epoch": 3.68249075215783, "grad_norm": 0.09897731244564056, "learning_rate": 0.01, "loss": 1.9845, "step": 35838 }, { "epoch": 3.6827990135635016, "grad_norm": 0.05617908388376236, "learning_rate": 0.01, "loss": 1.977, "step": 35841 }, { "epoch": 3.683107274969174, "grad_norm": 0.07543769478797913, "learning_rate": 0.01, "loss": 1.9984, "step": 35844 }, { "epoch": 3.683415536374846, "grad_norm": 0.04036831855773926, "learning_rate": 0.01, "loss": 1.9944, "step": 35847 }, { "epoch": 3.6837237977805177, "grad_norm": 0.03254294767975807, "learning_rate": 0.01, "loss": 1.9971, "step": 35850 }, { "epoch": 3.68403205918619, "grad_norm": 0.08003180474042892, "learning_rate": 0.01, "loss": 1.9754, "step": 35853 }, { "epoch": 3.684340320591862, "grad_norm": 0.0821150541305542, "learning_rate": 0.01, "loss": 1.9944, "step": 35856 }, { "epoch": 3.684648581997534, "grad_norm": 0.15571752190589905, "learning_rate": 0.01, "loss": 1.997, "step": 35859 }, { "epoch": 3.684956843403206, "grad_norm": 0.07748742401599884, "learning_rate": 0.01, "loss": 2.001, "step": 35862 }, { "epoch": 3.685265104808878, "grad_norm": 0.08278176933526993, "learning_rate": 0.01, "loss": 2.0004, "step": 35865 }, { "epoch": 3.68557336621455, "grad_norm": 0.08014661818742752, "learning_rate": 0.01, "loss": 1.9751, "step": 35868 }, { "epoch": 3.6858816276202218, "grad_norm": 0.03511490300297737, "learning_rate": 0.01, "loss": 2.006, "step": 35871 }, { "epoch": 3.686189889025894, "grad_norm": 0.04790462180972099, "learning_rate": 0.01, "loss": 2.0058, "step": 35874 }, { "epoch": 3.686498150431566, "grad_norm": 0.05917114019393921, "learning_rate": 0.01, "loss": 1.9945, "step": 35877 }, { "epoch": 3.686806411837238, "grad_norm": 0.09026920050382614, "learning_rate": 0.01, "loss": 2.0278, "step": 35880 }, { "epoch": 3.68711467324291, "grad_norm": 0.07264431565999985, "learning_rate": 0.01, "loss": 1.9974, "step": 35883 }, { "epoch": 3.687422934648582, "grad_norm": 0.03798295557498932, "learning_rate": 0.01, "loss": 1.9907, "step": 35886 }, { "epoch": 3.687731196054254, "grad_norm": 0.1147773265838623, "learning_rate": 0.01, "loss": 1.9827, "step": 35889 }, { "epoch": 3.688039457459926, "grad_norm": 0.05281820893287659, "learning_rate": 0.01, "loss": 1.9969, "step": 35892 }, { "epoch": 3.688347718865598, "grad_norm": 0.0617707259953022, "learning_rate": 0.01, "loss": 1.9983, "step": 35895 }, { "epoch": 3.68865598027127, "grad_norm": 0.05084836110472679, "learning_rate": 0.01, "loss": 2.0124, "step": 35898 }, { "epoch": 3.688964241676942, "grad_norm": 0.04952919855713844, "learning_rate": 0.01, "loss": 1.9969, "step": 35901 }, { "epoch": 3.689272503082614, "grad_norm": 0.08321710675954819, "learning_rate": 0.01, "loss": 2.0127, "step": 35904 }, { "epoch": 3.6895807644882863, "grad_norm": 0.04074413329362869, "learning_rate": 0.01, "loss": 2.0056, "step": 35907 }, { "epoch": 3.689889025893958, "grad_norm": 0.11404154449701309, "learning_rate": 0.01, "loss": 2.0102, "step": 35910 }, { "epoch": 3.69019728729963, "grad_norm": 0.07283198833465576, "learning_rate": 0.01, "loss": 2.011, "step": 35913 }, { "epoch": 3.690505548705302, "grad_norm": 0.0645599216222763, "learning_rate": 0.01, "loss": 2.0343, "step": 35916 }, { "epoch": 3.690813810110974, "grad_norm": 0.0537913516163826, "learning_rate": 0.01, "loss": 2.0142, "step": 35919 }, { "epoch": 3.691122071516646, "grad_norm": 0.04984259232878685, "learning_rate": 0.01, "loss": 1.9863, "step": 35922 }, { "epoch": 3.691430332922318, "grad_norm": 0.05349741503596306, "learning_rate": 0.01, "loss": 1.9853, "step": 35925 }, { "epoch": 3.6917385943279903, "grad_norm": 0.05882362276315689, "learning_rate": 0.01, "loss": 2.0053, "step": 35928 }, { "epoch": 3.692046855733662, "grad_norm": 0.0809783861041069, "learning_rate": 0.01, "loss": 1.9967, "step": 35931 }, { "epoch": 3.6923551171393343, "grad_norm": 0.07075916230678558, "learning_rate": 0.01, "loss": 1.9872, "step": 35934 }, { "epoch": 3.692663378545006, "grad_norm": 0.058204181492328644, "learning_rate": 0.01, "loss": 1.9847, "step": 35937 }, { "epoch": 3.6929716399506782, "grad_norm": 0.10149111598730087, "learning_rate": 0.01, "loss": 2.0006, "step": 35940 }, { "epoch": 3.69327990135635, "grad_norm": 0.0722658559679985, "learning_rate": 0.01, "loss": 2.0364, "step": 35943 }, { "epoch": 3.693588162762022, "grad_norm": 0.08981306105852127, "learning_rate": 0.01, "loss": 2.0254, "step": 35946 }, { "epoch": 3.6938964241676944, "grad_norm": 0.04798738285899162, "learning_rate": 0.01, "loss": 2.0099, "step": 35949 }, { "epoch": 3.694204685573366, "grad_norm": 0.0338914729654789, "learning_rate": 0.01, "loss": 1.9843, "step": 35952 }, { "epoch": 3.6945129469790383, "grad_norm": 0.04813714697957039, "learning_rate": 0.01, "loss": 1.9948, "step": 35955 }, { "epoch": 3.6948212083847105, "grad_norm": 0.11086717247962952, "learning_rate": 0.01, "loss": 1.9963, "step": 35958 }, { "epoch": 3.6951294697903823, "grad_norm": 0.09754761308431625, "learning_rate": 0.01, "loss": 1.9965, "step": 35961 }, { "epoch": 3.695437731196054, "grad_norm": 0.07381515204906464, "learning_rate": 0.01, "loss": 1.9921, "step": 35964 }, { "epoch": 3.695745992601726, "grad_norm": 0.0501638762652874, "learning_rate": 0.01, "loss": 2.0231, "step": 35967 }, { "epoch": 3.6960542540073984, "grad_norm": 0.04152151197195053, "learning_rate": 0.01, "loss": 2.0033, "step": 35970 }, { "epoch": 3.69636251541307, "grad_norm": 0.06455028802156448, "learning_rate": 0.01, "loss": 2.0266, "step": 35973 }, { "epoch": 3.6966707768187423, "grad_norm": 0.07925435900688171, "learning_rate": 0.01, "loss": 2.0153, "step": 35976 }, { "epoch": 3.6969790382244145, "grad_norm": 0.17305727303028107, "learning_rate": 0.01, "loss": 1.9923, "step": 35979 }, { "epoch": 3.6972872996300863, "grad_norm": 0.04664710536599159, "learning_rate": 0.01, "loss": 1.9811, "step": 35982 }, { "epoch": 3.6975955610357585, "grad_norm": 0.03971162810921669, "learning_rate": 0.01, "loss": 2.0065, "step": 35985 }, { "epoch": 3.6979038224414302, "grad_norm": 0.04888729378581047, "learning_rate": 0.01, "loss": 1.9901, "step": 35988 }, { "epoch": 3.6982120838471024, "grad_norm": 0.03428930416703224, "learning_rate": 0.01, "loss": 1.9902, "step": 35991 }, { "epoch": 3.698520345252774, "grad_norm": 0.04609334468841553, "learning_rate": 0.01, "loss": 1.9818, "step": 35994 }, { "epoch": 3.6988286066584464, "grad_norm": 0.06719902157783508, "learning_rate": 0.01, "loss": 2.0144, "step": 35997 }, { "epoch": 3.6991368680641186, "grad_norm": 0.0409255288541317, "learning_rate": 0.01, "loss": 1.9893, "step": 36000 }, { "epoch": 3.6994451294697903, "grad_norm": 0.03683490306138992, "learning_rate": 0.01, "loss": 1.9709, "step": 36003 }, { "epoch": 3.6997533908754625, "grad_norm": 0.050057847052812576, "learning_rate": 0.01, "loss": 1.9875, "step": 36006 }, { "epoch": 3.7000616522811343, "grad_norm": 0.06051032990217209, "learning_rate": 0.01, "loss": 2.0034, "step": 36009 }, { "epoch": 3.7003699136868065, "grad_norm": 0.141363725066185, "learning_rate": 0.01, "loss": 1.9931, "step": 36012 }, { "epoch": 3.700678175092478, "grad_norm": 0.04017074033617973, "learning_rate": 0.01, "loss": 1.9919, "step": 36015 }, { "epoch": 3.7009864364981504, "grad_norm": 0.05374070256948471, "learning_rate": 0.01, "loss": 1.9975, "step": 36018 }, { "epoch": 3.7012946979038226, "grad_norm": 0.07037216424942017, "learning_rate": 0.01, "loss": 2.014, "step": 36021 }, { "epoch": 3.7016029593094943, "grad_norm": 0.06400209665298462, "learning_rate": 0.01, "loss": 2.0035, "step": 36024 }, { "epoch": 3.7019112207151665, "grad_norm": 0.04186910763382912, "learning_rate": 0.01, "loss": 1.9881, "step": 36027 }, { "epoch": 3.7022194821208387, "grad_norm": 0.05100405216217041, "learning_rate": 0.01, "loss": 2.0009, "step": 36030 }, { "epoch": 3.7025277435265105, "grad_norm": 0.06786265224218369, "learning_rate": 0.01, "loss": 2.0041, "step": 36033 }, { "epoch": 3.7028360049321822, "grad_norm": 0.058080319315195084, "learning_rate": 0.01, "loss": 1.9748, "step": 36036 }, { "epoch": 3.7031442663378544, "grad_norm": 0.09919019788503647, "learning_rate": 0.01, "loss": 2.0016, "step": 36039 }, { "epoch": 3.7034525277435266, "grad_norm": 0.08252627402544022, "learning_rate": 0.01, "loss": 2.0154, "step": 36042 }, { "epoch": 3.7037607891491984, "grad_norm": 0.06026385724544525, "learning_rate": 0.01, "loss": 1.9877, "step": 36045 }, { "epoch": 3.7040690505548706, "grad_norm": 0.08052903413772583, "learning_rate": 0.01, "loss": 1.9951, "step": 36048 }, { "epoch": 3.7043773119605428, "grad_norm": 0.0790569856762886, "learning_rate": 0.01, "loss": 1.9877, "step": 36051 }, { "epoch": 3.7046855733662145, "grad_norm": 0.04464380070567131, "learning_rate": 0.01, "loss": 2.018, "step": 36054 }, { "epoch": 3.7049938347718867, "grad_norm": 0.06567423790693283, "learning_rate": 0.01, "loss": 1.9886, "step": 36057 }, { "epoch": 3.7053020961775585, "grad_norm": 0.06602831929922104, "learning_rate": 0.01, "loss": 1.9934, "step": 36060 }, { "epoch": 3.7056103575832307, "grad_norm": 0.06838707625865936, "learning_rate": 0.01, "loss": 1.9853, "step": 36063 }, { "epoch": 3.7059186189889024, "grad_norm": 0.09807326644659042, "learning_rate": 0.01, "loss": 1.9919, "step": 36066 }, { "epoch": 3.7062268803945746, "grad_norm": 0.06948495656251907, "learning_rate": 0.01, "loss": 1.992, "step": 36069 }, { "epoch": 3.706535141800247, "grad_norm": 0.07893040776252747, "learning_rate": 0.01, "loss": 1.9804, "step": 36072 }, { "epoch": 3.7068434032059185, "grad_norm": 0.04618433490395546, "learning_rate": 0.01, "loss": 2.0149, "step": 36075 }, { "epoch": 3.7071516646115907, "grad_norm": 0.04784570261836052, "learning_rate": 0.01, "loss": 2.0144, "step": 36078 }, { "epoch": 3.7074599260172625, "grad_norm": 0.03364891558885574, "learning_rate": 0.01, "loss": 1.9914, "step": 36081 }, { "epoch": 3.7077681874229347, "grad_norm": 0.03379710018634796, "learning_rate": 0.01, "loss": 1.994, "step": 36084 }, { "epoch": 3.7080764488286064, "grad_norm": 0.03952077031135559, "learning_rate": 0.01, "loss": 2.0159, "step": 36087 }, { "epoch": 3.7083847102342786, "grad_norm": 0.07781558483839035, "learning_rate": 0.01, "loss": 2.0045, "step": 36090 }, { "epoch": 3.708692971639951, "grad_norm": 0.04583312198519707, "learning_rate": 0.01, "loss": 1.9942, "step": 36093 }, { "epoch": 3.7090012330456226, "grad_norm": 0.08673562854528427, "learning_rate": 0.01, "loss": 1.9915, "step": 36096 }, { "epoch": 3.7093094944512948, "grad_norm": 0.05132952705025673, "learning_rate": 0.01, "loss": 1.9993, "step": 36099 }, { "epoch": 3.709617755856967, "grad_norm": 0.09407106041908264, "learning_rate": 0.01, "loss": 2.0028, "step": 36102 }, { "epoch": 3.7099260172626387, "grad_norm": 0.052429962903261185, "learning_rate": 0.01, "loss": 1.9971, "step": 36105 }, { "epoch": 3.7102342786683105, "grad_norm": 0.10452061146497726, "learning_rate": 0.01, "loss": 1.991, "step": 36108 }, { "epoch": 3.7105425400739827, "grad_norm": 0.03275587409734726, "learning_rate": 0.01, "loss": 1.9941, "step": 36111 }, { "epoch": 3.710850801479655, "grad_norm": 0.10667680203914642, "learning_rate": 0.01, "loss": 1.9775, "step": 36114 }, { "epoch": 3.7111590628853266, "grad_norm": 0.08240865170955658, "learning_rate": 0.01, "loss": 2.0113, "step": 36117 }, { "epoch": 3.711467324290999, "grad_norm": 0.0850924402475357, "learning_rate": 0.01, "loss": 1.9963, "step": 36120 }, { "epoch": 3.711775585696671, "grad_norm": 0.03906584531068802, "learning_rate": 0.01, "loss": 1.9695, "step": 36123 }, { "epoch": 3.7120838471023427, "grad_norm": 0.04797567054629326, "learning_rate": 0.01, "loss": 2.0276, "step": 36126 }, { "epoch": 3.712392108508015, "grad_norm": 0.042494967579841614, "learning_rate": 0.01, "loss": 1.977, "step": 36129 }, { "epoch": 3.7127003699136867, "grad_norm": 0.05491645634174347, "learning_rate": 0.01, "loss": 2.0033, "step": 36132 }, { "epoch": 3.713008631319359, "grad_norm": 0.10729935020208359, "learning_rate": 0.01, "loss": 2.0001, "step": 36135 }, { "epoch": 3.7133168927250306, "grad_norm": 0.043170586228370667, "learning_rate": 0.01, "loss": 2.0007, "step": 36138 }, { "epoch": 3.713625154130703, "grad_norm": 0.08662062138319016, "learning_rate": 0.01, "loss": 2.0094, "step": 36141 }, { "epoch": 3.713933415536375, "grad_norm": 0.11379045993089676, "learning_rate": 0.01, "loss": 1.9953, "step": 36144 }, { "epoch": 3.7142416769420468, "grad_norm": 0.03698570281267166, "learning_rate": 0.01, "loss": 1.9863, "step": 36147 }, { "epoch": 3.714549938347719, "grad_norm": 0.11952532082796097, "learning_rate": 0.01, "loss": 1.9923, "step": 36150 }, { "epoch": 3.7148581997533907, "grad_norm": 0.06563457101583481, "learning_rate": 0.01, "loss": 1.9981, "step": 36153 }, { "epoch": 3.715166461159063, "grad_norm": 0.07150832563638687, "learning_rate": 0.01, "loss": 2.012, "step": 36156 }, { "epoch": 3.7154747225647347, "grad_norm": 0.04657771810889244, "learning_rate": 0.01, "loss": 2.0056, "step": 36159 }, { "epoch": 3.715782983970407, "grad_norm": 0.055245641618967056, "learning_rate": 0.01, "loss": 2.0095, "step": 36162 }, { "epoch": 3.716091245376079, "grad_norm": 0.040487829595804214, "learning_rate": 0.01, "loss": 2.0027, "step": 36165 }, { "epoch": 3.716399506781751, "grad_norm": 0.12351846694946289, "learning_rate": 0.01, "loss": 1.9858, "step": 36168 }, { "epoch": 3.716707768187423, "grad_norm": 0.053484536707401276, "learning_rate": 0.01, "loss": 2.0019, "step": 36171 }, { "epoch": 3.717016029593095, "grad_norm": 0.06141964718699455, "learning_rate": 0.01, "loss": 2.0013, "step": 36174 }, { "epoch": 3.717324290998767, "grad_norm": 0.09509648382663727, "learning_rate": 0.01, "loss": 1.9949, "step": 36177 }, { "epoch": 3.717632552404439, "grad_norm": 0.0969184935092926, "learning_rate": 0.01, "loss": 2.0168, "step": 36180 }, { "epoch": 3.717940813810111, "grad_norm": 0.0884992703795433, "learning_rate": 0.01, "loss": 2.0186, "step": 36183 }, { "epoch": 3.718249075215783, "grad_norm": 0.07382161915302277, "learning_rate": 0.01, "loss": 1.9974, "step": 36186 }, { "epoch": 3.718557336621455, "grad_norm": 0.07776398956775665, "learning_rate": 0.01, "loss": 2.0259, "step": 36189 }, { "epoch": 3.718865598027127, "grad_norm": 0.07049771398305893, "learning_rate": 0.01, "loss": 2.0103, "step": 36192 }, { "epoch": 3.719173859432799, "grad_norm": 0.08669892698526382, "learning_rate": 0.01, "loss": 1.9649, "step": 36195 }, { "epoch": 3.719482120838471, "grad_norm": 0.05756537616252899, "learning_rate": 0.01, "loss": 2.0025, "step": 36198 }, { "epoch": 3.719790382244143, "grad_norm": 0.06954919546842575, "learning_rate": 0.01, "loss": 2.0101, "step": 36201 }, { "epoch": 3.720098643649815, "grad_norm": 0.09297992289066315, "learning_rate": 0.01, "loss": 1.9945, "step": 36204 }, { "epoch": 3.720406905055487, "grad_norm": 0.07941876351833344, "learning_rate": 0.01, "loss": 2.0001, "step": 36207 }, { "epoch": 3.720715166461159, "grad_norm": 0.0698881521821022, "learning_rate": 0.01, "loss": 1.9782, "step": 36210 }, { "epoch": 3.721023427866831, "grad_norm": 0.09939584881067276, "learning_rate": 0.01, "loss": 2.0096, "step": 36213 }, { "epoch": 3.7213316892725032, "grad_norm": 0.059318870306015015, "learning_rate": 0.01, "loss": 1.9965, "step": 36216 }, { "epoch": 3.721639950678175, "grad_norm": 0.06797734647989273, "learning_rate": 0.01, "loss": 1.992, "step": 36219 }, { "epoch": 3.721948212083847, "grad_norm": 0.03932074084877968, "learning_rate": 0.01, "loss": 2.012, "step": 36222 }, { "epoch": 3.7222564734895194, "grad_norm": 0.07458118349313736, "learning_rate": 0.01, "loss": 2.0184, "step": 36225 }, { "epoch": 3.722564734895191, "grad_norm": 0.09383490681648254, "learning_rate": 0.01, "loss": 2.0282, "step": 36228 }, { "epoch": 3.722872996300863, "grad_norm": 0.06527485698461533, "learning_rate": 0.01, "loss": 2.0048, "step": 36231 }, { "epoch": 3.723181257706535, "grad_norm": 0.050437018275260925, "learning_rate": 0.01, "loss": 1.9992, "step": 36234 }, { "epoch": 3.7234895191122073, "grad_norm": 0.04380796477198601, "learning_rate": 0.01, "loss": 2.0181, "step": 36237 }, { "epoch": 3.723797780517879, "grad_norm": 0.04857274517416954, "learning_rate": 0.01, "loss": 2.0172, "step": 36240 }, { "epoch": 3.724106041923551, "grad_norm": 0.032346438616514206, "learning_rate": 0.01, "loss": 1.9889, "step": 36243 }, { "epoch": 3.7244143033292234, "grad_norm": 0.045984454452991486, "learning_rate": 0.01, "loss": 1.9947, "step": 36246 }, { "epoch": 3.724722564734895, "grad_norm": 0.058199312537908554, "learning_rate": 0.01, "loss": 2.0142, "step": 36249 }, { "epoch": 3.7250308261405674, "grad_norm": 0.0391390286386013, "learning_rate": 0.01, "loss": 2.0105, "step": 36252 }, { "epoch": 3.725339087546239, "grad_norm": 0.058478716760873795, "learning_rate": 0.01, "loss": 2.0171, "step": 36255 }, { "epoch": 3.7256473489519113, "grad_norm": 0.10977184772491455, "learning_rate": 0.01, "loss": 1.995, "step": 36258 }, { "epoch": 3.725955610357583, "grad_norm": 0.06433524191379547, "learning_rate": 0.01, "loss": 2.0038, "step": 36261 }, { "epoch": 3.7262638717632552, "grad_norm": 0.07871260493993759, "learning_rate": 0.01, "loss": 1.9805, "step": 36264 }, { "epoch": 3.7265721331689274, "grad_norm": 0.09282384812831879, "learning_rate": 0.01, "loss": 1.991, "step": 36267 }, { "epoch": 3.726880394574599, "grad_norm": 0.03165189549326897, "learning_rate": 0.01, "loss": 1.9864, "step": 36270 }, { "epoch": 3.7271886559802714, "grad_norm": 0.0789475068449974, "learning_rate": 0.01, "loss": 2.0143, "step": 36273 }, { "epoch": 3.727496917385943, "grad_norm": 0.13641710579395294, "learning_rate": 0.01, "loss": 1.996, "step": 36276 }, { "epoch": 3.7278051787916153, "grad_norm": 0.08470667898654938, "learning_rate": 0.01, "loss": 1.9953, "step": 36279 }, { "epoch": 3.728113440197287, "grad_norm": 0.09110618382692337, "learning_rate": 0.01, "loss": 2.005, "step": 36282 }, { "epoch": 3.7284217016029593, "grad_norm": 0.079217828810215, "learning_rate": 0.01, "loss": 1.9694, "step": 36285 }, { "epoch": 3.7287299630086315, "grad_norm": 0.03365809842944145, "learning_rate": 0.01, "loss": 1.998, "step": 36288 }, { "epoch": 3.729038224414303, "grad_norm": 0.0691695362329483, "learning_rate": 0.01, "loss": 1.9971, "step": 36291 }, { "epoch": 3.7293464858199754, "grad_norm": 0.03766563534736633, "learning_rate": 0.01, "loss": 2.0054, "step": 36294 }, { "epoch": 3.7296547472256476, "grad_norm": 0.13410314917564392, "learning_rate": 0.01, "loss": 2.0295, "step": 36297 }, { "epoch": 3.7299630086313194, "grad_norm": 0.08052795380353928, "learning_rate": 0.01, "loss": 1.988, "step": 36300 }, { "epoch": 3.730271270036991, "grad_norm": 0.04205624386668205, "learning_rate": 0.01, "loss": 2.0013, "step": 36303 }, { "epoch": 3.7305795314426633, "grad_norm": 0.055557433515787125, "learning_rate": 0.01, "loss": 2.0097, "step": 36306 }, { "epoch": 3.7308877928483355, "grad_norm": 0.08415018022060394, "learning_rate": 0.01, "loss": 2.0069, "step": 36309 }, { "epoch": 3.7311960542540072, "grad_norm": 0.057384416460990906, "learning_rate": 0.01, "loss": 2.0164, "step": 36312 }, { "epoch": 3.7315043156596794, "grad_norm": 0.046833690255880356, "learning_rate": 0.01, "loss": 1.9747, "step": 36315 }, { "epoch": 3.7318125770653516, "grad_norm": 0.0527094230055809, "learning_rate": 0.01, "loss": 1.9959, "step": 36318 }, { "epoch": 3.7321208384710234, "grad_norm": 0.04863157868385315, "learning_rate": 0.01, "loss": 2.0082, "step": 36321 }, { "epoch": 3.7324290998766956, "grad_norm": 0.05578998848795891, "learning_rate": 0.01, "loss": 1.9724, "step": 36324 }, { "epoch": 3.7327373612823673, "grad_norm": 0.08895915746688843, "learning_rate": 0.01, "loss": 2.0025, "step": 36327 }, { "epoch": 3.7330456226880395, "grad_norm": 0.14340227842330933, "learning_rate": 0.01, "loss": 1.999, "step": 36330 }, { "epoch": 3.7333538840937113, "grad_norm": 0.074753038585186, "learning_rate": 0.01, "loss": 1.9733, "step": 36333 }, { "epoch": 3.7336621454993835, "grad_norm": 0.05069069564342499, "learning_rate": 0.01, "loss": 2.025, "step": 36336 }, { "epoch": 3.7339704069050557, "grad_norm": 0.0417485274374485, "learning_rate": 0.01, "loss": 2.0078, "step": 36339 }, { "epoch": 3.7342786683107274, "grad_norm": 0.04727747291326523, "learning_rate": 0.01, "loss": 2.0165, "step": 36342 }, { "epoch": 3.7345869297163996, "grad_norm": 0.056336645036935806, "learning_rate": 0.01, "loss": 1.9956, "step": 36345 }, { "epoch": 3.7348951911220714, "grad_norm": 0.09889603406190872, "learning_rate": 0.01, "loss": 1.9908, "step": 36348 }, { "epoch": 3.7352034525277436, "grad_norm": 0.06653422117233276, "learning_rate": 0.01, "loss": 1.9675, "step": 36351 }, { "epoch": 3.7355117139334153, "grad_norm": 0.05115421861410141, "learning_rate": 0.01, "loss": 2.0004, "step": 36354 }, { "epoch": 3.7358199753390875, "grad_norm": 0.04250407963991165, "learning_rate": 0.01, "loss": 1.9803, "step": 36357 }, { "epoch": 3.7361282367447597, "grad_norm": 0.037854380905628204, "learning_rate": 0.01, "loss": 2.0014, "step": 36360 }, { "epoch": 3.7364364981504314, "grad_norm": 0.06039261072874069, "learning_rate": 0.01, "loss": 2.0364, "step": 36363 }, { "epoch": 3.7367447595561036, "grad_norm": 0.09946703165769577, "learning_rate": 0.01, "loss": 2.0169, "step": 36366 }, { "epoch": 3.737053020961776, "grad_norm": 0.10038801282644272, "learning_rate": 0.01, "loss": 2.012, "step": 36369 }, { "epoch": 3.7373612823674476, "grad_norm": 0.07845285534858704, "learning_rate": 0.01, "loss": 1.9782, "step": 36372 }, { "epoch": 3.7376695437731193, "grad_norm": 0.08079241961240768, "learning_rate": 0.01, "loss": 1.994, "step": 36375 }, { "epoch": 3.7379778051787915, "grad_norm": 0.04414941743016243, "learning_rate": 0.01, "loss": 2.0103, "step": 36378 }, { "epoch": 3.7382860665844637, "grad_norm": 0.14685457944869995, "learning_rate": 0.01, "loss": 1.9965, "step": 36381 }, { "epoch": 3.7385943279901355, "grad_norm": 0.04633990302681923, "learning_rate": 0.01, "loss": 2.0, "step": 36384 }, { "epoch": 3.7389025893958077, "grad_norm": 0.04103631526231766, "learning_rate": 0.01, "loss": 2.0023, "step": 36387 }, { "epoch": 3.73921085080148, "grad_norm": 0.03546123206615448, "learning_rate": 0.01, "loss": 1.9901, "step": 36390 }, { "epoch": 3.7395191122071516, "grad_norm": 0.08792685717344284, "learning_rate": 0.01, "loss": 1.9955, "step": 36393 }, { "epoch": 3.739827373612824, "grad_norm": 0.09209249913692474, "learning_rate": 0.01, "loss": 2.0211, "step": 36396 }, { "epoch": 3.7401356350184956, "grad_norm": 0.0480416901409626, "learning_rate": 0.01, "loss": 1.9943, "step": 36399 }, { "epoch": 3.7404438964241677, "grad_norm": 0.03383079916238785, "learning_rate": 0.01, "loss": 2.0016, "step": 36402 }, { "epoch": 3.7407521578298395, "grad_norm": 0.06483932584524155, "learning_rate": 0.01, "loss": 2.0077, "step": 36405 }, { "epoch": 3.7410604192355117, "grad_norm": 0.04043160006403923, "learning_rate": 0.01, "loss": 1.994, "step": 36408 }, { "epoch": 3.741368680641184, "grad_norm": 0.047389864921569824, "learning_rate": 0.01, "loss": 1.9923, "step": 36411 }, { "epoch": 3.7416769420468556, "grad_norm": 0.04405448958277702, "learning_rate": 0.01, "loss": 1.9969, "step": 36414 }, { "epoch": 3.741985203452528, "grad_norm": 0.03635013848543167, "learning_rate": 0.01, "loss": 2.0013, "step": 36417 }, { "epoch": 3.7422934648582, "grad_norm": 0.06296937167644501, "learning_rate": 0.01, "loss": 2.0106, "step": 36420 }, { "epoch": 3.7426017262638718, "grad_norm": 0.08138510584831238, "learning_rate": 0.01, "loss": 2.0052, "step": 36423 }, { "epoch": 3.7429099876695435, "grad_norm": 0.05814410001039505, "learning_rate": 0.01, "loss": 2.0081, "step": 36426 }, { "epoch": 3.7432182490752157, "grad_norm": 0.09043169766664505, "learning_rate": 0.01, "loss": 1.9894, "step": 36429 }, { "epoch": 3.743526510480888, "grad_norm": 0.038527343422174454, "learning_rate": 0.01, "loss": 2.0191, "step": 36432 }, { "epoch": 3.7438347718865597, "grad_norm": 0.03342151269316673, "learning_rate": 0.01, "loss": 1.9951, "step": 36435 }, { "epoch": 3.744143033292232, "grad_norm": 0.054572802037000656, "learning_rate": 0.01, "loss": 2.0117, "step": 36438 }, { "epoch": 3.744451294697904, "grad_norm": 0.10896478593349457, "learning_rate": 0.01, "loss": 2.0004, "step": 36441 }, { "epoch": 3.744759556103576, "grad_norm": 0.04996712505817413, "learning_rate": 0.01, "loss": 1.9957, "step": 36444 }, { "epoch": 3.745067817509248, "grad_norm": 0.06720001995563507, "learning_rate": 0.01, "loss": 1.9883, "step": 36447 }, { "epoch": 3.7453760789149197, "grad_norm": 0.07731924951076508, "learning_rate": 0.01, "loss": 2.0132, "step": 36450 }, { "epoch": 3.745684340320592, "grad_norm": 0.07545910775661469, "learning_rate": 0.01, "loss": 2.0, "step": 36453 }, { "epoch": 3.7459926017262637, "grad_norm": 0.07236587256193161, "learning_rate": 0.01, "loss": 1.9954, "step": 36456 }, { "epoch": 3.746300863131936, "grad_norm": 0.05105200409889221, "learning_rate": 0.01, "loss": 1.986, "step": 36459 }, { "epoch": 3.746609124537608, "grad_norm": 0.06345131993293762, "learning_rate": 0.01, "loss": 1.9977, "step": 36462 }, { "epoch": 3.74691738594328, "grad_norm": 0.08864062279462814, "learning_rate": 0.01, "loss": 1.989, "step": 36465 }, { "epoch": 3.747225647348952, "grad_norm": 0.06058958172798157, "learning_rate": 0.01, "loss": 2.0155, "step": 36468 }, { "epoch": 3.7475339087546238, "grad_norm": 0.08054932206869125, "learning_rate": 0.01, "loss": 1.9682, "step": 36471 }, { "epoch": 3.747842170160296, "grad_norm": 0.04538315162062645, "learning_rate": 0.01, "loss": 1.9707, "step": 36474 }, { "epoch": 3.7481504315659677, "grad_norm": 0.11694948375225067, "learning_rate": 0.01, "loss": 1.9962, "step": 36477 }, { "epoch": 3.74845869297164, "grad_norm": 0.037599921226501465, "learning_rate": 0.01, "loss": 1.9998, "step": 36480 }, { "epoch": 3.748766954377312, "grad_norm": 0.062208350747823715, "learning_rate": 0.01, "loss": 2.0154, "step": 36483 }, { "epoch": 3.749075215782984, "grad_norm": 0.04657081514596939, "learning_rate": 0.01, "loss": 1.9954, "step": 36486 }, { "epoch": 3.749383477188656, "grad_norm": 0.11601486057043076, "learning_rate": 0.01, "loss": 2.0216, "step": 36489 }, { "epoch": 3.7496917385943282, "grad_norm": 0.04120590165257454, "learning_rate": 0.01, "loss": 2.0076, "step": 36492 }, { "epoch": 3.75, "grad_norm": 0.07783524692058563, "learning_rate": 0.01, "loss": 1.9892, "step": 36495 }, { "epoch": 3.7503082614056718, "grad_norm": 0.04452613368630409, "learning_rate": 0.01, "loss": 2.0191, "step": 36498 }, { "epoch": 3.750616522811344, "grad_norm": 0.10393857210874557, "learning_rate": 0.01, "loss": 1.9765, "step": 36501 }, { "epoch": 3.750924784217016, "grad_norm": 0.11419197171926498, "learning_rate": 0.01, "loss": 1.9791, "step": 36504 }, { "epoch": 3.751233045622688, "grad_norm": 0.0752970427274704, "learning_rate": 0.01, "loss": 2.0128, "step": 36507 }, { "epoch": 3.75154130702836, "grad_norm": 0.06409469246864319, "learning_rate": 0.01, "loss": 2.0103, "step": 36510 }, { "epoch": 3.7518495684340323, "grad_norm": 0.053437262773513794, "learning_rate": 0.01, "loss": 1.9912, "step": 36513 }, { "epoch": 3.752157829839704, "grad_norm": 0.04102127254009247, "learning_rate": 0.01, "loss": 1.999, "step": 36516 }, { "epoch": 3.7524660912453762, "grad_norm": 0.03595980256795883, "learning_rate": 0.01, "loss": 1.9903, "step": 36519 }, { "epoch": 3.752774352651048, "grad_norm": 0.10266067087650299, "learning_rate": 0.01, "loss": 1.9884, "step": 36522 }, { "epoch": 3.75308261405672, "grad_norm": 0.07302891463041306, "learning_rate": 0.01, "loss": 2.012, "step": 36525 }, { "epoch": 3.753390875462392, "grad_norm": 0.08043140172958374, "learning_rate": 0.01, "loss": 2.0129, "step": 36528 }, { "epoch": 3.753699136868064, "grad_norm": 0.0765608549118042, "learning_rate": 0.01, "loss": 2.0029, "step": 36531 }, { "epoch": 3.7540073982737363, "grad_norm": 0.040770966559648514, "learning_rate": 0.01, "loss": 1.9894, "step": 36534 }, { "epoch": 3.754315659679408, "grad_norm": 0.09125746041536331, "learning_rate": 0.01, "loss": 2.0098, "step": 36537 }, { "epoch": 3.7546239210850803, "grad_norm": 0.10734240710735321, "learning_rate": 0.01, "loss": 1.9839, "step": 36540 }, { "epoch": 3.754932182490752, "grad_norm": 0.05876481533050537, "learning_rate": 0.01, "loss": 1.9759, "step": 36543 }, { "epoch": 3.755240443896424, "grad_norm": 0.04223402962088585, "learning_rate": 0.01, "loss": 1.9709, "step": 36546 }, { "epoch": 3.755548705302096, "grad_norm": 0.03419182077050209, "learning_rate": 0.01, "loss": 1.9934, "step": 36549 }, { "epoch": 3.755856966707768, "grad_norm": 0.08762037754058838, "learning_rate": 0.01, "loss": 2.0068, "step": 36552 }, { "epoch": 3.7561652281134403, "grad_norm": 0.035800088196992874, "learning_rate": 0.01, "loss": 1.9892, "step": 36555 }, { "epoch": 3.756473489519112, "grad_norm": 0.07538183778524399, "learning_rate": 0.01, "loss": 2.0066, "step": 36558 }, { "epoch": 3.7567817509247843, "grad_norm": 0.05067085847258568, "learning_rate": 0.01, "loss": 2.0124, "step": 36561 }, { "epoch": 3.7570900123304565, "grad_norm": 0.07041851431131363, "learning_rate": 0.01, "loss": 1.9941, "step": 36564 }, { "epoch": 3.7573982737361282, "grad_norm": 0.06819632649421692, "learning_rate": 0.01, "loss": 1.9986, "step": 36567 }, { "epoch": 3.7577065351418, "grad_norm": 0.11158425360918045, "learning_rate": 0.01, "loss": 1.9883, "step": 36570 }, { "epoch": 3.758014796547472, "grad_norm": 0.09598013758659363, "learning_rate": 0.01, "loss": 2.0164, "step": 36573 }, { "epoch": 3.7583230579531444, "grad_norm": 0.0501101016998291, "learning_rate": 0.01, "loss": 2.0264, "step": 36576 }, { "epoch": 3.758631319358816, "grad_norm": 0.03867189958691597, "learning_rate": 0.01, "loss": 2.0162, "step": 36579 }, { "epoch": 3.7589395807644883, "grad_norm": 0.03836961090564728, "learning_rate": 0.01, "loss": 2.0095, "step": 36582 }, { "epoch": 3.7592478421701605, "grad_norm": 0.04135824367403984, "learning_rate": 0.01, "loss": 2.0206, "step": 36585 }, { "epoch": 3.7595561035758323, "grad_norm": 0.0922931507229805, "learning_rate": 0.01, "loss": 1.9809, "step": 36588 }, { "epoch": 3.7598643649815044, "grad_norm": 0.048077963292598724, "learning_rate": 0.01, "loss": 1.9716, "step": 36591 }, { "epoch": 3.760172626387176, "grad_norm": 0.04263912886381149, "learning_rate": 0.01, "loss": 2.0082, "step": 36594 }, { "epoch": 3.7604808877928484, "grad_norm": 0.05825957655906677, "learning_rate": 0.01, "loss": 2.0036, "step": 36597 }, { "epoch": 3.76078914919852, "grad_norm": 0.04579320177435875, "learning_rate": 0.01, "loss": 1.9677, "step": 36600 }, { "epoch": 3.7610974106041923, "grad_norm": 0.07970302551984787, "learning_rate": 0.01, "loss": 2.0029, "step": 36603 }, { "epoch": 3.7614056720098645, "grad_norm": 0.1091640293598175, "learning_rate": 0.01, "loss": 1.9974, "step": 36606 }, { "epoch": 3.7617139334155363, "grad_norm": 0.07577066123485565, "learning_rate": 0.01, "loss": 2.0077, "step": 36609 }, { "epoch": 3.7620221948212085, "grad_norm": 0.07901261001825333, "learning_rate": 0.01, "loss": 2.0055, "step": 36612 }, { "epoch": 3.7623304562268807, "grad_norm": 0.040557824075222015, "learning_rate": 0.01, "loss": 2.0129, "step": 36615 }, { "epoch": 3.7626387176325524, "grad_norm": 0.03163420036435127, "learning_rate": 0.01, "loss": 1.9782, "step": 36618 }, { "epoch": 3.762946979038224, "grad_norm": 0.0960499569773674, "learning_rate": 0.01, "loss": 1.9882, "step": 36621 }, { "epoch": 3.7632552404438964, "grad_norm": 0.05283837392926216, "learning_rate": 0.01, "loss": 2.0351, "step": 36624 }, { "epoch": 3.7635635018495686, "grad_norm": 0.03764748573303223, "learning_rate": 0.01, "loss": 1.9943, "step": 36627 }, { "epoch": 3.7638717632552403, "grad_norm": 0.07017308473587036, "learning_rate": 0.01, "loss": 2.0008, "step": 36630 }, { "epoch": 3.7641800246609125, "grad_norm": 0.06506321579217911, "learning_rate": 0.01, "loss": 1.9925, "step": 36633 }, { "epoch": 3.7644882860665847, "grad_norm": 0.040651753544807434, "learning_rate": 0.01, "loss": 1.9799, "step": 36636 }, { "epoch": 3.7647965474722564, "grad_norm": 0.0897100567817688, "learning_rate": 0.01, "loss": 2.014, "step": 36639 }, { "epoch": 3.7651048088779286, "grad_norm": 0.1067187637090683, "learning_rate": 0.01, "loss": 2.0369, "step": 36642 }, { "epoch": 3.7654130702836004, "grad_norm": 0.08220155537128448, "learning_rate": 0.01, "loss": 1.9708, "step": 36645 }, { "epoch": 3.7657213316892726, "grad_norm": 0.0830981582403183, "learning_rate": 0.01, "loss": 1.9937, "step": 36648 }, { "epoch": 3.7660295930949443, "grad_norm": 0.04124632850289345, "learning_rate": 0.01, "loss": 1.9937, "step": 36651 }, { "epoch": 3.7663378545006165, "grad_norm": 0.03570066764950752, "learning_rate": 0.01, "loss": 2.0102, "step": 36654 }, { "epoch": 3.7666461159062887, "grad_norm": 0.04715826362371445, "learning_rate": 0.01, "loss": 1.9917, "step": 36657 }, { "epoch": 3.7669543773119605, "grad_norm": 0.05129298195242882, "learning_rate": 0.01, "loss": 2.0058, "step": 36660 }, { "epoch": 3.7672626387176327, "grad_norm": 0.07690376043319702, "learning_rate": 0.01, "loss": 1.9985, "step": 36663 }, { "epoch": 3.7675709001233044, "grad_norm": 0.06316812336444855, "learning_rate": 0.01, "loss": 1.9911, "step": 36666 }, { "epoch": 3.7678791615289766, "grad_norm": 0.045724622905254364, "learning_rate": 0.01, "loss": 2.007, "step": 36669 }, { "epoch": 3.7681874229346484, "grad_norm": 0.12661319971084595, "learning_rate": 0.01, "loss": 2.0159, "step": 36672 }, { "epoch": 3.7684956843403206, "grad_norm": 0.03898628428578377, "learning_rate": 0.01, "loss": 2.0127, "step": 36675 }, { "epoch": 3.7688039457459928, "grad_norm": 0.04297169670462608, "learning_rate": 0.01, "loss": 2.0072, "step": 36678 }, { "epoch": 3.7691122071516645, "grad_norm": 0.047098271548748016, "learning_rate": 0.01, "loss": 2.0019, "step": 36681 }, { "epoch": 3.7694204685573367, "grad_norm": 0.0880102887749672, "learning_rate": 0.01, "loss": 2.0009, "step": 36684 }, { "epoch": 3.769728729963009, "grad_norm": 0.09598886221647263, "learning_rate": 0.01, "loss": 1.987, "step": 36687 }, { "epoch": 3.7700369913686806, "grad_norm": 0.07920796424150467, "learning_rate": 0.01, "loss": 1.9992, "step": 36690 }, { "epoch": 3.7703452527743524, "grad_norm": 0.09643390029668808, "learning_rate": 0.01, "loss": 2.0219, "step": 36693 }, { "epoch": 3.7706535141800246, "grad_norm": 0.07026758790016174, "learning_rate": 0.01, "loss": 2.0217, "step": 36696 }, { "epoch": 3.770961775585697, "grad_norm": 0.04533863440155983, "learning_rate": 0.01, "loss": 2.0129, "step": 36699 }, { "epoch": 3.7712700369913685, "grad_norm": 0.03511887788772583, "learning_rate": 0.01, "loss": 2.0054, "step": 36702 }, { "epoch": 3.7715782983970407, "grad_norm": 0.03846128657460213, "learning_rate": 0.01, "loss": 2.0006, "step": 36705 }, { "epoch": 3.771886559802713, "grad_norm": 0.07523466646671295, "learning_rate": 0.01, "loss": 2.0232, "step": 36708 }, { "epoch": 3.7721948212083847, "grad_norm": 0.09649614989757538, "learning_rate": 0.01, "loss": 1.9851, "step": 36711 }, { "epoch": 3.772503082614057, "grad_norm": 0.04845314472913742, "learning_rate": 0.01, "loss": 1.9994, "step": 36714 }, { "epoch": 3.7728113440197286, "grad_norm": 0.037477899342775345, "learning_rate": 0.01, "loss": 2.0113, "step": 36717 }, { "epoch": 3.773119605425401, "grad_norm": 0.08308611810207367, "learning_rate": 0.01, "loss": 2.017, "step": 36720 }, { "epoch": 3.7734278668310726, "grad_norm": 0.048357848078012466, "learning_rate": 0.01, "loss": 1.9848, "step": 36723 }, { "epoch": 3.7737361282367448, "grad_norm": 0.04424552246928215, "learning_rate": 0.01, "loss": 2.0176, "step": 36726 }, { "epoch": 3.774044389642417, "grad_norm": 0.04857276752591133, "learning_rate": 0.01, "loss": 2.0304, "step": 36729 }, { "epoch": 3.7743526510480887, "grad_norm": 0.039389774203300476, "learning_rate": 0.01, "loss": 2.0044, "step": 36732 }, { "epoch": 3.774660912453761, "grad_norm": 0.04370071366429329, "learning_rate": 0.01, "loss": 2.0095, "step": 36735 }, { "epoch": 3.7749691738594326, "grad_norm": 0.05456184223294258, "learning_rate": 0.01, "loss": 2.0155, "step": 36738 }, { "epoch": 3.775277435265105, "grad_norm": 0.0888199731707573, "learning_rate": 0.01, "loss": 2.008, "step": 36741 }, { "epoch": 3.7755856966707766, "grad_norm": 0.19867762923240662, "learning_rate": 0.01, "loss": 1.9926, "step": 36744 }, { "epoch": 3.775893958076449, "grad_norm": 0.17659060657024384, "learning_rate": 0.01, "loss": 2.0021, "step": 36747 }, { "epoch": 3.776202219482121, "grad_norm": 0.0629454255104065, "learning_rate": 0.01, "loss": 1.9917, "step": 36750 }, { "epoch": 3.7765104808877927, "grad_norm": 0.05801904946565628, "learning_rate": 0.01, "loss": 1.9972, "step": 36753 }, { "epoch": 3.776818742293465, "grad_norm": 0.04907959699630737, "learning_rate": 0.01, "loss": 1.9899, "step": 36756 }, { "epoch": 3.777127003699137, "grad_norm": 0.08606990426778793, "learning_rate": 0.01, "loss": 1.9829, "step": 36759 }, { "epoch": 3.777435265104809, "grad_norm": 0.03293627128005028, "learning_rate": 0.01, "loss": 2.0177, "step": 36762 }, { "epoch": 3.7777435265104806, "grad_norm": 0.033272456377744675, "learning_rate": 0.01, "loss": 1.982, "step": 36765 }, { "epoch": 3.778051787916153, "grad_norm": 0.10520119965076447, "learning_rate": 0.01, "loss": 1.9592, "step": 36768 }, { "epoch": 3.778360049321825, "grad_norm": 0.03949661925435066, "learning_rate": 0.01, "loss": 2.0025, "step": 36771 }, { "epoch": 3.7786683107274968, "grad_norm": 0.10781146585941315, "learning_rate": 0.01, "loss": 2.0142, "step": 36774 }, { "epoch": 3.778976572133169, "grad_norm": 0.1707312911748886, "learning_rate": 0.01, "loss": 2.0072, "step": 36777 }, { "epoch": 3.779284833538841, "grad_norm": 0.10689792037010193, "learning_rate": 0.01, "loss": 1.9979, "step": 36780 }, { "epoch": 3.779593094944513, "grad_norm": 0.06377099454402924, "learning_rate": 0.01, "loss": 2.0019, "step": 36783 }, { "epoch": 3.779901356350185, "grad_norm": 0.050422243773937225, "learning_rate": 0.01, "loss": 1.9994, "step": 36786 }, { "epoch": 3.780209617755857, "grad_norm": 0.041165612637996674, "learning_rate": 0.01, "loss": 2.0116, "step": 36789 }, { "epoch": 3.780517879161529, "grad_norm": 0.07220548391342163, "learning_rate": 0.01, "loss": 2.0089, "step": 36792 }, { "epoch": 3.780826140567201, "grad_norm": 0.05860710144042969, "learning_rate": 0.01, "loss": 2.0221, "step": 36795 }, { "epoch": 3.781134401972873, "grad_norm": 0.03842824697494507, "learning_rate": 0.01, "loss": 1.9945, "step": 36798 }, { "epoch": 3.781442663378545, "grad_norm": 0.04346943646669388, "learning_rate": 0.01, "loss": 2.0203, "step": 36801 }, { "epoch": 3.781750924784217, "grad_norm": 0.16297590732574463, "learning_rate": 0.01, "loss": 2.0002, "step": 36804 }, { "epoch": 3.782059186189889, "grad_norm": 0.06720095127820969, "learning_rate": 0.01, "loss": 2.0, "step": 36807 }, { "epoch": 3.782367447595561, "grad_norm": 0.04820172116160393, "learning_rate": 0.01, "loss": 1.9773, "step": 36810 }, { "epoch": 3.782675709001233, "grad_norm": 0.0431947223842144, "learning_rate": 0.01, "loss": 2.0213, "step": 36813 }, { "epoch": 3.782983970406905, "grad_norm": 0.0517662838101387, "learning_rate": 0.01, "loss": 2.016, "step": 36816 }, { "epoch": 3.783292231812577, "grad_norm": 0.06249597296118736, "learning_rate": 0.01, "loss": 1.9905, "step": 36819 }, { "epoch": 3.783600493218249, "grad_norm": 0.04001644626259804, "learning_rate": 0.01, "loss": 2.0082, "step": 36822 }, { "epoch": 3.783908754623921, "grad_norm": 0.0713401585817337, "learning_rate": 0.01, "loss": 1.9978, "step": 36825 }, { "epoch": 3.784217016029593, "grad_norm": 0.08583984524011612, "learning_rate": 0.01, "loss": 2.0047, "step": 36828 }, { "epoch": 3.7845252774352653, "grad_norm": 0.05482606962323189, "learning_rate": 0.01, "loss": 2.0038, "step": 36831 }, { "epoch": 3.784833538840937, "grad_norm": 0.04373779147863388, "learning_rate": 0.01, "loss": 2.0003, "step": 36834 }, { "epoch": 3.7851418002466093, "grad_norm": 0.09752769768238068, "learning_rate": 0.01, "loss": 1.9854, "step": 36837 }, { "epoch": 3.785450061652281, "grad_norm": 0.07635717839002609, "learning_rate": 0.01, "loss": 2.0065, "step": 36840 }, { "epoch": 3.7857583230579532, "grad_norm": 0.07021810859441757, "learning_rate": 0.01, "loss": 1.9888, "step": 36843 }, { "epoch": 3.786066584463625, "grad_norm": 0.0541268065571785, "learning_rate": 0.01, "loss": 2.0252, "step": 36846 }, { "epoch": 3.786374845869297, "grad_norm": 0.07205335795879364, "learning_rate": 0.01, "loss": 1.9916, "step": 36849 }, { "epoch": 3.7866831072749694, "grad_norm": 0.057571567595005035, "learning_rate": 0.01, "loss": 1.9944, "step": 36852 }, { "epoch": 3.786991368680641, "grad_norm": 0.11135399341583252, "learning_rate": 0.01, "loss": 2.0053, "step": 36855 }, { "epoch": 3.7872996300863133, "grad_norm": 0.0698394924402237, "learning_rate": 0.01, "loss": 1.9832, "step": 36858 }, { "epoch": 3.787607891491985, "grad_norm": 0.09506375342607498, "learning_rate": 0.01, "loss": 1.9965, "step": 36861 }, { "epoch": 3.7879161528976573, "grad_norm": 0.08534973114728928, "learning_rate": 0.01, "loss": 1.9893, "step": 36864 }, { "epoch": 3.788224414303329, "grad_norm": 0.08360368013381958, "learning_rate": 0.01, "loss": 1.9962, "step": 36867 }, { "epoch": 3.788532675709001, "grad_norm": 0.08635003864765167, "learning_rate": 0.01, "loss": 2.0042, "step": 36870 }, { "epoch": 3.7888409371146734, "grad_norm": 0.033921390771865845, "learning_rate": 0.01, "loss": 2.0134, "step": 36873 }, { "epoch": 3.789149198520345, "grad_norm": 0.04312542825937271, "learning_rate": 0.01, "loss": 2.0188, "step": 36876 }, { "epoch": 3.7894574599260173, "grad_norm": 0.06157702952623367, "learning_rate": 0.01, "loss": 1.9622, "step": 36879 }, { "epoch": 3.7897657213316895, "grad_norm": 0.05039060115814209, "learning_rate": 0.01, "loss": 2.0035, "step": 36882 }, { "epoch": 3.7900739827373613, "grad_norm": 0.05873194709420204, "learning_rate": 0.01, "loss": 2.0335, "step": 36885 }, { "epoch": 3.790382244143033, "grad_norm": 0.07444643974304199, "learning_rate": 0.01, "loss": 1.9708, "step": 36888 }, { "epoch": 3.7906905055487052, "grad_norm": 0.06560081243515015, "learning_rate": 0.01, "loss": 2.0251, "step": 36891 }, { "epoch": 3.7909987669543774, "grad_norm": 0.08456238359212875, "learning_rate": 0.01, "loss": 1.9889, "step": 36894 }, { "epoch": 3.791307028360049, "grad_norm": 0.09048344194889069, "learning_rate": 0.01, "loss": 1.9992, "step": 36897 }, { "epoch": 3.7916152897657214, "grad_norm": 0.07640919834375381, "learning_rate": 0.01, "loss": 2.0104, "step": 36900 }, { "epoch": 3.7919235511713936, "grad_norm": 0.0968911275267601, "learning_rate": 0.01, "loss": 1.9664, "step": 36903 }, { "epoch": 3.7922318125770653, "grad_norm": 0.08743193000555038, "learning_rate": 0.01, "loss": 1.9942, "step": 36906 }, { "epoch": 3.7925400739827375, "grad_norm": 0.056414127349853516, "learning_rate": 0.01, "loss": 1.9847, "step": 36909 }, { "epoch": 3.7928483353884093, "grad_norm": 0.04174700006842613, "learning_rate": 0.01, "loss": 2.0113, "step": 36912 }, { "epoch": 3.7931565967940815, "grad_norm": 0.03753121197223663, "learning_rate": 0.01, "loss": 1.978, "step": 36915 }, { "epoch": 3.793464858199753, "grad_norm": 0.04957683011889458, "learning_rate": 0.01, "loss": 2.0017, "step": 36918 }, { "epoch": 3.7937731196054254, "grad_norm": 0.051355455070734024, "learning_rate": 0.01, "loss": 2.0076, "step": 36921 }, { "epoch": 3.7940813810110976, "grad_norm": 0.0345148891210556, "learning_rate": 0.01, "loss": 1.9869, "step": 36924 }, { "epoch": 3.7943896424167693, "grad_norm": 0.12389633059501648, "learning_rate": 0.01, "loss": 2.0207, "step": 36927 }, { "epoch": 3.7946979038224415, "grad_norm": 0.05504097789525986, "learning_rate": 0.01, "loss": 2.0119, "step": 36930 }, { "epoch": 3.7950061652281133, "grad_norm": 0.044859953224658966, "learning_rate": 0.01, "loss": 2.0009, "step": 36933 }, { "epoch": 3.7953144266337855, "grad_norm": 0.06867794692516327, "learning_rate": 0.01, "loss": 1.9942, "step": 36936 }, { "epoch": 3.7956226880394572, "grad_norm": 0.10308001190423965, "learning_rate": 0.01, "loss": 1.9884, "step": 36939 }, { "epoch": 3.7959309494451294, "grad_norm": 0.07946982234716415, "learning_rate": 0.01, "loss": 1.9817, "step": 36942 }, { "epoch": 3.7962392108508016, "grad_norm": 0.058571770787239075, "learning_rate": 0.01, "loss": 1.9873, "step": 36945 }, { "epoch": 3.7965474722564734, "grad_norm": 0.07778012007474899, "learning_rate": 0.01, "loss": 1.9951, "step": 36948 }, { "epoch": 3.7968557336621456, "grad_norm": 0.06965510547161102, "learning_rate": 0.01, "loss": 1.9944, "step": 36951 }, { "epoch": 3.7971639950678178, "grad_norm": 0.06443614512681961, "learning_rate": 0.01, "loss": 2.012, "step": 36954 }, { "epoch": 3.7974722564734895, "grad_norm": 0.04750034958124161, "learning_rate": 0.01, "loss": 1.9962, "step": 36957 }, { "epoch": 3.7977805178791613, "grad_norm": 0.039454251527786255, "learning_rate": 0.01, "loss": 2.0343, "step": 36960 }, { "epoch": 3.7980887792848335, "grad_norm": 0.07771621644496918, "learning_rate": 0.01, "loss": 2.0194, "step": 36963 }, { "epoch": 3.7983970406905057, "grad_norm": 0.06523919850587845, "learning_rate": 0.01, "loss": 1.9954, "step": 36966 }, { "epoch": 3.7987053020961774, "grad_norm": 0.055610157549381256, "learning_rate": 0.01, "loss": 2.0062, "step": 36969 }, { "epoch": 3.7990135635018496, "grad_norm": 0.08819667249917984, "learning_rate": 0.01, "loss": 1.9859, "step": 36972 }, { "epoch": 3.799321824907522, "grad_norm": 0.05418279394507408, "learning_rate": 0.01, "loss": 1.9951, "step": 36975 }, { "epoch": 3.7996300863131935, "grad_norm": 0.06648479402065277, "learning_rate": 0.01, "loss": 2.0151, "step": 36978 }, { "epoch": 3.7999383477188657, "grad_norm": 0.04629239812493324, "learning_rate": 0.01, "loss": 1.9928, "step": 36981 }, { "epoch": 3.8002466091245375, "grad_norm": 0.042264122515916824, "learning_rate": 0.01, "loss": 1.9832, "step": 36984 }, { "epoch": 3.8005548705302097, "grad_norm": 0.10186997801065445, "learning_rate": 0.01, "loss": 1.9888, "step": 36987 }, { "epoch": 3.8008631319358814, "grad_norm": 0.09278517216444016, "learning_rate": 0.01, "loss": 2.0063, "step": 36990 }, { "epoch": 3.8011713933415536, "grad_norm": 0.07084151357412338, "learning_rate": 0.01, "loss": 1.9972, "step": 36993 }, { "epoch": 3.801479654747226, "grad_norm": 0.07990908622741699, "learning_rate": 0.01, "loss": 2.0019, "step": 36996 }, { "epoch": 3.8017879161528976, "grad_norm": 0.039643771946430206, "learning_rate": 0.01, "loss": 1.9926, "step": 36999 }, { "epoch": 3.8020961775585698, "grad_norm": 0.03678774833679199, "learning_rate": 0.01, "loss": 1.9884, "step": 37002 }, { "epoch": 3.8024044389642415, "grad_norm": 0.040611330419778824, "learning_rate": 0.01, "loss": 2.0054, "step": 37005 }, { "epoch": 3.8027127003699137, "grad_norm": 0.03812083601951599, "learning_rate": 0.01, "loss": 1.9778, "step": 37008 }, { "epoch": 3.8030209617755855, "grad_norm": 0.07582142949104309, "learning_rate": 0.01, "loss": 1.9989, "step": 37011 }, { "epoch": 3.8033292231812577, "grad_norm": 0.14520719647407532, "learning_rate": 0.01, "loss": 1.9948, "step": 37014 }, { "epoch": 3.80363748458693, "grad_norm": 0.04497974365949631, "learning_rate": 0.01, "loss": 2.0239, "step": 37017 }, { "epoch": 3.8039457459926016, "grad_norm": 0.04178651422262192, "learning_rate": 0.01, "loss": 2.0156, "step": 37020 }, { "epoch": 3.804254007398274, "grad_norm": 0.047346848994493484, "learning_rate": 0.01, "loss": 1.9734, "step": 37023 }, { "epoch": 3.804562268803946, "grad_norm": 0.04626936465501785, "learning_rate": 0.01, "loss": 2.0209, "step": 37026 }, { "epoch": 3.8048705302096177, "grad_norm": 0.055330242961645126, "learning_rate": 0.01, "loss": 2.0, "step": 37029 }, { "epoch": 3.8051787916152895, "grad_norm": 0.040033504366874695, "learning_rate": 0.01, "loss": 2.0082, "step": 37032 }, { "epoch": 3.8054870530209617, "grad_norm": 0.03897986188530922, "learning_rate": 0.01, "loss": 1.9863, "step": 37035 }, { "epoch": 3.805795314426634, "grad_norm": 0.07525118440389633, "learning_rate": 0.01, "loss": 2.0316, "step": 37038 }, { "epoch": 3.8061035758323056, "grad_norm": 0.06256567686796188, "learning_rate": 0.01, "loss": 2.0018, "step": 37041 }, { "epoch": 3.806411837237978, "grad_norm": 0.05028248950839043, "learning_rate": 0.01, "loss": 2.0111, "step": 37044 }, { "epoch": 3.80672009864365, "grad_norm": 0.03530338406562805, "learning_rate": 0.01, "loss": 1.9918, "step": 37047 }, { "epoch": 3.8070283600493218, "grad_norm": 0.044229693710803986, "learning_rate": 0.01, "loss": 1.9906, "step": 37050 }, { "epoch": 3.807336621454994, "grad_norm": 0.1117880642414093, "learning_rate": 0.01, "loss": 2.0065, "step": 37053 }, { "epoch": 3.8076448828606657, "grad_norm": 0.04328616335988045, "learning_rate": 0.01, "loss": 1.9818, "step": 37056 }, { "epoch": 3.807953144266338, "grad_norm": 0.05948890000581741, "learning_rate": 0.01, "loss": 2.0013, "step": 37059 }, { "epoch": 3.8082614056720097, "grad_norm": 0.043167680501937866, "learning_rate": 0.01, "loss": 1.9996, "step": 37062 }, { "epoch": 3.808569667077682, "grad_norm": 0.04852902144193649, "learning_rate": 0.01, "loss": 2.0178, "step": 37065 }, { "epoch": 3.808877928483354, "grad_norm": 0.06212899461388588, "learning_rate": 0.01, "loss": 1.9709, "step": 37068 }, { "epoch": 3.809186189889026, "grad_norm": 0.07403160631656647, "learning_rate": 0.01, "loss": 1.9913, "step": 37071 }, { "epoch": 3.809494451294698, "grad_norm": 0.07734464108943939, "learning_rate": 0.01, "loss": 2.008, "step": 37074 }, { "epoch": 3.80980271270037, "grad_norm": 0.05057765170931816, "learning_rate": 0.01, "loss": 1.9949, "step": 37077 }, { "epoch": 3.810110974106042, "grad_norm": 0.11746016144752502, "learning_rate": 0.01, "loss": 1.9787, "step": 37080 }, { "epoch": 3.8104192355117137, "grad_norm": 0.10531330853700638, "learning_rate": 0.01, "loss": 2.0042, "step": 37083 }, { "epoch": 3.810727496917386, "grad_norm": 0.11436725407838821, "learning_rate": 0.01, "loss": 2.0051, "step": 37086 }, { "epoch": 3.811035758323058, "grad_norm": 0.04365989565849304, "learning_rate": 0.01, "loss": 1.9909, "step": 37089 }, { "epoch": 3.81134401972873, "grad_norm": 0.04271963611245155, "learning_rate": 0.01, "loss": 1.998, "step": 37092 }, { "epoch": 3.811652281134402, "grad_norm": 0.03649172931909561, "learning_rate": 0.01, "loss": 1.9963, "step": 37095 }, { "epoch": 3.811960542540074, "grad_norm": 0.054175905883312225, "learning_rate": 0.01, "loss": 2.0175, "step": 37098 }, { "epoch": 3.812268803945746, "grad_norm": 0.041479047387838364, "learning_rate": 0.01, "loss": 1.9962, "step": 37101 }, { "epoch": 3.812577065351418, "grad_norm": 0.03695906326174736, "learning_rate": 0.01, "loss": 1.9984, "step": 37104 }, { "epoch": 3.81288532675709, "grad_norm": 0.05167945846915245, "learning_rate": 0.01, "loss": 2.0219, "step": 37107 }, { "epoch": 3.813193588162762, "grad_norm": 0.0706062912940979, "learning_rate": 0.01, "loss": 1.9862, "step": 37110 }, { "epoch": 3.813501849568434, "grad_norm": 0.04282987117767334, "learning_rate": 0.01, "loss": 1.9829, "step": 37113 }, { "epoch": 3.813810110974106, "grad_norm": 0.048825137317180634, "learning_rate": 0.01, "loss": 1.9764, "step": 37116 }, { "epoch": 3.8141183723797782, "grad_norm": 0.13310399651527405, "learning_rate": 0.01, "loss": 1.9889, "step": 37119 }, { "epoch": 3.81442663378545, "grad_norm": 0.05691874772310257, "learning_rate": 0.01, "loss": 2.0029, "step": 37122 }, { "epoch": 3.814734895191122, "grad_norm": 0.07542920112609863, "learning_rate": 0.01, "loss": 2.0014, "step": 37125 }, { "epoch": 3.815043156596794, "grad_norm": 0.09146812558174133, "learning_rate": 0.01, "loss": 1.9908, "step": 37128 }, { "epoch": 3.815351418002466, "grad_norm": 0.04742105305194855, "learning_rate": 0.01, "loss": 1.9907, "step": 37131 }, { "epoch": 3.815659679408138, "grad_norm": 0.03387041017413139, "learning_rate": 0.01, "loss": 1.9763, "step": 37134 }, { "epoch": 3.81596794081381, "grad_norm": 0.036746036261320114, "learning_rate": 0.01, "loss": 1.9919, "step": 37137 }, { "epoch": 3.8162762022194823, "grad_norm": 0.06593815982341766, "learning_rate": 0.01, "loss": 1.9814, "step": 37140 }, { "epoch": 3.816584463625154, "grad_norm": 0.10538439452648163, "learning_rate": 0.01, "loss": 2.0011, "step": 37143 }, { "epoch": 3.816892725030826, "grad_norm": 0.07849781960248947, "learning_rate": 0.01, "loss": 2.0053, "step": 37146 }, { "epoch": 3.8172009864364984, "grad_norm": 0.10899082571268082, "learning_rate": 0.01, "loss": 2.0186, "step": 37149 }, { "epoch": 3.81750924784217, "grad_norm": 0.05171935632824898, "learning_rate": 0.01, "loss": 1.9911, "step": 37152 }, { "epoch": 3.817817509247842, "grad_norm": 0.06028240546584129, "learning_rate": 0.01, "loss": 1.9911, "step": 37155 }, { "epoch": 3.818125770653514, "grad_norm": 0.0440855398774147, "learning_rate": 0.01, "loss": 2.0063, "step": 37158 }, { "epoch": 3.8184340320591863, "grad_norm": 0.05764901638031006, "learning_rate": 0.01, "loss": 1.9834, "step": 37161 }, { "epoch": 3.818742293464858, "grad_norm": 0.042115770280361176, "learning_rate": 0.01, "loss": 1.9816, "step": 37164 }, { "epoch": 3.8190505548705302, "grad_norm": 0.0819876417517662, "learning_rate": 0.01, "loss": 1.9991, "step": 37167 }, { "epoch": 3.8193588162762024, "grad_norm": 0.09432919323444366, "learning_rate": 0.01, "loss": 1.999, "step": 37170 }, { "epoch": 3.819667077681874, "grad_norm": 0.1223209798336029, "learning_rate": 0.01, "loss": 1.994, "step": 37173 }, { "epoch": 3.8199753390875464, "grad_norm": 0.08615586161613464, "learning_rate": 0.01, "loss": 1.9676, "step": 37176 }, { "epoch": 3.820283600493218, "grad_norm": 0.0847785696387291, "learning_rate": 0.01, "loss": 2.0067, "step": 37179 }, { "epoch": 3.8205918618988903, "grad_norm": 0.059272028505802155, "learning_rate": 0.01, "loss": 1.9938, "step": 37182 }, { "epoch": 3.820900123304562, "grad_norm": 0.042237572371959686, "learning_rate": 0.01, "loss": 2.0011, "step": 37185 }, { "epoch": 3.8212083847102343, "grad_norm": 0.04662346839904785, "learning_rate": 0.01, "loss": 1.989, "step": 37188 }, { "epoch": 3.8215166461159065, "grad_norm": 0.04566279426217079, "learning_rate": 0.01, "loss": 2.0093, "step": 37191 }, { "epoch": 3.821824907521578, "grad_norm": 0.07829894125461578, "learning_rate": 0.01, "loss": 1.9811, "step": 37194 }, { "epoch": 3.8221331689272504, "grad_norm": 0.09386474639177322, "learning_rate": 0.01, "loss": 2.016, "step": 37197 }, { "epoch": 3.822441430332922, "grad_norm": 0.04232482239603996, "learning_rate": 0.01, "loss": 1.9899, "step": 37200 }, { "epoch": 3.8227496917385944, "grad_norm": 0.09978868067264557, "learning_rate": 0.01, "loss": 2.0112, "step": 37203 }, { "epoch": 3.823057953144266, "grad_norm": 0.04246772453188896, "learning_rate": 0.01, "loss": 1.9953, "step": 37206 }, { "epoch": 3.8233662145499383, "grad_norm": 0.13856659829616547, "learning_rate": 0.01, "loss": 2.0061, "step": 37209 }, { "epoch": 3.8236744759556105, "grad_norm": 0.05659123882651329, "learning_rate": 0.01, "loss": 1.9963, "step": 37212 }, { "epoch": 3.8239827373612822, "grad_norm": 0.05944579839706421, "learning_rate": 0.01, "loss": 2.0016, "step": 37215 }, { "epoch": 3.8242909987669544, "grad_norm": 0.040483999997377396, "learning_rate": 0.01, "loss": 1.996, "step": 37218 }, { "epoch": 3.8245992601726266, "grad_norm": 0.05651898682117462, "learning_rate": 0.01, "loss": 1.9863, "step": 37221 }, { "epoch": 3.8249075215782984, "grad_norm": 0.04841217026114464, "learning_rate": 0.01, "loss": 1.9924, "step": 37224 }, { "epoch": 3.82521578298397, "grad_norm": 0.05571025237441063, "learning_rate": 0.01, "loss": 1.9857, "step": 37227 }, { "epoch": 3.8255240443896423, "grad_norm": 0.07893476635217667, "learning_rate": 0.01, "loss": 2.0081, "step": 37230 }, { "epoch": 3.8258323057953145, "grad_norm": 0.07275935262441635, "learning_rate": 0.01, "loss": 2.0264, "step": 37233 }, { "epoch": 3.8261405672009863, "grad_norm": 0.14288173615932465, "learning_rate": 0.01, "loss": 1.9963, "step": 37236 }, { "epoch": 3.8264488286066585, "grad_norm": 0.18501073122024536, "learning_rate": 0.01, "loss": 2.0133, "step": 37239 }, { "epoch": 3.8267570900123307, "grad_norm": 0.12003073841333389, "learning_rate": 0.01, "loss": 1.9811, "step": 37242 }, { "epoch": 3.8270653514180024, "grad_norm": 0.05952821299433708, "learning_rate": 0.01, "loss": 2.0194, "step": 37245 }, { "epoch": 3.8273736128236746, "grad_norm": 0.052555590867996216, "learning_rate": 0.01, "loss": 2.0046, "step": 37248 }, { "epoch": 3.8276818742293464, "grad_norm": 0.05527700483798981, "learning_rate": 0.01, "loss": 1.9929, "step": 37251 }, { "epoch": 3.8279901356350186, "grad_norm": 0.043385427445173264, "learning_rate": 0.01, "loss": 1.9682, "step": 37254 }, { "epoch": 3.8282983970406903, "grad_norm": 0.05360834673047066, "learning_rate": 0.01, "loss": 1.9979, "step": 37257 }, { "epoch": 3.8286066584463625, "grad_norm": 0.04542825371026993, "learning_rate": 0.01, "loss": 1.981, "step": 37260 }, { "epoch": 3.8289149198520347, "grad_norm": 0.04607833921909332, "learning_rate": 0.01, "loss": 1.9995, "step": 37263 }, { "epoch": 3.8292231812577064, "grad_norm": 0.059813372790813446, "learning_rate": 0.01, "loss": 2.0043, "step": 37266 }, { "epoch": 3.8295314426633786, "grad_norm": 0.11444689333438873, "learning_rate": 0.01, "loss": 2.0026, "step": 37269 }, { "epoch": 3.829839704069051, "grad_norm": 0.07510776817798615, "learning_rate": 0.01, "loss": 2.0019, "step": 37272 }, { "epoch": 3.8301479654747226, "grad_norm": 0.051781926304101944, "learning_rate": 0.01, "loss": 2.0194, "step": 37275 }, { "epoch": 3.8304562268803943, "grad_norm": 0.051285672932863235, "learning_rate": 0.01, "loss": 1.9982, "step": 37278 }, { "epoch": 3.8307644882860665, "grad_norm": 0.0464506596326828, "learning_rate": 0.01, "loss": 2.0033, "step": 37281 }, { "epoch": 3.8310727496917387, "grad_norm": 0.03997113183140755, "learning_rate": 0.01, "loss": 1.9985, "step": 37284 }, { "epoch": 3.8313810110974105, "grad_norm": 0.03474081680178642, "learning_rate": 0.01, "loss": 1.9885, "step": 37287 }, { "epoch": 3.8316892725030827, "grad_norm": 0.13113395869731903, "learning_rate": 0.01, "loss": 2.0031, "step": 37290 }, { "epoch": 3.831997533908755, "grad_norm": 0.03894534707069397, "learning_rate": 0.01, "loss": 2.0232, "step": 37293 }, { "epoch": 3.8323057953144266, "grad_norm": 0.030656691640615463, "learning_rate": 0.01, "loss": 2.0052, "step": 37296 }, { "epoch": 3.832614056720099, "grad_norm": 0.08632262796163559, "learning_rate": 0.01, "loss": 2.0037, "step": 37299 }, { "epoch": 3.8329223181257706, "grad_norm": 0.09762530028820038, "learning_rate": 0.01, "loss": 1.997, "step": 37302 }, { "epoch": 3.8332305795314427, "grad_norm": 0.07323916256427765, "learning_rate": 0.01, "loss": 2.0051, "step": 37305 }, { "epoch": 3.8335388409371145, "grad_norm": 0.08117776364088058, "learning_rate": 0.01, "loss": 2.0123, "step": 37308 }, { "epoch": 3.8338471023427867, "grad_norm": 0.07396585494279861, "learning_rate": 0.01, "loss": 2.0043, "step": 37311 }, { "epoch": 3.834155363748459, "grad_norm": 0.07192173600196838, "learning_rate": 0.01, "loss": 2.0249, "step": 37314 }, { "epoch": 3.8344636251541306, "grad_norm": 0.06731805950403214, "learning_rate": 0.01, "loss": 2.0005, "step": 37317 }, { "epoch": 3.834771886559803, "grad_norm": 0.06851818412542343, "learning_rate": 0.01, "loss": 1.9924, "step": 37320 }, { "epoch": 3.8350801479654746, "grad_norm": 0.07433861494064331, "learning_rate": 0.01, "loss": 2.0238, "step": 37323 }, { "epoch": 3.835388409371147, "grad_norm": 0.13405515253543854, "learning_rate": 0.01, "loss": 2.0068, "step": 37326 }, { "epoch": 3.8356966707768185, "grad_norm": 0.0912710577249527, "learning_rate": 0.01, "loss": 1.9802, "step": 37329 }, { "epoch": 3.8360049321824907, "grad_norm": 0.05791820213198662, "learning_rate": 0.01, "loss": 1.9655, "step": 37332 }, { "epoch": 3.836313193588163, "grad_norm": 0.06457755714654922, "learning_rate": 0.01, "loss": 1.9818, "step": 37335 }, { "epoch": 3.8366214549938347, "grad_norm": 0.06486820429563522, "learning_rate": 0.01, "loss": 1.9909, "step": 37338 }, { "epoch": 3.836929716399507, "grad_norm": 0.04546729475259781, "learning_rate": 0.01, "loss": 1.9898, "step": 37341 }, { "epoch": 3.837237977805179, "grad_norm": 0.058978717774152756, "learning_rate": 0.01, "loss": 1.9948, "step": 37344 }, { "epoch": 3.837546239210851, "grad_norm": 0.09566085040569305, "learning_rate": 0.01, "loss": 2.0054, "step": 37347 }, { "epoch": 3.8378545006165226, "grad_norm": 0.038889624178409576, "learning_rate": 0.01, "loss": 2.0, "step": 37350 }, { "epoch": 3.8381627620221948, "grad_norm": 0.09589283168315887, "learning_rate": 0.01, "loss": 2.0305, "step": 37353 }, { "epoch": 3.838471023427867, "grad_norm": 0.04719104990363121, "learning_rate": 0.01, "loss": 2.0008, "step": 37356 }, { "epoch": 3.8387792848335387, "grad_norm": 0.09488753974437714, "learning_rate": 0.01, "loss": 1.9929, "step": 37359 }, { "epoch": 3.839087546239211, "grad_norm": 0.09091513603925705, "learning_rate": 0.01, "loss": 2.0032, "step": 37362 }, { "epoch": 3.839395807644883, "grad_norm": 0.06685637682676315, "learning_rate": 0.01, "loss": 1.9713, "step": 37365 }, { "epoch": 3.839704069050555, "grad_norm": 0.0734042376279831, "learning_rate": 0.01, "loss": 2.0043, "step": 37368 }, { "epoch": 3.840012330456227, "grad_norm": 0.04996969550848007, "learning_rate": 0.01, "loss": 1.9931, "step": 37371 }, { "epoch": 3.840320591861899, "grad_norm": 0.07901585847139359, "learning_rate": 0.01, "loss": 2.0007, "step": 37374 }, { "epoch": 3.840628853267571, "grad_norm": 0.06289222836494446, "learning_rate": 0.01, "loss": 1.978, "step": 37377 }, { "epoch": 3.8409371146732427, "grad_norm": 0.07179414480924606, "learning_rate": 0.01, "loss": 1.988, "step": 37380 }, { "epoch": 3.841245376078915, "grad_norm": 0.05835287645459175, "learning_rate": 0.01, "loss": 1.9777, "step": 37383 }, { "epoch": 3.841553637484587, "grad_norm": 0.07789372652769089, "learning_rate": 0.01, "loss": 1.9832, "step": 37386 }, { "epoch": 3.841861898890259, "grad_norm": 0.10866318643093109, "learning_rate": 0.01, "loss": 1.9973, "step": 37389 }, { "epoch": 3.842170160295931, "grad_norm": 0.12253516167402267, "learning_rate": 0.01, "loss": 1.9902, "step": 37392 }, { "epoch": 3.842478421701603, "grad_norm": 0.1773093193769455, "learning_rate": 0.01, "loss": 2.0107, "step": 37395 }, { "epoch": 3.842786683107275, "grad_norm": 0.12998591363430023, "learning_rate": 0.01, "loss": 1.9986, "step": 37398 }, { "epoch": 3.8430949445129468, "grad_norm": 0.07139603793621063, "learning_rate": 0.01, "loss": 2.0066, "step": 37401 }, { "epoch": 3.843403205918619, "grad_norm": 0.05447724089026451, "learning_rate": 0.01, "loss": 1.9924, "step": 37404 }, { "epoch": 3.843711467324291, "grad_norm": 0.053450725972652435, "learning_rate": 0.01, "loss": 2.0279, "step": 37407 }, { "epoch": 3.844019728729963, "grad_norm": 0.042635634541511536, "learning_rate": 0.01, "loss": 2.0146, "step": 37410 }, { "epoch": 3.844327990135635, "grad_norm": 0.06024963781237602, "learning_rate": 0.01, "loss": 1.9926, "step": 37413 }, { "epoch": 3.8446362515413073, "grad_norm": 0.04149017482995987, "learning_rate": 0.01, "loss": 1.9986, "step": 37416 }, { "epoch": 3.844944512946979, "grad_norm": 0.043965183198451996, "learning_rate": 0.01, "loss": 1.9881, "step": 37419 }, { "epoch": 3.845252774352651, "grad_norm": 0.06676291674375534, "learning_rate": 0.01, "loss": 1.9913, "step": 37422 }, { "epoch": 3.845561035758323, "grad_norm": 0.1391642689704895, "learning_rate": 0.01, "loss": 1.9918, "step": 37425 }, { "epoch": 3.845869297163995, "grad_norm": 0.12824542820453644, "learning_rate": 0.01, "loss": 2.0091, "step": 37428 }, { "epoch": 3.846177558569667, "grad_norm": 0.038818515837192535, "learning_rate": 0.01, "loss": 2.0008, "step": 37431 }, { "epoch": 3.846485819975339, "grad_norm": 0.06105605140328407, "learning_rate": 0.01, "loss": 2.005, "step": 37434 }, { "epoch": 3.8467940813810113, "grad_norm": 0.05692486837506294, "learning_rate": 0.01, "loss": 2.0234, "step": 37437 }, { "epoch": 3.847102342786683, "grad_norm": 0.03328600525856018, "learning_rate": 0.01, "loss": 1.9845, "step": 37440 }, { "epoch": 3.8474106041923553, "grad_norm": 0.05563362315297127, "learning_rate": 0.01, "loss": 1.9972, "step": 37443 }, { "epoch": 3.847718865598027, "grad_norm": 0.04523022845387459, "learning_rate": 0.01, "loss": 1.9607, "step": 37446 }, { "epoch": 3.848027127003699, "grad_norm": 0.12618042528629303, "learning_rate": 0.01, "loss": 1.9578, "step": 37449 }, { "epoch": 3.848335388409371, "grad_norm": 0.10030481964349747, "learning_rate": 0.01, "loss": 2.002, "step": 37452 }, { "epoch": 3.848643649815043, "grad_norm": 0.09524470567703247, "learning_rate": 0.01, "loss": 1.9772, "step": 37455 }, { "epoch": 3.8489519112207153, "grad_norm": 0.06369511038064957, "learning_rate": 0.01, "loss": 1.9853, "step": 37458 }, { "epoch": 3.849260172626387, "grad_norm": 0.052571844309568405, "learning_rate": 0.01, "loss": 1.9925, "step": 37461 }, { "epoch": 3.8495684340320593, "grad_norm": 0.040871210396289825, "learning_rate": 0.01, "loss": 2.0192, "step": 37464 }, { "epoch": 3.849876695437731, "grad_norm": 0.0393734946846962, "learning_rate": 0.01, "loss": 2.0312, "step": 37467 }, { "epoch": 3.8501849568434032, "grad_norm": 0.10289833694696426, "learning_rate": 0.01, "loss": 1.9791, "step": 37470 }, { "epoch": 3.850493218249075, "grad_norm": 0.053732771426439285, "learning_rate": 0.01, "loss": 1.9955, "step": 37473 }, { "epoch": 3.850801479654747, "grad_norm": 0.035672031342983246, "learning_rate": 0.01, "loss": 1.9896, "step": 37476 }, { "epoch": 3.8511097410604194, "grad_norm": 0.07263844460248947, "learning_rate": 0.01, "loss": 1.9779, "step": 37479 }, { "epoch": 3.851418002466091, "grad_norm": 0.06151323765516281, "learning_rate": 0.01, "loss": 1.9965, "step": 37482 }, { "epoch": 3.8517262638717633, "grad_norm": 0.08033137768507004, "learning_rate": 0.01, "loss": 2.0161, "step": 37485 }, { "epoch": 3.8520345252774355, "grad_norm": 0.09900759160518646, "learning_rate": 0.01, "loss": 2.0116, "step": 37488 }, { "epoch": 3.8523427866831073, "grad_norm": 0.061891715973615646, "learning_rate": 0.01, "loss": 1.9932, "step": 37491 }, { "epoch": 3.8526510480887795, "grad_norm": 0.039978478103876114, "learning_rate": 0.01, "loss": 1.998, "step": 37494 }, { "epoch": 3.852959309494451, "grad_norm": 0.04366837814450264, "learning_rate": 0.01, "loss": 1.9992, "step": 37497 }, { "epoch": 3.8532675709001234, "grad_norm": 0.03704690560698509, "learning_rate": 0.01, "loss": 2.0011, "step": 37500 }, { "epoch": 3.853575832305795, "grad_norm": 0.03964201733469963, "learning_rate": 0.01, "loss": 1.9773, "step": 37503 }, { "epoch": 3.8538840937114673, "grad_norm": 0.060866422951221466, "learning_rate": 0.01, "loss": 1.9779, "step": 37506 }, { "epoch": 3.8541923551171395, "grad_norm": 0.1373164802789688, "learning_rate": 0.01, "loss": 1.9751, "step": 37509 }, { "epoch": 3.8545006165228113, "grad_norm": 0.12396371364593506, "learning_rate": 0.01, "loss": 2.0007, "step": 37512 }, { "epoch": 3.8548088779284835, "grad_norm": 0.05732661485671997, "learning_rate": 0.01, "loss": 1.9889, "step": 37515 }, { "epoch": 3.8551171393341552, "grad_norm": 0.072254478931427, "learning_rate": 0.01, "loss": 2.0066, "step": 37518 }, { "epoch": 3.8554254007398274, "grad_norm": 0.05103025957942009, "learning_rate": 0.01, "loss": 1.981, "step": 37521 }, { "epoch": 3.855733662145499, "grad_norm": 0.06759181618690491, "learning_rate": 0.01, "loss": 1.9863, "step": 37524 }, { "epoch": 3.8560419235511714, "grad_norm": 0.08190746605396271, "learning_rate": 0.01, "loss": 2.0103, "step": 37527 }, { "epoch": 3.8563501849568436, "grad_norm": 0.052758537232875824, "learning_rate": 0.01, "loss": 2.0052, "step": 37530 }, { "epoch": 3.8566584463625153, "grad_norm": 0.06971180438995361, "learning_rate": 0.01, "loss": 1.9825, "step": 37533 }, { "epoch": 3.8569667077681875, "grad_norm": 0.051832135766744614, "learning_rate": 0.01, "loss": 2.0283, "step": 37536 }, { "epoch": 3.8572749691738597, "grad_norm": 0.11838405579328537, "learning_rate": 0.01, "loss": 1.9869, "step": 37539 }, { "epoch": 3.8575832305795315, "grad_norm": 0.0657329186797142, "learning_rate": 0.01, "loss": 2.0137, "step": 37542 }, { "epoch": 3.857891491985203, "grad_norm": 0.042532552033662796, "learning_rate": 0.01, "loss": 1.9633, "step": 37545 }, { "epoch": 3.8581997533908754, "grad_norm": 0.029605695977807045, "learning_rate": 0.01, "loss": 1.9747, "step": 37548 }, { "epoch": 3.8585080147965476, "grad_norm": 0.045208025723695755, "learning_rate": 0.01, "loss": 2.0034, "step": 37551 }, { "epoch": 3.8588162762022193, "grad_norm": 0.061939138919115067, "learning_rate": 0.01, "loss": 2.0042, "step": 37554 }, { "epoch": 3.8591245376078915, "grad_norm": 0.08816343545913696, "learning_rate": 0.01, "loss": 1.9995, "step": 37557 }, { "epoch": 3.8594327990135637, "grad_norm": 0.06432296335697174, "learning_rate": 0.01, "loss": 1.9992, "step": 37560 }, { "epoch": 3.8597410604192355, "grad_norm": 0.07691509276628494, "learning_rate": 0.01, "loss": 1.984, "step": 37563 }, { "epoch": 3.8600493218249077, "grad_norm": 0.06328088790178299, "learning_rate": 0.01, "loss": 2.0085, "step": 37566 }, { "epoch": 3.8603575832305794, "grad_norm": 0.10897444188594818, "learning_rate": 0.01, "loss": 2.0061, "step": 37569 }, { "epoch": 3.8606658446362516, "grad_norm": 0.07461336255073547, "learning_rate": 0.01, "loss": 1.9987, "step": 37572 }, { "epoch": 3.8609741060419234, "grad_norm": 0.08543375879526138, "learning_rate": 0.01, "loss": 1.9953, "step": 37575 }, { "epoch": 3.8612823674475956, "grad_norm": 0.09546118974685669, "learning_rate": 0.01, "loss": 1.9984, "step": 37578 }, { "epoch": 3.8615906288532678, "grad_norm": 0.0741833969950676, "learning_rate": 0.01, "loss": 2.0174, "step": 37581 }, { "epoch": 3.8618988902589395, "grad_norm": 0.06818868219852448, "learning_rate": 0.01, "loss": 1.9662, "step": 37584 }, { "epoch": 3.8622071516646117, "grad_norm": 0.05515025556087494, "learning_rate": 0.01, "loss": 2.0154, "step": 37587 }, { "epoch": 3.8625154130702835, "grad_norm": 0.035819582641124725, "learning_rate": 0.01, "loss": 2.0044, "step": 37590 }, { "epoch": 3.8628236744759556, "grad_norm": 0.03145124390721321, "learning_rate": 0.01, "loss": 1.9895, "step": 37593 }, { "epoch": 3.8631319358816274, "grad_norm": 0.036893073469400406, "learning_rate": 0.01, "loss": 1.971, "step": 37596 }, { "epoch": 3.8634401972872996, "grad_norm": 0.03417917340993881, "learning_rate": 0.01, "loss": 2.0172, "step": 37599 }, { "epoch": 3.863748458692972, "grad_norm": 0.06035599485039711, "learning_rate": 0.01, "loss": 1.9913, "step": 37602 }, { "epoch": 3.8640567200986435, "grad_norm": 0.12924471497535706, "learning_rate": 0.01, "loss": 1.9937, "step": 37605 }, { "epoch": 3.8643649815043157, "grad_norm": 0.1035354882478714, "learning_rate": 0.01, "loss": 1.9955, "step": 37608 }, { "epoch": 3.864673242909988, "grad_norm": 0.08066008985042572, "learning_rate": 0.01, "loss": 1.9858, "step": 37611 }, { "epoch": 3.8649815043156597, "grad_norm": 0.09418363124132156, "learning_rate": 0.01, "loss": 1.9869, "step": 37614 }, { "epoch": 3.8652897657213314, "grad_norm": 0.04805073142051697, "learning_rate": 0.01, "loss": 2.0007, "step": 37617 }, { "epoch": 3.8655980271270036, "grad_norm": 0.04753410443663597, "learning_rate": 0.01, "loss": 1.9875, "step": 37620 }, { "epoch": 3.865906288532676, "grad_norm": 0.0458386205136776, "learning_rate": 0.01, "loss": 1.9936, "step": 37623 }, { "epoch": 3.8662145499383476, "grad_norm": 0.1318642795085907, "learning_rate": 0.01, "loss": 1.9759, "step": 37626 }, { "epoch": 3.8665228113440198, "grad_norm": 0.10072299838066101, "learning_rate": 0.01, "loss": 2.0074, "step": 37629 }, { "epoch": 3.866831072749692, "grad_norm": 0.06701991707086563, "learning_rate": 0.01, "loss": 2.0128, "step": 37632 }, { "epoch": 3.8671393341553637, "grad_norm": 0.055372897535562515, "learning_rate": 0.01, "loss": 2.0143, "step": 37635 }, { "epoch": 3.867447595561036, "grad_norm": 0.061906322836875916, "learning_rate": 0.01, "loss": 1.9947, "step": 37638 }, { "epoch": 3.8677558569667077, "grad_norm": 0.05329981446266174, "learning_rate": 0.01, "loss": 2.0001, "step": 37641 }, { "epoch": 3.86806411837238, "grad_norm": 0.05068975314497948, "learning_rate": 0.01, "loss": 1.9685, "step": 37644 }, { "epoch": 3.8683723797780516, "grad_norm": 0.09255755692720413, "learning_rate": 0.01, "loss": 2.0059, "step": 37647 }, { "epoch": 3.868680641183724, "grad_norm": 0.08617328852415085, "learning_rate": 0.01, "loss": 1.9905, "step": 37650 }, { "epoch": 3.868988902589396, "grad_norm": 0.044413745403289795, "learning_rate": 0.01, "loss": 1.978, "step": 37653 }, { "epoch": 3.8692971639950677, "grad_norm": 0.086729995906353, "learning_rate": 0.01, "loss": 1.9997, "step": 37656 }, { "epoch": 3.86960542540074, "grad_norm": 0.07304412871599197, "learning_rate": 0.01, "loss": 1.9925, "step": 37659 }, { "epoch": 3.8699136868064117, "grad_norm": 0.06820013374090195, "learning_rate": 0.01, "loss": 2.0073, "step": 37662 }, { "epoch": 3.870221948212084, "grad_norm": 0.06025752052664757, "learning_rate": 0.01, "loss": 1.9999, "step": 37665 }, { "epoch": 3.8705302096177556, "grad_norm": 0.0649590715765953, "learning_rate": 0.01, "loss": 1.996, "step": 37668 }, { "epoch": 3.870838471023428, "grad_norm": 0.03553159162402153, "learning_rate": 0.01, "loss": 1.9689, "step": 37671 }, { "epoch": 3.8711467324291, "grad_norm": 0.06017104908823967, "learning_rate": 0.01, "loss": 2.0013, "step": 37674 }, { "epoch": 3.8714549938347718, "grad_norm": 0.05896780639886856, "learning_rate": 0.01, "loss": 1.9801, "step": 37677 }, { "epoch": 3.871763255240444, "grad_norm": 0.03493823856115341, "learning_rate": 0.01, "loss": 2.0032, "step": 37680 }, { "epoch": 3.872071516646116, "grad_norm": 0.11454503238201141, "learning_rate": 0.01, "loss": 2.0004, "step": 37683 }, { "epoch": 3.872379778051788, "grad_norm": 0.03603978455066681, "learning_rate": 0.01, "loss": 1.9834, "step": 37686 }, { "epoch": 3.8726880394574597, "grad_norm": 0.03927883505821228, "learning_rate": 0.01, "loss": 2.0015, "step": 37689 }, { "epoch": 3.872996300863132, "grad_norm": 0.06592239439487457, "learning_rate": 0.01, "loss": 2.0025, "step": 37692 }, { "epoch": 3.873304562268804, "grad_norm": 0.04402392357587814, "learning_rate": 0.01, "loss": 1.9845, "step": 37695 }, { "epoch": 3.873612823674476, "grad_norm": 0.04484686255455017, "learning_rate": 0.01, "loss": 1.9756, "step": 37698 }, { "epoch": 3.873921085080148, "grad_norm": 0.03577352687716484, "learning_rate": 0.01, "loss": 1.9957, "step": 37701 }, { "epoch": 3.87422934648582, "grad_norm": 0.03952937200665474, "learning_rate": 0.01, "loss": 1.9727, "step": 37704 }, { "epoch": 3.874537607891492, "grad_norm": 0.048469673842191696, "learning_rate": 0.01, "loss": 1.9783, "step": 37707 }, { "epoch": 3.874845869297164, "grad_norm": 0.04531377553939819, "learning_rate": 0.01, "loss": 1.9878, "step": 37710 }, { "epoch": 3.875154130702836, "grad_norm": 0.11962947994470596, "learning_rate": 0.01, "loss": 2.013, "step": 37713 }, { "epoch": 3.875462392108508, "grad_norm": 0.03379445523023605, "learning_rate": 0.01, "loss": 1.9817, "step": 37716 }, { "epoch": 3.87577065351418, "grad_norm": 0.036722879856824875, "learning_rate": 0.01, "loss": 1.969, "step": 37719 }, { "epoch": 3.876078914919852, "grad_norm": 0.03215174376964569, "learning_rate": 0.01, "loss": 1.9704, "step": 37722 }, { "epoch": 3.876387176325524, "grad_norm": 0.04591568931937218, "learning_rate": 0.01, "loss": 1.9886, "step": 37725 }, { "epoch": 3.876695437731196, "grad_norm": 0.04655212163925171, "learning_rate": 0.01, "loss": 1.9971, "step": 37728 }, { "epoch": 3.877003699136868, "grad_norm": 0.053555794060230255, "learning_rate": 0.01, "loss": 1.9863, "step": 37731 }, { "epoch": 3.8773119605425403, "grad_norm": 0.05492212250828743, "learning_rate": 0.01, "loss": 2.0099, "step": 37734 }, { "epoch": 3.877620221948212, "grad_norm": 0.07766193896532059, "learning_rate": 0.01, "loss": 1.982, "step": 37737 }, { "epoch": 3.877928483353884, "grad_norm": 0.040087949484586716, "learning_rate": 0.01, "loss": 2.018, "step": 37740 }, { "epoch": 3.878236744759556, "grad_norm": 0.07583244144916534, "learning_rate": 0.01, "loss": 1.9694, "step": 37743 }, { "epoch": 3.8785450061652282, "grad_norm": 0.0815076008439064, "learning_rate": 0.01, "loss": 1.9871, "step": 37746 }, { "epoch": 3.8788532675709, "grad_norm": 0.09940320998430252, "learning_rate": 0.01, "loss": 2.0211, "step": 37749 }, { "epoch": 3.879161528976572, "grad_norm": 0.07148890197277069, "learning_rate": 0.01, "loss": 2.0068, "step": 37752 }, { "epoch": 3.8794697903822444, "grad_norm": 0.0479053258895874, "learning_rate": 0.01, "loss": 2.0073, "step": 37755 }, { "epoch": 3.879778051787916, "grad_norm": 0.0394553542137146, "learning_rate": 0.01, "loss": 1.9948, "step": 37758 }, { "epoch": 3.8800863131935883, "grad_norm": 0.04193533584475517, "learning_rate": 0.01, "loss": 1.992, "step": 37761 }, { "epoch": 3.88039457459926, "grad_norm": 0.09164579212665558, "learning_rate": 0.01, "loss": 1.9968, "step": 37764 }, { "epoch": 3.8807028360049323, "grad_norm": 0.06852073222398758, "learning_rate": 0.01, "loss": 2.0127, "step": 37767 }, { "epoch": 3.881011097410604, "grad_norm": 0.049102578312158585, "learning_rate": 0.01, "loss": 2.0228, "step": 37770 }, { "epoch": 3.881319358816276, "grad_norm": 0.12974314391613007, "learning_rate": 0.01, "loss": 2.0158, "step": 37773 }, { "epoch": 3.8816276202219484, "grad_norm": 0.07722889631986618, "learning_rate": 0.01, "loss": 1.9791, "step": 37776 }, { "epoch": 3.88193588162762, "grad_norm": 0.07344157248735428, "learning_rate": 0.01, "loss": 2.0069, "step": 37779 }, { "epoch": 3.8822441430332923, "grad_norm": 0.035780053585767746, "learning_rate": 0.01, "loss": 2.0301, "step": 37782 }, { "epoch": 3.882552404438964, "grad_norm": 0.04180228337645531, "learning_rate": 0.01, "loss": 2.0013, "step": 37785 }, { "epoch": 3.8828606658446363, "grad_norm": 0.06659300625324249, "learning_rate": 0.01, "loss": 2.0016, "step": 37788 }, { "epoch": 3.883168927250308, "grad_norm": 0.08275042474269867, "learning_rate": 0.01, "loss": 1.9989, "step": 37791 }, { "epoch": 3.8834771886559802, "grad_norm": 0.12271406501531601, "learning_rate": 0.01, "loss": 2.0088, "step": 37794 }, { "epoch": 3.8837854500616524, "grad_norm": 0.14413659274578094, "learning_rate": 0.01, "loss": 2.0153, "step": 37797 }, { "epoch": 3.884093711467324, "grad_norm": 0.0712043046951294, "learning_rate": 0.01, "loss": 1.9928, "step": 37800 }, { "epoch": 3.8844019728729964, "grad_norm": 0.06497600674629211, "learning_rate": 0.01, "loss": 2.018, "step": 37803 }, { "epoch": 3.8847102342786686, "grad_norm": 0.053064875304698944, "learning_rate": 0.01, "loss": 1.9804, "step": 37806 }, { "epoch": 3.8850184956843403, "grad_norm": 0.04865000769495964, "learning_rate": 0.01, "loss": 2.0012, "step": 37809 }, { "epoch": 3.885326757090012, "grad_norm": 0.03862098976969719, "learning_rate": 0.01, "loss": 1.9836, "step": 37812 }, { "epoch": 3.8856350184956843, "grad_norm": 0.07821226119995117, "learning_rate": 0.01, "loss": 1.9974, "step": 37815 }, { "epoch": 3.8859432799013565, "grad_norm": 0.04477280378341675, "learning_rate": 0.01, "loss": 1.9914, "step": 37818 }, { "epoch": 3.886251541307028, "grad_norm": 0.11052905023097992, "learning_rate": 0.01, "loss": 1.9593, "step": 37821 }, { "epoch": 3.8865598027127004, "grad_norm": 0.07528085261583328, "learning_rate": 0.01, "loss": 1.9931, "step": 37824 }, { "epoch": 3.8868680641183726, "grad_norm": 0.09329832345247269, "learning_rate": 0.01, "loss": 1.9903, "step": 37827 }, { "epoch": 3.8871763255240444, "grad_norm": 0.03858843818306923, "learning_rate": 0.01, "loss": 2.0163, "step": 37830 }, { "epoch": 3.8874845869297165, "grad_norm": 0.05364726856350899, "learning_rate": 0.01, "loss": 1.9904, "step": 37833 }, { "epoch": 3.8877928483353883, "grad_norm": 0.036494240164756775, "learning_rate": 0.01, "loss": 1.9889, "step": 37836 }, { "epoch": 3.8881011097410605, "grad_norm": 0.033809784799814224, "learning_rate": 0.01, "loss": 1.9991, "step": 37839 }, { "epoch": 3.8884093711467322, "grad_norm": 0.08358091861009598, "learning_rate": 0.01, "loss": 1.9997, "step": 37842 }, { "epoch": 3.8887176325524044, "grad_norm": 0.04314170405268669, "learning_rate": 0.01, "loss": 1.9829, "step": 37845 }, { "epoch": 3.8890258939580766, "grad_norm": 0.06448940187692642, "learning_rate": 0.01, "loss": 2.0054, "step": 37848 }, { "epoch": 3.8893341553637484, "grad_norm": 0.04236144572496414, "learning_rate": 0.01, "loss": 2.0074, "step": 37851 }, { "epoch": 3.8896424167694206, "grad_norm": 0.08682555705308914, "learning_rate": 0.01, "loss": 1.9907, "step": 37854 }, { "epoch": 3.8899506781750923, "grad_norm": 0.1599910706281662, "learning_rate": 0.01, "loss": 2.0193, "step": 37857 }, { "epoch": 3.8902589395807645, "grad_norm": 0.04853571951389313, "learning_rate": 0.01, "loss": 2.013, "step": 37860 }, { "epoch": 3.8905672009864363, "grad_norm": 0.054008036851882935, "learning_rate": 0.01, "loss": 2.0035, "step": 37863 }, { "epoch": 3.8908754623921085, "grad_norm": 0.0459190271794796, "learning_rate": 0.01, "loss": 2.0101, "step": 37866 }, { "epoch": 3.8911837237977807, "grad_norm": 0.047940943390131, "learning_rate": 0.01, "loss": 1.9976, "step": 37869 }, { "epoch": 3.8914919852034524, "grad_norm": 0.041892241686582565, "learning_rate": 0.01, "loss": 1.9977, "step": 37872 }, { "epoch": 3.8918002466091246, "grad_norm": 0.037861257791519165, "learning_rate": 0.01, "loss": 2.0089, "step": 37875 }, { "epoch": 3.892108508014797, "grad_norm": 0.1314014196395874, "learning_rate": 0.01, "loss": 2.0124, "step": 37878 }, { "epoch": 3.8924167694204685, "grad_norm": 0.053526077419519424, "learning_rate": 0.01, "loss": 1.9998, "step": 37881 }, { "epoch": 3.8927250308261403, "grad_norm": 0.044471751898527145, "learning_rate": 0.01, "loss": 1.9975, "step": 37884 }, { "epoch": 3.8930332922318125, "grad_norm": 0.10089153051376343, "learning_rate": 0.01, "loss": 2.005, "step": 37887 }, { "epoch": 3.8933415536374847, "grad_norm": 0.09028996527194977, "learning_rate": 0.01, "loss": 2.0231, "step": 37890 }, { "epoch": 3.8936498150431564, "grad_norm": 0.06257162988185883, "learning_rate": 0.01, "loss": 2.0181, "step": 37893 }, { "epoch": 3.8939580764488286, "grad_norm": 0.056669414043426514, "learning_rate": 0.01, "loss": 2.0005, "step": 37896 }, { "epoch": 3.894266337854501, "grad_norm": 0.09135915338993073, "learning_rate": 0.01, "loss": 2.0008, "step": 37899 }, { "epoch": 3.8945745992601726, "grad_norm": 0.055741071701049805, "learning_rate": 0.01, "loss": 1.9884, "step": 37902 }, { "epoch": 3.8948828606658448, "grad_norm": 0.06624908000230789, "learning_rate": 0.01, "loss": 2.0107, "step": 37905 }, { "epoch": 3.8951911220715165, "grad_norm": 0.030004529282450676, "learning_rate": 0.01, "loss": 2.0076, "step": 37908 }, { "epoch": 3.8954993834771887, "grad_norm": 0.04271325841546059, "learning_rate": 0.01, "loss": 1.9966, "step": 37911 }, { "epoch": 3.8958076448828605, "grad_norm": 0.04701056331396103, "learning_rate": 0.01, "loss": 1.9912, "step": 37914 }, { "epoch": 3.8961159062885327, "grad_norm": 0.04298432916402817, "learning_rate": 0.01, "loss": 1.9828, "step": 37917 }, { "epoch": 3.896424167694205, "grad_norm": 0.04561863839626312, "learning_rate": 0.01, "loss": 1.9627, "step": 37920 }, { "epoch": 3.8967324290998766, "grad_norm": 0.12965039908885956, "learning_rate": 0.01, "loss": 2.0225, "step": 37923 }, { "epoch": 3.897040690505549, "grad_norm": 0.1109326183795929, "learning_rate": 0.01, "loss": 2.0033, "step": 37926 }, { "epoch": 3.8973489519112205, "grad_norm": 0.06224660202860832, "learning_rate": 0.01, "loss": 2.0008, "step": 37929 }, { "epoch": 3.8976572133168927, "grad_norm": 0.06318973749876022, "learning_rate": 0.01, "loss": 2.0092, "step": 37932 }, { "epoch": 3.8979654747225645, "grad_norm": 0.06799352914094925, "learning_rate": 0.01, "loss": 2.0034, "step": 37935 }, { "epoch": 3.8982737361282367, "grad_norm": 0.07071252167224884, "learning_rate": 0.01, "loss": 2.0239, "step": 37938 }, { "epoch": 3.898581997533909, "grad_norm": 0.03879907727241516, "learning_rate": 0.01, "loss": 1.9725, "step": 37941 }, { "epoch": 3.8988902589395806, "grad_norm": 0.048606619238853455, "learning_rate": 0.01, "loss": 2.0106, "step": 37944 }, { "epoch": 3.899198520345253, "grad_norm": 0.09814203530550003, "learning_rate": 0.01, "loss": 1.9919, "step": 37947 }, { "epoch": 3.899506781750925, "grad_norm": 0.04557656869292259, "learning_rate": 0.01, "loss": 1.98, "step": 37950 }, { "epoch": 3.8998150431565968, "grad_norm": 0.04122527688741684, "learning_rate": 0.01, "loss": 2.0131, "step": 37953 }, { "epoch": 3.900123304562269, "grad_norm": 0.060118671506643295, "learning_rate": 0.01, "loss": 2.0066, "step": 37956 }, { "epoch": 3.9004315659679407, "grad_norm": 0.14660358428955078, "learning_rate": 0.01, "loss": 1.9894, "step": 37959 }, { "epoch": 3.900739827373613, "grad_norm": 0.09819690883159637, "learning_rate": 0.01, "loss": 1.9912, "step": 37962 }, { "epoch": 3.9010480887792847, "grad_norm": 0.041503068059682846, "learning_rate": 0.01, "loss": 1.9565, "step": 37965 }, { "epoch": 3.901356350184957, "grad_norm": 0.04021459445357323, "learning_rate": 0.01, "loss": 2.0216, "step": 37968 }, { "epoch": 3.901664611590629, "grad_norm": 0.04789562150835991, "learning_rate": 0.01, "loss": 2.021, "step": 37971 }, { "epoch": 3.901972872996301, "grad_norm": 0.03949809446930885, "learning_rate": 0.01, "loss": 1.9655, "step": 37974 }, { "epoch": 3.902281134401973, "grad_norm": 0.041829485446214676, "learning_rate": 0.01, "loss": 2.0131, "step": 37977 }, { "epoch": 3.9025893958076447, "grad_norm": 0.13577204942703247, "learning_rate": 0.01, "loss": 1.9844, "step": 37980 }, { "epoch": 3.902897657213317, "grad_norm": 0.03821146488189697, "learning_rate": 0.01, "loss": 2.0024, "step": 37983 }, { "epoch": 3.9032059186189887, "grad_norm": 0.07382892072200775, "learning_rate": 0.01, "loss": 2.0065, "step": 37986 }, { "epoch": 3.903514180024661, "grad_norm": 0.04287475720047951, "learning_rate": 0.01, "loss": 1.9884, "step": 37989 }, { "epoch": 3.903822441430333, "grad_norm": 0.08588143438100815, "learning_rate": 0.01, "loss": 2.0178, "step": 37992 }, { "epoch": 3.904130702836005, "grad_norm": 0.060327284038066864, "learning_rate": 0.01, "loss": 1.9785, "step": 37995 }, { "epoch": 3.904438964241677, "grad_norm": 0.050726667046546936, "learning_rate": 0.01, "loss": 1.9798, "step": 37998 }, { "epoch": 3.904747225647349, "grad_norm": 0.0876336544752121, "learning_rate": 0.01, "loss": 1.9977, "step": 38001 }, { "epoch": 3.905055487053021, "grad_norm": 0.05690234154462814, "learning_rate": 0.01, "loss": 1.9835, "step": 38004 }, { "epoch": 3.9053637484586927, "grad_norm": 0.07977151870727539, "learning_rate": 0.01, "loss": 2.0067, "step": 38007 }, { "epoch": 3.905672009864365, "grad_norm": 0.038005661219358444, "learning_rate": 0.01, "loss": 2.0113, "step": 38010 }, { "epoch": 3.905980271270037, "grad_norm": 0.1033119261264801, "learning_rate": 0.01, "loss": 1.9964, "step": 38013 }, { "epoch": 3.906288532675709, "grad_norm": 0.05024554207921028, "learning_rate": 0.01, "loss": 1.9703, "step": 38016 }, { "epoch": 3.906596794081381, "grad_norm": 0.07625278830528259, "learning_rate": 0.01, "loss": 1.9895, "step": 38019 }, { "epoch": 3.9069050554870532, "grad_norm": 0.10337291657924652, "learning_rate": 0.01, "loss": 2.0167, "step": 38022 }, { "epoch": 3.907213316892725, "grad_norm": 0.04415413364768028, "learning_rate": 0.01, "loss": 2.0055, "step": 38025 }, { "epoch": 3.907521578298397, "grad_norm": 0.0347541943192482, "learning_rate": 0.01, "loss": 2.0018, "step": 38028 }, { "epoch": 3.907829839704069, "grad_norm": 0.036209288984537125, "learning_rate": 0.01, "loss": 1.9647, "step": 38031 }, { "epoch": 3.908138101109741, "grad_norm": 0.0959850400686264, "learning_rate": 0.01, "loss": 2.0013, "step": 38034 }, { "epoch": 3.908446362515413, "grad_norm": 0.05654274299740791, "learning_rate": 0.01, "loss": 1.983, "step": 38037 }, { "epoch": 3.908754623921085, "grad_norm": 0.11333990842103958, "learning_rate": 0.01, "loss": 1.9921, "step": 38040 }, { "epoch": 3.9090628853267573, "grad_norm": 0.05329067260026932, "learning_rate": 0.01, "loss": 2.0203, "step": 38043 }, { "epoch": 3.909371146732429, "grad_norm": 0.03489474579691887, "learning_rate": 0.01, "loss": 1.9992, "step": 38046 }, { "epoch": 3.909679408138101, "grad_norm": 0.044765155762434006, "learning_rate": 0.01, "loss": 1.9994, "step": 38049 }, { "epoch": 3.909987669543773, "grad_norm": 0.11689729988574982, "learning_rate": 0.01, "loss": 2.0322, "step": 38052 }, { "epoch": 3.910295930949445, "grad_norm": 0.04785189777612686, "learning_rate": 0.01, "loss": 2.0103, "step": 38055 }, { "epoch": 3.910604192355117, "grad_norm": 0.040157750248909, "learning_rate": 0.01, "loss": 1.9836, "step": 38058 }, { "epoch": 3.910912453760789, "grad_norm": 0.04107039049267769, "learning_rate": 0.01, "loss": 2.0106, "step": 38061 }, { "epoch": 3.9112207151664613, "grad_norm": 0.1298598051071167, "learning_rate": 0.01, "loss": 1.9866, "step": 38064 }, { "epoch": 3.911528976572133, "grad_norm": 0.04832058027386665, "learning_rate": 0.01, "loss": 1.9831, "step": 38067 }, { "epoch": 3.9118372379778052, "grad_norm": 0.035566527396440506, "learning_rate": 0.01, "loss": 2.0097, "step": 38070 }, { "epoch": 3.9121454993834774, "grad_norm": 0.12645745277404785, "learning_rate": 0.01, "loss": 1.9978, "step": 38073 }, { "epoch": 3.912453760789149, "grad_norm": 0.13044588267803192, "learning_rate": 0.01, "loss": 1.9921, "step": 38076 }, { "epoch": 3.912762022194821, "grad_norm": 0.0751858800649643, "learning_rate": 0.01, "loss": 2.024, "step": 38079 }, { "epoch": 3.913070283600493, "grad_norm": 0.06417164206504822, "learning_rate": 0.01, "loss": 2.0241, "step": 38082 }, { "epoch": 3.9133785450061653, "grad_norm": 0.03895511105656624, "learning_rate": 0.01, "loss": 2.0074, "step": 38085 }, { "epoch": 3.913686806411837, "grad_norm": 0.03581606224179268, "learning_rate": 0.01, "loss": 1.9923, "step": 38088 }, { "epoch": 3.9139950678175093, "grad_norm": 0.02936650812625885, "learning_rate": 0.01, "loss": 1.989, "step": 38091 }, { "epoch": 3.9143033292231815, "grad_norm": 0.036661721765995026, "learning_rate": 0.01, "loss": 2.009, "step": 38094 }, { "epoch": 3.914611590628853, "grad_norm": 0.08295129984617233, "learning_rate": 0.01, "loss": 2.0141, "step": 38097 }, { "epoch": 3.9149198520345254, "grad_norm": 0.09887045621871948, "learning_rate": 0.01, "loss": 2.0028, "step": 38100 }, { "epoch": 3.915228113440197, "grad_norm": 0.0821511521935463, "learning_rate": 0.01, "loss": 1.9886, "step": 38103 }, { "epoch": 3.9155363748458694, "grad_norm": 0.0894683301448822, "learning_rate": 0.01, "loss": 2.0059, "step": 38106 }, { "epoch": 3.915844636251541, "grad_norm": 0.043715961277484894, "learning_rate": 0.01, "loss": 2.0197, "step": 38109 }, { "epoch": 3.9161528976572133, "grad_norm": 0.06341227144002914, "learning_rate": 0.01, "loss": 2.004, "step": 38112 }, { "epoch": 3.9164611590628855, "grad_norm": 0.10574996471405029, "learning_rate": 0.01, "loss": 1.9834, "step": 38115 }, { "epoch": 3.9167694204685573, "grad_norm": 0.07099676132202148, "learning_rate": 0.01, "loss": 1.9827, "step": 38118 }, { "epoch": 3.9170776818742294, "grad_norm": 0.051802802830934525, "learning_rate": 0.01, "loss": 2.0185, "step": 38121 }, { "epoch": 3.917385943279901, "grad_norm": 0.059143371880054474, "learning_rate": 0.01, "loss": 2.0009, "step": 38124 }, { "epoch": 3.9176942046855734, "grad_norm": 0.07478468120098114, "learning_rate": 0.01, "loss": 1.99, "step": 38127 }, { "epoch": 3.918002466091245, "grad_norm": 0.05269934609532356, "learning_rate": 0.01, "loss": 2.002, "step": 38130 }, { "epoch": 3.9183107274969173, "grad_norm": 0.03774020075798035, "learning_rate": 0.01, "loss": 1.9863, "step": 38133 }, { "epoch": 3.9186189889025895, "grad_norm": 0.06620101630687714, "learning_rate": 0.01, "loss": 1.9894, "step": 38136 }, { "epoch": 3.9189272503082613, "grad_norm": 0.13302361965179443, "learning_rate": 0.01, "loss": 1.9997, "step": 38139 }, { "epoch": 3.9192355117139335, "grad_norm": 0.05435695871710777, "learning_rate": 0.01, "loss": 1.9911, "step": 38142 }, { "epoch": 3.9195437731196057, "grad_norm": 0.041471030563116074, "learning_rate": 0.01, "loss": 1.9936, "step": 38145 }, { "epoch": 3.9198520345252774, "grad_norm": 0.032130394130945206, "learning_rate": 0.01, "loss": 1.9983, "step": 38148 }, { "epoch": 3.920160295930949, "grad_norm": 0.032780006527900696, "learning_rate": 0.01, "loss": 1.9729, "step": 38151 }, { "epoch": 3.9204685573366214, "grad_norm": 0.03699677437543869, "learning_rate": 0.01, "loss": 2.0168, "step": 38154 }, { "epoch": 3.9207768187422936, "grad_norm": 0.04473430663347244, "learning_rate": 0.01, "loss": 2.0035, "step": 38157 }, { "epoch": 3.9210850801479653, "grad_norm": 0.06131662428379059, "learning_rate": 0.01, "loss": 2.0053, "step": 38160 }, { "epoch": 3.9213933415536375, "grad_norm": 0.10085607320070267, "learning_rate": 0.01, "loss": 1.9963, "step": 38163 }, { "epoch": 3.9217016029593097, "grad_norm": 0.11734067648649216, "learning_rate": 0.01, "loss": 1.9955, "step": 38166 }, { "epoch": 3.9220098643649814, "grad_norm": 0.09287893027067184, "learning_rate": 0.01, "loss": 1.9914, "step": 38169 }, { "epoch": 3.9223181257706536, "grad_norm": 0.058617495000362396, "learning_rate": 0.01, "loss": 1.9977, "step": 38172 }, { "epoch": 3.9226263871763254, "grad_norm": 0.0597362145781517, "learning_rate": 0.01, "loss": 1.9811, "step": 38175 }, { "epoch": 3.9229346485819976, "grad_norm": 0.046750448644161224, "learning_rate": 0.01, "loss": 1.9766, "step": 38178 }, { "epoch": 3.9232429099876693, "grad_norm": 0.03863513097167015, "learning_rate": 0.01, "loss": 2.0081, "step": 38181 }, { "epoch": 3.9235511713933415, "grad_norm": 0.03276417404413223, "learning_rate": 0.01, "loss": 2.0101, "step": 38184 }, { "epoch": 3.9238594327990137, "grad_norm": 0.03270519897341728, "learning_rate": 0.01, "loss": 1.9997, "step": 38187 }, { "epoch": 3.9241676942046855, "grad_norm": 0.05820414423942566, "learning_rate": 0.01, "loss": 2.0085, "step": 38190 }, { "epoch": 3.9244759556103577, "grad_norm": 0.0589318610727787, "learning_rate": 0.01, "loss": 2.015, "step": 38193 }, { "epoch": 3.92478421701603, "grad_norm": 0.05013816058635712, "learning_rate": 0.01, "loss": 1.9994, "step": 38196 }, { "epoch": 3.9250924784217016, "grad_norm": 0.10934565216302872, "learning_rate": 0.01, "loss": 2.0149, "step": 38199 }, { "epoch": 3.9254007398273734, "grad_norm": 0.08114335685968399, "learning_rate": 0.01, "loss": 1.9721, "step": 38202 }, { "epoch": 3.9257090012330456, "grad_norm": 0.056287068873643875, "learning_rate": 0.01, "loss": 1.9753, "step": 38205 }, { "epoch": 3.9260172626387178, "grad_norm": 0.04509197920560837, "learning_rate": 0.01, "loss": 2.0047, "step": 38208 }, { "epoch": 3.9263255240443895, "grad_norm": 0.04409080743789673, "learning_rate": 0.01, "loss": 1.9804, "step": 38211 }, { "epoch": 3.9266337854500617, "grad_norm": 0.044174257665872574, "learning_rate": 0.01, "loss": 1.9897, "step": 38214 }, { "epoch": 3.926942046855734, "grad_norm": 0.05122366175055504, "learning_rate": 0.01, "loss": 1.9924, "step": 38217 }, { "epoch": 3.9272503082614056, "grad_norm": 0.12582433223724365, "learning_rate": 0.01, "loss": 1.9903, "step": 38220 }, { "epoch": 3.927558569667078, "grad_norm": 0.060516782104969025, "learning_rate": 0.01, "loss": 2.0088, "step": 38223 }, { "epoch": 3.9278668310727496, "grad_norm": 0.04210484027862549, "learning_rate": 0.01, "loss": 2.0051, "step": 38226 }, { "epoch": 3.928175092478422, "grad_norm": 0.043509434908628464, "learning_rate": 0.01, "loss": 1.9963, "step": 38229 }, { "epoch": 3.9284833538840935, "grad_norm": 0.03601152077317238, "learning_rate": 0.01, "loss": 1.9945, "step": 38232 }, { "epoch": 3.9287916152897657, "grad_norm": 0.04590906575322151, "learning_rate": 0.01, "loss": 2.01, "step": 38235 }, { "epoch": 3.929099876695438, "grad_norm": 0.06336984038352966, "learning_rate": 0.01, "loss": 1.9757, "step": 38238 }, { "epoch": 3.9294081381011097, "grad_norm": 0.07014600187540054, "learning_rate": 0.01, "loss": 1.9891, "step": 38241 }, { "epoch": 3.929716399506782, "grad_norm": 0.07760234922170639, "learning_rate": 0.01, "loss": 2.0032, "step": 38244 }, { "epoch": 3.9300246609124536, "grad_norm": 0.1088213101029396, "learning_rate": 0.01, "loss": 2.0149, "step": 38247 }, { "epoch": 3.930332922318126, "grad_norm": 0.04306039586663246, "learning_rate": 0.01, "loss": 2.014, "step": 38250 }, { "epoch": 3.9306411837237976, "grad_norm": 0.057269759476184845, "learning_rate": 0.01, "loss": 1.9873, "step": 38253 }, { "epoch": 3.9309494451294698, "grad_norm": 0.04819104075431824, "learning_rate": 0.01, "loss": 2.011, "step": 38256 }, { "epoch": 3.931257706535142, "grad_norm": 0.038784999400377274, "learning_rate": 0.01, "loss": 1.9981, "step": 38259 }, { "epoch": 3.9315659679408137, "grad_norm": 0.08599941432476044, "learning_rate": 0.01, "loss": 1.9838, "step": 38262 }, { "epoch": 3.931874229346486, "grad_norm": 0.10680285841226578, "learning_rate": 0.01, "loss": 1.9891, "step": 38265 }, { "epoch": 3.932182490752158, "grad_norm": 0.09683788567781448, "learning_rate": 0.01, "loss": 2.0191, "step": 38268 }, { "epoch": 3.93249075215783, "grad_norm": 0.040799640119075775, "learning_rate": 0.01, "loss": 1.9741, "step": 38271 }, { "epoch": 3.9327990135635016, "grad_norm": 0.08265798538923264, "learning_rate": 0.01, "loss": 2.0359, "step": 38274 }, { "epoch": 3.933107274969174, "grad_norm": 0.0628572627902031, "learning_rate": 0.01, "loss": 2.018, "step": 38277 }, { "epoch": 3.933415536374846, "grad_norm": 0.06475923955440521, "learning_rate": 0.01, "loss": 2.0023, "step": 38280 }, { "epoch": 3.9337237977805177, "grad_norm": 0.043740466237068176, "learning_rate": 0.01, "loss": 2.0003, "step": 38283 }, { "epoch": 3.93403205918619, "grad_norm": 0.029720323160290718, "learning_rate": 0.01, "loss": 1.9905, "step": 38286 }, { "epoch": 3.934340320591862, "grad_norm": 0.050941772758960724, "learning_rate": 0.01, "loss": 1.9757, "step": 38289 }, { "epoch": 3.934648581997534, "grad_norm": 0.1068761870265007, "learning_rate": 0.01, "loss": 2.0054, "step": 38292 }, { "epoch": 3.934956843403206, "grad_norm": 0.06410616636276245, "learning_rate": 0.01, "loss": 1.9931, "step": 38295 }, { "epoch": 3.935265104808878, "grad_norm": 0.10598262399435043, "learning_rate": 0.01, "loss": 2.0068, "step": 38298 }, { "epoch": 3.93557336621455, "grad_norm": 0.05296116694808006, "learning_rate": 0.01, "loss": 1.9625, "step": 38301 }, { "epoch": 3.9358816276202218, "grad_norm": 0.04025321081280708, "learning_rate": 0.01, "loss": 2.0119, "step": 38304 }, { "epoch": 3.936189889025894, "grad_norm": 0.11896882951259613, "learning_rate": 0.01, "loss": 2.0066, "step": 38307 }, { "epoch": 3.936498150431566, "grad_norm": 0.07203146070241928, "learning_rate": 0.01, "loss": 1.9781, "step": 38310 }, { "epoch": 3.936806411837238, "grad_norm": 0.05813155323266983, "learning_rate": 0.01, "loss": 1.9872, "step": 38313 }, { "epoch": 3.93711467324291, "grad_norm": 0.0439312644302845, "learning_rate": 0.01, "loss": 1.9977, "step": 38316 }, { "epoch": 3.937422934648582, "grad_norm": 0.05411090329289436, "learning_rate": 0.01, "loss": 2.0366, "step": 38319 }, { "epoch": 3.937731196054254, "grad_norm": 0.035287659615278244, "learning_rate": 0.01, "loss": 2.0147, "step": 38322 }, { "epoch": 3.938039457459926, "grad_norm": 0.08188273012638092, "learning_rate": 0.01, "loss": 1.9825, "step": 38325 }, { "epoch": 3.938347718865598, "grad_norm": 0.06879545003175735, "learning_rate": 0.01, "loss": 2.0037, "step": 38328 }, { "epoch": 3.93865598027127, "grad_norm": 0.06965189427137375, "learning_rate": 0.01, "loss": 1.9757, "step": 38331 }, { "epoch": 3.938964241676942, "grad_norm": 0.07698633521795273, "learning_rate": 0.01, "loss": 1.9823, "step": 38334 }, { "epoch": 3.939272503082614, "grad_norm": 0.06989213079214096, "learning_rate": 0.01, "loss": 1.9765, "step": 38337 }, { "epoch": 3.9395807644882863, "grad_norm": 0.09224730730056763, "learning_rate": 0.01, "loss": 1.9898, "step": 38340 }, { "epoch": 3.939889025893958, "grad_norm": 0.15504223108291626, "learning_rate": 0.01, "loss": 2.0109, "step": 38343 }, { "epoch": 3.94019728729963, "grad_norm": 0.06935428082942963, "learning_rate": 0.01, "loss": 2.0157, "step": 38346 }, { "epoch": 3.940505548705302, "grad_norm": 0.051906321197748184, "learning_rate": 0.01, "loss": 1.9971, "step": 38349 }, { "epoch": 3.940813810110974, "grad_norm": 0.059517163783311844, "learning_rate": 0.01, "loss": 1.9764, "step": 38352 }, { "epoch": 3.941122071516646, "grad_norm": 0.031905319541692734, "learning_rate": 0.01, "loss": 1.9958, "step": 38355 }, { "epoch": 3.941430332922318, "grad_norm": 0.06473187357187271, "learning_rate": 0.01, "loss": 1.9852, "step": 38358 }, { "epoch": 3.9417385943279903, "grad_norm": 0.0756862536072731, "learning_rate": 0.01, "loss": 2.0025, "step": 38361 }, { "epoch": 3.942046855733662, "grad_norm": 0.06513907015323639, "learning_rate": 0.01, "loss": 2.0076, "step": 38364 }, { "epoch": 3.9423551171393343, "grad_norm": 0.0908990353345871, "learning_rate": 0.01, "loss": 1.9868, "step": 38367 }, { "epoch": 3.942663378545006, "grad_norm": 0.06427323073148727, "learning_rate": 0.01, "loss": 1.9848, "step": 38370 }, { "epoch": 3.9429716399506782, "grad_norm": 0.09153769910335541, "learning_rate": 0.01, "loss": 1.9816, "step": 38373 }, { "epoch": 3.94327990135635, "grad_norm": 0.0451352633535862, "learning_rate": 0.01, "loss": 1.9973, "step": 38376 }, { "epoch": 3.943588162762022, "grad_norm": 0.06844813376665115, "learning_rate": 0.01, "loss": 1.9987, "step": 38379 }, { "epoch": 3.9438964241676944, "grad_norm": 0.07901204377412796, "learning_rate": 0.01, "loss": 2.0011, "step": 38382 }, { "epoch": 3.944204685573366, "grad_norm": 0.08094186335802078, "learning_rate": 0.01, "loss": 2.0076, "step": 38385 }, { "epoch": 3.9445129469790383, "grad_norm": 0.09053739905357361, "learning_rate": 0.01, "loss": 1.9936, "step": 38388 }, { "epoch": 3.9448212083847105, "grad_norm": 0.04751008749008179, "learning_rate": 0.01, "loss": 1.993, "step": 38391 }, { "epoch": 3.9451294697903823, "grad_norm": 0.042345285415649414, "learning_rate": 0.01, "loss": 1.9857, "step": 38394 }, { "epoch": 3.945437731196054, "grad_norm": 0.1027415469288826, "learning_rate": 0.01, "loss": 1.9761, "step": 38397 }, { "epoch": 3.945745992601726, "grad_norm": 0.12780915200710297, "learning_rate": 0.01, "loss": 2.0122, "step": 38400 }, { "epoch": 3.9460542540073984, "grad_norm": 0.0731390118598938, "learning_rate": 0.01, "loss": 2.0005, "step": 38403 }, { "epoch": 3.94636251541307, "grad_norm": 0.07617928087711334, "learning_rate": 0.01, "loss": 2.0131, "step": 38406 }, { "epoch": 3.9466707768187423, "grad_norm": 0.049755457788705826, "learning_rate": 0.01, "loss": 1.9756, "step": 38409 }, { "epoch": 3.9469790382244145, "grad_norm": 0.043812233954668045, "learning_rate": 0.01, "loss": 1.9935, "step": 38412 }, { "epoch": 3.9472872996300863, "grad_norm": 0.07829032093286514, "learning_rate": 0.01, "loss": 2.0004, "step": 38415 }, { "epoch": 3.9475955610357585, "grad_norm": 0.056485142558813095, "learning_rate": 0.01, "loss": 2.0117, "step": 38418 }, { "epoch": 3.9479038224414302, "grad_norm": 0.09569665789604187, "learning_rate": 0.01, "loss": 1.9906, "step": 38421 }, { "epoch": 3.9482120838471024, "grad_norm": 0.07466506212949753, "learning_rate": 0.01, "loss": 2.0046, "step": 38424 }, { "epoch": 3.948520345252774, "grad_norm": 0.08420269191265106, "learning_rate": 0.01, "loss": 2.0009, "step": 38427 }, { "epoch": 3.9488286066584464, "grad_norm": 0.05188721418380737, "learning_rate": 0.01, "loss": 2.0409, "step": 38430 }, { "epoch": 3.9491368680641186, "grad_norm": 0.042890697717666626, "learning_rate": 0.01, "loss": 1.9798, "step": 38433 }, { "epoch": 3.9494451294697903, "grad_norm": 0.05752531811594963, "learning_rate": 0.01, "loss": 1.975, "step": 38436 }, { "epoch": 3.9497533908754625, "grad_norm": 0.0403323695063591, "learning_rate": 0.01, "loss": 1.9951, "step": 38439 }, { "epoch": 3.9500616522811343, "grad_norm": 0.035122547298669815, "learning_rate": 0.01, "loss": 1.9995, "step": 38442 }, { "epoch": 3.9503699136868065, "grad_norm": 0.04063395410776138, "learning_rate": 0.01, "loss": 2.0264, "step": 38445 }, { "epoch": 3.950678175092478, "grad_norm": 0.05300283804535866, "learning_rate": 0.01, "loss": 2.0159, "step": 38448 }, { "epoch": 3.9509864364981504, "grad_norm": 0.06941550225019455, "learning_rate": 0.01, "loss": 2.0155, "step": 38451 }, { "epoch": 3.9512946979038226, "grad_norm": 0.04268152639269829, "learning_rate": 0.01, "loss": 2.0116, "step": 38454 }, { "epoch": 3.9516029593094943, "grad_norm": 0.058871544897556305, "learning_rate": 0.01, "loss": 1.9888, "step": 38457 }, { "epoch": 3.9519112207151665, "grad_norm": 0.0643804594874382, "learning_rate": 0.01, "loss": 1.9971, "step": 38460 }, { "epoch": 3.9522194821208387, "grad_norm": 0.04509326070547104, "learning_rate": 0.01, "loss": 2.0231, "step": 38463 }, { "epoch": 3.9525277435265105, "grad_norm": 0.04596889764070511, "learning_rate": 0.01, "loss": 1.9926, "step": 38466 }, { "epoch": 3.9528360049321822, "grad_norm": 0.08639726787805557, "learning_rate": 0.01, "loss": 1.973, "step": 38469 }, { "epoch": 3.9531442663378544, "grad_norm": 0.08404930680990219, "learning_rate": 0.01, "loss": 1.9867, "step": 38472 }, { "epoch": 3.9534525277435266, "grad_norm": 0.09206783026456833, "learning_rate": 0.01, "loss": 1.968, "step": 38475 }, { "epoch": 3.9537607891491984, "grad_norm": 0.041316352784633636, "learning_rate": 0.01, "loss": 1.9764, "step": 38478 }, { "epoch": 3.9540690505548706, "grad_norm": 0.07000883668661118, "learning_rate": 0.01, "loss": 2.0071, "step": 38481 }, { "epoch": 3.9543773119605428, "grad_norm": 0.048716410994529724, "learning_rate": 0.01, "loss": 1.9984, "step": 38484 }, { "epoch": 3.9546855733662145, "grad_norm": 0.05251453444361687, "learning_rate": 0.01, "loss": 2.0007, "step": 38487 }, { "epoch": 3.9549938347718867, "grad_norm": 0.04517770931124687, "learning_rate": 0.01, "loss": 1.9998, "step": 38490 }, { "epoch": 3.9553020961775585, "grad_norm": 0.07227790355682373, "learning_rate": 0.01, "loss": 1.9965, "step": 38493 }, { "epoch": 3.9556103575832307, "grad_norm": 0.09953956305980682, "learning_rate": 0.01, "loss": 1.9875, "step": 38496 }, { "epoch": 3.9559186189889024, "grad_norm": 0.06194831430912018, "learning_rate": 0.01, "loss": 2.0008, "step": 38499 }, { "epoch": 3.9562268803945746, "grad_norm": 0.10566883534193039, "learning_rate": 0.01, "loss": 1.9839, "step": 38502 }, { "epoch": 3.956535141800247, "grad_norm": 0.04950516298413277, "learning_rate": 0.01, "loss": 1.9868, "step": 38505 }, { "epoch": 3.9568434032059185, "grad_norm": 0.04657790809869766, "learning_rate": 0.01, "loss": 1.9794, "step": 38508 }, { "epoch": 3.9571516646115907, "grad_norm": 0.05642826855182648, "learning_rate": 0.01, "loss": 1.9739, "step": 38511 }, { "epoch": 3.9574599260172625, "grad_norm": 0.049003373831510544, "learning_rate": 0.01, "loss": 1.9892, "step": 38514 }, { "epoch": 3.9577681874229347, "grad_norm": 0.05019281804561615, "learning_rate": 0.01, "loss": 1.9758, "step": 38517 }, { "epoch": 3.9580764488286064, "grad_norm": 0.05060233548283577, "learning_rate": 0.01, "loss": 2.0093, "step": 38520 }, { "epoch": 3.9583847102342786, "grad_norm": 0.051873739808797836, "learning_rate": 0.01, "loss": 1.998, "step": 38523 }, { "epoch": 3.958692971639951, "grad_norm": 0.11860267072916031, "learning_rate": 0.01, "loss": 1.9988, "step": 38526 }, { "epoch": 3.9590012330456226, "grad_norm": 0.03894282132387161, "learning_rate": 0.01, "loss": 2.0014, "step": 38529 }, { "epoch": 3.9593094944512948, "grad_norm": 0.0839400663971901, "learning_rate": 0.01, "loss": 1.9988, "step": 38532 }, { "epoch": 3.959617755856967, "grad_norm": 0.10270547866821289, "learning_rate": 0.01, "loss": 1.9829, "step": 38535 }, { "epoch": 3.9599260172626387, "grad_norm": 0.07309349626302719, "learning_rate": 0.01, "loss": 2.0124, "step": 38538 }, { "epoch": 3.9602342786683105, "grad_norm": 0.03441464155912399, "learning_rate": 0.01, "loss": 2.0036, "step": 38541 }, { "epoch": 3.9605425400739827, "grad_norm": 0.04434891417622566, "learning_rate": 0.01, "loss": 1.9907, "step": 38544 }, { "epoch": 3.960850801479655, "grad_norm": 0.030151626095175743, "learning_rate": 0.01, "loss": 1.9991, "step": 38547 }, { "epoch": 3.9611590628853266, "grad_norm": 0.03249426558613777, "learning_rate": 0.01, "loss": 1.9883, "step": 38550 }, { "epoch": 3.961467324290999, "grad_norm": 0.06774075329303741, "learning_rate": 0.01, "loss": 1.9894, "step": 38553 }, { "epoch": 3.961775585696671, "grad_norm": 0.1510474681854248, "learning_rate": 0.01, "loss": 1.989, "step": 38556 }, { "epoch": 3.9620838471023427, "grad_norm": 0.080832839012146, "learning_rate": 0.01, "loss": 2.0033, "step": 38559 }, { "epoch": 3.962392108508015, "grad_norm": 0.08914501219987869, "learning_rate": 0.01, "loss": 2.0032, "step": 38562 }, { "epoch": 3.9627003699136867, "grad_norm": 0.10509749501943588, "learning_rate": 0.01, "loss": 1.9611, "step": 38565 }, { "epoch": 3.963008631319359, "grad_norm": 0.0980307012796402, "learning_rate": 0.01, "loss": 2.006, "step": 38568 }, { "epoch": 3.9633168927250306, "grad_norm": 0.04917595908045769, "learning_rate": 0.01, "loss": 2.0061, "step": 38571 }, { "epoch": 3.963625154130703, "grad_norm": 0.04090237617492676, "learning_rate": 0.01, "loss": 2.0112, "step": 38574 }, { "epoch": 3.963933415536375, "grad_norm": 0.06782566010951996, "learning_rate": 0.01, "loss": 2.0158, "step": 38577 }, { "epoch": 3.9642416769420468, "grad_norm": 0.04495246335864067, "learning_rate": 0.01, "loss": 1.9784, "step": 38580 }, { "epoch": 3.964549938347719, "grad_norm": 0.11113790422677994, "learning_rate": 0.01, "loss": 1.9719, "step": 38583 }, { "epoch": 3.9648581997533907, "grad_norm": 0.09196839481592178, "learning_rate": 0.01, "loss": 1.997, "step": 38586 }, { "epoch": 3.965166461159063, "grad_norm": 0.07392636686563492, "learning_rate": 0.01, "loss": 2.0102, "step": 38589 }, { "epoch": 3.9654747225647347, "grad_norm": 0.0521097257733345, "learning_rate": 0.01, "loss": 1.983, "step": 38592 }, { "epoch": 3.965782983970407, "grad_norm": 0.05052189901471138, "learning_rate": 0.01, "loss": 1.9929, "step": 38595 }, { "epoch": 3.966091245376079, "grad_norm": 0.08592119067907333, "learning_rate": 0.01, "loss": 1.9949, "step": 38598 }, { "epoch": 3.966399506781751, "grad_norm": 0.08535821735858917, "learning_rate": 0.01, "loss": 1.9743, "step": 38601 }, { "epoch": 3.966707768187423, "grad_norm": 0.06658685207366943, "learning_rate": 0.01, "loss": 2.0038, "step": 38604 }, { "epoch": 3.967016029593095, "grad_norm": 0.04116528108716011, "learning_rate": 0.01, "loss": 1.969, "step": 38607 }, { "epoch": 3.967324290998767, "grad_norm": 0.04124008119106293, "learning_rate": 0.01, "loss": 1.9889, "step": 38610 }, { "epoch": 3.967632552404439, "grad_norm": 0.047569263726472855, "learning_rate": 0.01, "loss": 1.9848, "step": 38613 }, { "epoch": 3.967940813810111, "grad_norm": 0.04800506308674812, "learning_rate": 0.01, "loss": 2.0008, "step": 38616 }, { "epoch": 3.968249075215783, "grad_norm": 0.06227673590183258, "learning_rate": 0.01, "loss": 2.0099, "step": 38619 }, { "epoch": 3.968557336621455, "grad_norm": 0.036228880286216736, "learning_rate": 0.01, "loss": 1.9945, "step": 38622 }, { "epoch": 3.968865598027127, "grad_norm": 0.03745630383491516, "learning_rate": 0.01, "loss": 1.9916, "step": 38625 }, { "epoch": 3.969173859432799, "grad_norm": 0.08907367289066315, "learning_rate": 0.01, "loss": 2.0278, "step": 38628 }, { "epoch": 3.969482120838471, "grad_norm": 0.054506488144397736, "learning_rate": 0.01, "loss": 2.0001, "step": 38631 }, { "epoch": 3.969790382244143, "grad_norm": 0.04185614362359047, "learning_rate": 0.01, "loss": 1.9847, "step": 38634 }, { "epoch": 3.970098643649815, "grad_norm": 0.04917627200484276, "learning_rate": 0.01, "loss": 2.0155, "step": 38637 }, { "epoch": 3.970406905055487, "grad_norm": 0.040258195251226425, "learning_rate": 0.01, "loss": 2.0119, "step": 38640 }, { "epoch": 3.970715166461159, "grad_norm": 0.08127589523792267, "learning_rate": 0.01, "loss": 1.9753, "step": 38643 }, { "epoch": 3.971023427866831, "grad_norm": 0.07298692315816879, "learning_rate": 0.01, "loss": 1.9958, "step": 38646 }, { "epoch": 3.9713316892725032, "grad_norm": 0.09801699966192245, "learning_rate": 0.01, "loss": 1.9747, "step": 38649 }, { "epoch": 3.971639950678175, "grad_norm": 0.10016069561243057, "learning_rate": 0.01, "loss": 1.9986, "step": 38652 }, { "epoch": 3.971948212083847, "grad_norm": 0.038826216012239456, "learning_rate": 0.01, "loss": 1.9768, "step": 38655 }, { "epoch": 3.9722564734895194, "grad_norm": 0.04124876856803894, "learning_rate": 0.01, "loss": 1.977, "step": 38658 }, { "epoch": 3.972564734895191, "grad_norm": 0.03699329122900963, "learning_rate": 0.01, "loss": 2.0101, "step": 38661 }, { "epoch": 3.972872996300863, "grad_norm": 0.03506563603878021, "learning_rate": 0.01, "loss": 2.0035, "step": 38664 }, { "epoch": 3.973181257706535, "grad_norm": 0.08274342864751816, "learning_rate": 0.01, "loss": 1.9819, "step": 38667 }, { "epoch": 3.9734895191122073, "grad_norm": 0.10225984454154968, "learning_rate": 0.01, "loss": 2.0115, "step": 38670 }, { "epoch": 3.973797780517879, "grad_norm": 0.08367688208818436, "learning_rate": 0.01, "loss": 1.9809, "step": 38673 }, { "epoch": 3.974106041923551, "grad_norm": 0.05682690069079399, "learning_rate": 0.01, "loss": 2.0012, "step": 38676 }, { "epoch": 3.9744143033292234, "grad_norm": 0.03980601951479912, "learning_rate": 0.01, "loss": 1.9668, "step": 38679 }, { "epoch": 3.974722564734895, "grad_norm": 0.03310983628034592, "learning_rate": 0.01, "loss": 1.9976, "step": 38682 }, { "epoch": 3.9750308261405674, "grad_norm": 0.05037367716431618, "learning_rate": 0.01, "loss": 1.9845, "step": 38685 }, { "epoch": 3.975339087546239, "grad_norm": 0.09068721532821655, "learning_rate": 0.01, "loss": 1.9848, "step": 38688 }, { "epoch": 3.9756473489519113, "grad_norm": 0.10251244902610779, "learning_rate": 0.01, "loss": 1.9981, "step": 38691 }, { "epoch": 3.975955610357583, "grad_norm": 0.04860818758606911, "learning_rate": 0.01, "loss": 2.0073, "step": 38694 }, { "epoch": 3.9762638717632552, "grad_norm": 0.0460125096142292, "learning_rate": 0.01, "loss": 1.9925, "step": 38697 }, { "epoch": 3.9765721331689274, "grad_norm": 0.03295229375362396, "learning_rate": 0.01, "loss": 1.9682, "step": 38700 }, { "epoch": 3.976880394574599, "grad_norm": 0.0434846356511116, "learning_rate": 0.01, "loss": 1.9944, "step": 38703 }, { "epoch": 3.9771886559802714, "grad_norm": 0.04077495262026787, "learning_rate": 0.01, "loss": 2.0107, "step": 38706 }, { "epoch": 3.977496917385943, "grad_norm": 0.08263571560382843, "learning_rate": 0.01, "loss": 2.0125, "step": 38709 }, { "epoch": 3.9778051787916153, "grad_norm": 0.1033141016960144, "learning_rate": 0.01, "loss": 2.02, "step": 38712 }, { "epoch": 3.978113440197287, "grad_norm": 0.12253855168819427, "learning_rate": 0.01, "loss": 2.0039, "step": 38715 }, { "epoch": 3.9784217016029593, "grad_norm": 0.08476614207029343, "learning_rate": 0.01, "loss": 1.974, "step": 38718 }, { "epoch": 3.9787299630086315, "grad_norm": 0.08248502016067505, "learning_rate": 0.01, "loss": 1.9934, "step": 38721 }, { "epoch": 3.979038224414303, "grad_norm": 0.06338318437337875, "learning_rate": 0.01, "loss": 1.9632, "step": 38724 }, { "epoch": 3.9793464858199754, "grad_norm": 0.061125461012125015, "learning_rate": 0.01, "loss": 1.9902, "step": 38727 }, { "epoch": 3.9796547472256476, "grad_norm": 0.04191330447793007, "learning_rate": 0.01, "loss": 1.9837, "step": 38730 }, { "epoch": 3.9799630086313194, "grad_norm": 0.04181262478232384, "learning_rate": 0.01, "loss": 1.9952, "step": 38733 }, { "epoch": 3.980271270036991, "grad_norm": 0.054846856743097305, "learning_rate": 0.01, "loss": 2.0109, "step": 38736 }, { "epoch": 3.9805795314426633, "grad_norm": 0.1322845071554184, "learning_rate": 0.01, "loss": 1.9626, "step": 38739 }, { "epoch": 3.9808877928483355, "grad_norm": 0.06795060634613037, "learning_rate": 0.01, "loss": 1.9972, "step": 38742 }, { "epoch": 3.9811960542540072, "grad_norm": 0.04729272425174713, "learning_rate": 0.01, "loss": 2.0027, "step": 38745 }, { "epoch": 3.9815043156596794, "grad_norm": 0.05951160192489624, "learning_rate": 0.01, "loss": 2.0016, "step": 38748 }, { "epoch": 3.9818125770653516, "grad_norm": 0.14003396034240723, "learning_rate": 0.01, "loss": 2.0166, "step": 38751 }, { "epoch": 3.9821208384710234, "grad_norm": 0.04996626079082489, "learning_rate": 0.01, "loss": 2.002, "step": 38754 }, { "epoch": 3.9824290998766956, "grad_norm": 0.08134348690509796, "learning_rate": 0.01, "loss": 2.0159, "step": 38757 }, { "epoch": 3.9827373612823673, "grad_norm": 0.04592986777424812, "learning_rate": 0.01, "loss": 2.0107, "step": 38760 }, { "epoch": 3.9830456226880395, "grad_norm": 0.06769111007452011, "learning_rate": 0.01, "loss": 1.998, "step": 38763 }, { "epoch": 3.9833538840937113, "grad_norm": 0.04501201957464218, "learning_rate": 0.01, "loss": 2.0177, "step": 38766 }, { "epoch": 3.9836621454993835, "grad_norm": 0.07144643366336823, "learning_rate": 0.01, "loss": 1.9948, "step": 38769 }, { "epoch": 3.9839704069050557, "grad_norm": 0.05670906975865364, "learning_rate": 0.01, "loss": 2.0011, "step": 38772 }, { "epoch": 3.9842786683107274, "grad_norm": 0.03870624676346779, "learning_rate": 0.01, "loss": 2.0089, "step": 38775 }, { "epoch": 3.9845869297163996, "grad_norm": 0.08053532242774963, "learning_rate": 0.01, "loss": 1.9955, "step": 38778 }, { "epoch": 3.9848951911220714, "grad_norm": 0.03753774240612984, "learning_rate": 0.01, "loss": 1.9866, "step": 38781 }, { "epoch": 3.9852034525277436, "grad_norm": 0.04568708315491676, "learning_rate": 0.01, "loss": 1.9811, "step": 38784 }, { "epoch": 3.9855117139334153, "grad_norm": 0.03626095503568649, "learning_rate": 0.01, "loss": 1.995, "step": 38787 }, { "epoch": 3.9858199753390875, "grad_norm": 0.04620308801531792, "learning_rate": 0.01, "loss": 2.0172, "step": 38790 }, { "epoch": 3.9861282367447597, "grad_norm": 0.05767418071627617, "learning_rate": 0.01, "loss": 1.9905, "step": 38793 }, { "epoch": 3.9864364981504314, "grad_norm": 0.04969481751322746, "learning_rate": 0.01, "loss": 1.9795, "step": 38796 }, { "epoch": 3.9867447595561036, "grad_norm": 0.0532878078520298, "learning_rate": 0.01, "loss": 1.9875, "step": 38799 }, { "epoch": 3.987053020961776, "grad_norm": 0.07379619777202606, "learning_rate": 0.01, "loss": 2.0124, "step": 38802 }, { "epoch": 3.9873612823674476, "grad_norm": 0.07538430392742157, "learning_rate": 0.01, "loss": 2.0105, "step": 38805 }, { "epoch": 3.9876695437731193, "grad_norm": 0.0686052069067955, "learning_rate": 0.01, "loss": 2.0059, "step": 38808 }, { "epoch": 3.9879778051787915, "grad_norm": 0.08726216852664948, "learning_rate": 0.01, "loss": 1.9803, "step": 38811 }, { "epoch": 3.9882860665844637, "grad_norm": 0.08535965532064438, "learning_rate": 0.01, "loss": 1.9997, "step": 38814 }, { "epoch": 3.9885943279901355, "grad_norm": 0.046852223575115204, "learning_rate": 0.01, "loss": 1.982, "step": 38817 }, { "epoch": 3.9889025893958077, "grad_norm": 0.06200144812464714, "learning_rate": 0.01, "loss": 2.0423, "step": 38820 }, { "epoch": 3.98921085080148, "grad_norm": 0.03675130382180214, "learning_rate": 0.01, "loss": 2.0003, "step": 38823 }, { "epoch": 3.9895191122071516, "grad_norm": 0.054221536964178085, "learning_rate": 0.01, "loss": 2.0149, "step": 38826 }, { "epoch": 3.989827373612824, "grad_norm": 0.0411151684820652, "learning_rate": 0.01, "loss": 2.0047, "step": 38829 }, { "epoch": 3.9901356350184956, "grad_norm": 0.03962259367108345, "learning_rate": 0.01, "loss": 2.01, "step": 38832 }, { "epoch": 3.9904438964241677, "grad_norm": 0.04097359627485275, "learning_rate": 0.01, "loss": 1.9974, "step": 38835 }, { "epoch": 3.9907521578298395, "grad_norm": 0.13092494010925293, "learning_rate": 0.01, "loss": 1.9936, "step": 38838 }, { "epoch": 3.9910604192355117, "grad_norm": 0.03308350592851639, "learning_rate": 0.01, "loss": 1.9941, "step": 38841 }, { "epoch": 3.991368680641184, "grad_norm": 0.06447092443704605, "learning_rate": 0.01, "loss": 2.0022, "step": 38844 }, { "epoch": 3.9916769420468556, "grad_norm": 0.06456363946199417, "learning_rate": 0.01, "loss": 2.0051, "step": 38847 }, { "epoch": 3.991985203452528, "grad_norm": 0.04208895191550255, "learning_rate": 0.01, "loss": 2.01, "step": 38850 }, { "epoch": 3.9922934648582, "grad_norm": 0.03307265415787697, "learning_rate": 0.01, "loss": 1.9962, "step": 38853 }, { "epoch": 3.9926017262638718, "grad_norm": 0.06227843463420868, "learning_rate": 0.01, "loss": 1.9864, "step": 38856 }, { "epoch": 3.9929099876695435, "grad_norm": 0.1568191796541214, "learning_rate": 0.01, "loss": 1.9979, "step": 38859 }, { "epoch": 3.9932182490752157, "grad_norm": 0.05974143370985985, "learning_rate": 0.01, "loss": 2.0018, "step": 38862 }, { "epoch": 3.993526510480888, "grad_norm": 0.07882939279079437, "learning_rate": 0.01, "loss": 2.0102, "step": 38865 }, { "epoch": 3.9938347718865597, "grad_norm": 0.053341448307037354, "learning_rate": 0.01, "loss": 2.0123, "step": 38868 }, { "epoch": 3.994143033292232, "grad_norm": 0.0673101395368576, "learning_rate": 0.01, "loss": 2.0183, "step": 38871 }, { "epoch": 3.994451294697904, "grad_norm": 0.06340447813272476, "learning_rate": 0.01, "loss": 1.9966, "step": 38874 }, { "epoch": 3.994759556103576, "grad_norm": 0.06743529438972473, "learning_rate": 0.01, "loss": 2.0047, "step": 38877 }, { "epoch": 3.995067817509248, "grad_norm": 0.044702883809804916, "learning_rate": 0.01, "loss": 1.9927, "step": 38880 }, { "epoch": 3.9953760789149197, "grad_norm": 0.038102682679891586, "learning_rate": 0.01, "loss": 2.0005, "step": 38883 }, { "epoch": 3.995684340320592, "grad_norm": 0.0471016988158226, "learning_rate": 0.01, "loss": 1.9687, "step": 38886 }, { "epoch": 3.9959926017262637, "grad_norm": 0.10289987921714783, "learning_rate": 0.01, "loss": 2.0056, "step": 38889 }, { "epoch": 3.996300863131936, "grad_norm": 0.08012085407972336, "learning_rate": 0.01, "loss": 2.011, "step": 38892 }, { "epoch": 3.996609124537608, "grad_norm": 0.07357537001371384, "learning_rate": 0.01, "loss": 1.9837, "step": 38895 }, { "epoch": 3.99691738594328, "grad_norm": 0.08909714221954346, "learning_rate": 0.01, "loss": 2.0116, "step": 38898 }, { "epoch": 3.997225647348952, "grad_norm": 0.044727593660354614, "learning_rate": 0.01, "loss": 2.0027, "step": 38901 }, { "epoch": 3.9975339087546238, "grad_norm": 0.039593808352947235, "learning_rate": 0.01, "loss": 2.0071, "step": 38904 }, { "epoch": 3.997842170160296, "grad_norm": 0.03478686884045601, "learning_rate": 0.01, "loss": 2.0185, "step": 38907 }, { "epoch": 3.9981504315659677, "grad_norm": 0.09729648381471634, "learning_rate": 0.01, "loss": 2.0008, "step": 38910 }, { "epoch": 3.99845869297164, "grad_norm": 0.12499833852052689, "learning_rate": 0.01, "loss": 1.9861, "step": 38913 }, { "epoch": 3.998766954377312, "grad_norm": 0.10060276091098785, "learning_rate": 0.01, "loss": 1.9634, "step": 38916 }, { "epoch": 3.999075215782984, "grad_norm": 0.060861632227897644, "learning_rate": 0.01, "loss": 1.9902, "step": 38919 }, { "epoch": 3.999383477188656, "grad_norm": 0.058074526488780975, "learning_rate": 0.01, "loss": 1.9935, "step": 38922 }, { "epoch": 3.9996917385943282, "grad_norm": 0.08413925021886826, "learning_rate": 0.01, "loss": 1.9958, "step": 38925 }, { "epoch": 4.0, "grad_norm": 0.06637567281723022, "learning_rate": 0.01, "loss": 1.978, "step": 38928 }, { "epoch": 3.9937423061140747, "grad_norm": 0.04972624033689499, "learning_rate": 0.01, "loss": 2.0291, "step": 38931 }, { "epoch": 3.9940500615510874, "grad_norm": 0.04883033037185669, "learning_rate": 0.01, "loss": 2.0152, "step": 38934 }, { "epoch": 3.9943578169881, "grad_norm": 0.03551949933171272, "learning_rate": 0.01, "loss": 2.0028, "step": 38937 }, { "epoch": 3.994665572425113, "grad_norm": 0.03538177162408829, "learning_rate": 0.01, "loss": 2.0138, "step": 38940 }, { "epoch": 3.994973327862126, "grad_norm": 0.13709349930286407, "learning_rate": 0.01, "loss": 2.0197, "step": 38943 }, { "epoch": 3.9952810832991386, "grad_norm": 0.05646511912345886, "learning_rate": 0.01, "loss": 2.0143, "step": 38946 }, { "epoch": 3.995588838736151, "grad_norm": 0.05141003429889679, "learning_rate": 0.01, "loss": 2.025, "step": 38949 }, { "epoch": 3.9958965941731637, "grad_norm": 0.1064891591668129, "learning_rate": 0.01, "loss": 2.0142, "step": 38952 }, { "epoch": 3.9962043496101765, "grad_norm": 0.058718711137771606, "learning_rate": 0.01, "loss": 2.0061, "step": 38955 }, { "epoch": 3.9965121050471892, "grad_norm": 0.05866394191980362, "learning_rate": 0.01, "loss": 2.0145, "step": 38958 }, { "epoch": 3.996819860484202, "grad_norm": 0.036083538085222244, "learning_rate": 0.01, "loss": 1.9958, "step": 38961 }, { "epoch": 3.9971276159212143, "grad_norm": 0.05041573569178581, "learning_rate": 0.01, "loss": 2.0197, "step": 38964 }, { "epoch": 3.997435371358227, "grad_norm": 0.05070210620760918, "learning_rate": 0.01, "loss": 2.0093, "step": 38967 }, { "epoch": 3.99774312679524, "grad_norm": 0.06114819273352623, "learning_rate": 0.01, "loss": 2.0137, "step": 38970 }, { "epoch": 3.9980508822322527, "grad_norm": 0.05506499856710434, "learning_rate": 0.01, "loss": 1.9954, "step": 38973 }, { "epoch": 3.9983586376692655, "grad_norm": 0.06224251538515091, "learning_rate": 0.01, "loss": 2.0011, "step": 38976 }, { "epoch": 3.9986663931062782, "grad_norm": 0.05054394155740738, "learning_rate": 0.01, "loss": 1.9941, "step": 38979 }, { "epoch": 3.998974148543291, "grad_norm": 0.03862088546156883, "learning_rate": 0.01, "loss": 2.0132, "step": 38982 }, { "epoch": 3.999281903980304, "grad_norm": 0.04840533435344696, "learning_rate": 0.01, "loss": 2.0206, "step": 38985 }, { "epoch": 3.9995896594173166, "grad_norm": 0.05625883862376213, "learning_rate": 0.01, "loss": 1.993, "step": 38988 }, { "epoch": 3.9998974148543294, "grad_norm": 0.04202372580766678, "learning_rate": 0.01, "loss": 2.0116, "step": 38991 }, { "epoch": 4.000205170291342, "grad_norm": 0.03988848999142647, "learning_rate": 0.01, "loss": 2.005, "step": 38994 }, { "epoch": 4.000512925728355, "grad_norm": 0.08701229840517044, "learning_rate": 0.01, "loss": 1.9903, "step": 38997 }, { "epoch": 4.000820681165367, "grad_norm": 0.13237404823303223, "learning_rate": 0.01, "loss": 2.0183, "step": 39000 }, { "epoch": 4.00112843660238, "grad_norm": 0.0758156031370163, "learning_rate": 0.01, "loss": 1.9999, "step": 39003 }, { "epoch": 4.001436192039392, "grad_norm": 0.07759084552526474, "learning_rate": 0.01, "loss": 1.9862, "step": 39006 }, { "epoch": 4.001743947476405, "grad_norm": 0.08195871859788895, "learning_rate": 0.01, "loss": 2.0274, "step": 39009 }, { "epoch": 4.002051702913418, "grad_norm": 0.048866480588912964, "learning_rate": 0.01, "loss": 2.0046, "step": 39012 }, { "epoch": 4.002359458350431, "grad_norm": 0.04807426407933235, "learning_rate": 0.01, "loss": 1.9824, "step": 39015 }, { "epoch": 4.0026672137874435, "grad_norm": 0.07215370982885361, "learning_rate": 0.01, "loss": 2.0194, "step": 39018 }, { "epoch": 4.002974969224456, "grad_norm": 0.05422010272741318, "learning_rate": 0.01, "loss": 2.001, "step": 39021 }, { "epoch": 4.003282724661469, "grad_norm": 0.06720750778913498, "learning_rate": 0.01, "loss": 2.0191, "step": 39024 }, { "epoch": 4.003590480098482, "grad_norm": 0.04077715426683426, "learning_rate": 0.01, "loss": 2.0178, "step": 39027 }, { "epoch": 4.003898235535495, "grad_norm": 0.05866453796625137, "learning_rate": 0.01, "loss": 2.0035, "step": 39030 }, { "epoch": 4.004205990972507, "grad_norm": 0.03861930966377258, "learning_rate": 0.01, "loss": 2.0153, "step": 39033 }, { "epoch": 4.00451374640952, "grad_norm": 0.03546518459916115, "learning_rate": 0.01, "loss": 2.0189, "step": 39036 }, { "epoch": 4.004821501846533, "grad_norm": 0.034643933176994324, "learning_rate": 0.01, "loss": 2.0126, "step": 39039 }, { "epoch": 4.005129257283546, "grad_norm": 0.0831715315580368, "learning_rate": 0.01, "loss": 2.0098, "step": 39042 }, { "epoch": 4.005437012720558, "grad_norm": 0.06925207376480103, "learning_rate": 0.01, "loss": 2.0021, "step": 39045 }, { "epoch": 4.00574476815757, "grad_norm": 0.08311621099710464, "learning_rate": 0.01, "loss": 2.0402, "step": 39048 }, { "epoch": 4.006052523594583, "grad_norm": 0.08028697222471237, "learning_rate": 0.01, "loss": 2.0159, "step": 39051 }, { "epoch": 4.006360279031596, "grad_norm": 0.1259499341249466, "learning_rate": 0.01, "loss": 2.028, "step": 39054 }, { "epoch": 4.006668034468609, "grad_norm": 0.06442322582006454, "learning_rate": 0.01, "loss": 2.0176, "step": 39057 }, { "epoch": 4.0069757899056215, "grad_norm": 0.08281309902667999, "learning_rate": 0.01, "loss": 2.0191, "step": 39060 }, { "epoch": 4.007283545342634, "grad_norm": 0.04970608651638031, "learning_rate": 0.01, "loss": 2.0307, "step": 39063 }, { "epoch": 4.007591300779647, "grad_norm": 0.04361598566174507, "learning_rate": 0.01, "loss": 2.0094, "step": 39066 }, { "epoch": 4.00789905621666, "grad_norm": 0.04321054369211197, "learning_rate": 0.01, "loss": 2.002, "step": 39069 }, { "epoch": 4.008206811653673, "grad_norm": 0.05904306843876839, "learning_rate": 0.01, "loss": 2.0219, "step": 39072 }, { "epoch": 4.008514567090685, "grad_norm": 0.0728040412068367, "learning_rate": 0.01, "loss": 2.0044, "step": 39075 }, { "epoch": 4.008822322527698, "grad_norm": 0.09545755386352539, "learning_rate": 0.01, "loss": 1.9959, "step": 39078 }, { "epoch": 4.009130077964711, "grad_norm": 0.10636181384325027, "learning_rate": 0.01, "loss": 2.0097, "step": 39081 }, { "epoch": 4.009437833401724, "grad_norm": 0.08768682181835175, "learning_rate": 0.01, "loss": 2.0063, "step": 39084 }, { "epoch": 4.0097455888387366, "grad_norm": 0.04949640482664108, "learning_rate": 0.01, "loss": 2.0049, "step": 39087 }, { "epoch": 4.010053344275748, "grad_norm": 0.0413932166993618, "learning_rate": 0.01, "loss": 1.9751, "step": 39090 }, { "epoch": 4.010361099712761, "grad_norm": 0.02756902389228344, "learning_rate": 0.01, "loss": 1.9841, "step": 39093 }, { "epoch": 4.010668855149774, "grad_norm": 0.08485575020313263, "learning_rate": 0.01, "loss": 1.9895, "step": 39096 }, { "epoch": 4.010976610586787, "grad_norm": 0.0896422490477562, "learning_rate": 0.01, "loss": 1.9864, "step": 39099 }, { "epoch": 4.0112843660238, "grad_norm": 0.09696487337350845, "learning_rate": 0.01, "loss": 2.0172, "step": 39102 }, { "epoch": 4.011592121460812, "grad_norm": 0.059906743466854095, "learning_rate": 0.01, "loss": 1.9832, "step": 39105 }, { "epoch": 4.011899876897825, "grad_norm": 0.12596943974494934, "learning_rate": 0.01, "loss": 2.0141, "step": 39108 }, { "epoch": 4.012207632334838, "grad_norm": 0.08249086886644363, "learning_rate": 0.01, "loss": 2.0007, "step": 39111 }, { "epoch": 4.012515387771851, "grad_norm": 0.1073683500289917, "learning_rate": 0.01, "loss": 2.0216, "step": 39114 }, { "epoch": 4.0128231432088635, "grad_norm": 0.050193801522254944, "learning_rate": 0.01, "loss": 2.0219, "step": 39117 }, { "epoch": 4.013130898645876, "grad_norm": 0.04790155962109566, "learning_rate": 0.01, "loss": 1.9866, "step": 39120 }, { "epoch": 4.013438654082889, "grad_norm": 0.1130373477935791, "learning_rate": 0.01, "loss": 2.0338, "step": 39123 }, { "epoch": 4.013746409519902, "grad_norm": 0.16575901210308075, "learning_rate": 0.01, "loss": 1.9917, "step": 39126 }, { "epoch": 4.014054164956915, "grad_norm": 0.03896254301071167, "learning_rate": 0.01, "loss": 2.0133, "step": 39129 }, { "epoch": 4.014361920393927, "grad_norm": 0.033224210143089294, "learning_rate": 0.01, "loss": 1.9954, "step": 39132 }, { "epoch": 4.014669675830939, "grad_norm": 0.04422049596905708, "learning_rate": 0.01, "loss": 2.0278, "step": 39135 }, { "epoch": 4.014977431267952, "grad_norm": 0.046339549124240875, "learning_rate": 0.01, "loss": 2.0015, "step": 39138 }, { "epoch": 4.015285186704965, "grad_norm": 0.04534962773323059, "learning_rate": 0.01, "loss": 2.0238, "step": 39141 }, { "epoch": 4.015592942141978, "grad_norm": 0.0634172111749649, "learning_rate": 0.01, "loss": 2.0067, "step": 39144 }, { "epoch": 4.01590069757899, "grad_norm": 0.0435175783932209, "learning_rate": 0.01, "loss": 2.0146, "step": 39147 }, { "epoch": 4.016208453016003, "grad_norm": 0.04143211990594864, "learning_rate": 0.01, "loss": 1.9967, "step": 39150 }, { "epoch": 4.016516208453016, "grad_norm": 0.06689900159835815, "learning_rate": 0.01, "loss": 2.0033, "step": 39153 }, { "epoch": 4.016823963890029, "grad_norm": 0.19841893017292023, "learning_rate": 0.01, "loss": 2.0139, "step": 39156 }, { "epoch": 4.0171317193270415, "grad_norm": 0.09940902143716812, "learning_rate": 0.01, "loss": 2.0032, "step": 39159 }, { "epoch": 4.017439474764054, "grad_norm": 0.0738014280796051, "learning_rate": 0.01, "loss": 2.0058, "step": 39162 }, { "epoch": 4.017747230201067, "grad_norm": 0.06441953033208847, "learning_rate": 0.01, "loss": 1.9901, "step": 39165 }, { "epoch": 4.01805498563808, "grad_norm": 0.03884090855717659, "learning_rate": 0.01, "loss": 2.0078, "step": 39168 }, { "epoch": 4.018362741075093, "grad_norm": 0.036479830741882324, "learning_rate": 0.01, "loss": 1.9914, "step": 39171 }, { "epoch": 4.018670496512105, "grad_norm": 0.04188670963048935, "learning_rate": 0.01, "loss": 2.0239, "step": 39174 }, { "epoch": 4.018978251949118, "grad_norm": 0.07043974846601486, "learning_rate": 0.01, "loss": 2.0145, "step": 39177 }, { "epoch": 4.01928600738613, "grad_norm": 0.07807689160108566, "learning_rate": 0.01, "loss": 2.0254, "step": 39180 }, { "epoch": 4.019593762823143, "grad_norm": 0.05508783459663391, "learning_rate": 0.01, "loss": 2.0082, "step": 39183 }, { "epoch": 4.019901518260156, "grad_norm": 0.040311504155397415, "learning_rate": 0.01, "loss": 2.0097, "step": 39186 }, { "epoch": 4.020209273697168, "grad_norm": 0.03907238692045212, "learning_rate": 0.01, "loss": 2.0205, "step": 39189 }, { "epoch": 4.020517029134181, "grad_norm": 0.1371290385723114, "learning_rate": 0.01, "loss": 2.0055, "step": 39192 }, { "epoch": 4.020824784571194, "grad_norm": 0.05499713122844696, "learning_rate": 0.01, "loss": 2.0182, "step": 39195 }, { "epoch": 4.021132540008207, "grad_norm": 0.049838222563266754, "learning_rate": 0.01, "loss": 2.0118, "step": 39198 }, { "epoch": 4.0214402954452195, "grad_norm": 0.04565451666712761, "learning_rate": 0.01, "loss": 2.0178, "step": 39201 }, { "epoch": 4.021748050882232, "grad_norm": 0.06939809769392014, "learning_rate": 0.01, "loss": 1.9955, "step": 39204 }, { "epoch": 4.022055806319245, "grad_norm": 0.036337222903966904, "learning_rate": 0.01, "loss": 2.0002, "step": 39207 }, { "epoch": 4.022363561756258, "grad_norm": 0.11887694150209427, "learning_rate": 0.01, "loss": 2.0154, "step": 39210 }, { "epoch": 4.022671317193271, "grad_norm": 0.05074974149465561, "learning_rate": 0.01, "loss": 1.9788, "step": 39213 }, { "epoch": 4.022979072630283, "grad_norm": 0.04023103788495064, "learning_rate": 0.01, "loss": 2.0112, "step": 39216 }, { "epoch": 4.023286828067296, "grad_norm": 0.09158769249916077, "learning_rate": 0.01, "loss": 2.0184, "step": 39219 }, { "epoch": 4.023594583504309, "grad_norm": 0.06678687036037445, "learning_rate": 0.01, "loss": 2.0338, "step": 39222 }, { "epoch": 4.023902338941321, "grad_norm": 0.055356625467538834, "learning_rate": 0.01, "loss": 2.0084, "step": 39225 }, { "epoch": 4.024210094378334, "grad_norm": 0.051128923892974854, "learning_rate": 0.01, "loss": 2.0291, "step": 39228 }, { "epoch": 4.024517849815346, "grad_norm": 0.08770928531885147, "learning_rate": 0.01, "loss": 2.0006, "step": 39231 }, { "epoch": 4.024825605252359, "grad_norm": 0.06450860947370529, "learning_rate": 0.01, "loss": 1.9926, "step": 39234 }, { "epoch": 4.025133360689372, "grad_norm": 0.03998043015599251, "learning_rate": 0.01, "loss": 1.9993, "step": 39237 }, { "epoch": 4.025441116126385, "grad_norm": 0.036666858941316605, "learning_rate": 0.01, "loss": 2.0451, "step": 39240 }, { "epoch": 4.025748871563398, "grad_norm": 0.03850167244672775, "learning_rate": 0.01, "loss": 1.9825, "step": 39243 }, { "epoch": 4.02605662700041, "grad_norm": 0.03165037930011749, "learning_rate": 0.01, "loss": 1.9817, "step": 39246 }, { "epoch": 4.026364382437423, "grad_norm": 0.03471562638878822, "learning_rate": 0.01, "loss": 1.9923, "step": 39249 }, { "epoch": 4.026672137874436, "grad_norm": 0.10216906666755676, "learning_rate": 0.01, "loss": 1.9971, "step": 39252 }, { "epoch": 4.026979893311449, "grad_norm": 0.13467098772525787, "learning_rate": 0.01, "loss": 1.9973, "step": 39255 }, { "epoch": 4.0272876487484615, "grad_norm": 0.05322883278131485, "learning_rate": 0.01, "loss": 2.0068, "step": 39258 }, { "epoch": 4.027595404185474, "grad_norm": 0.0329531729221344, "learning_rate": 0.01, "loss": 2.0209, "step": 39261 }, { "epoch": 4.027903159622487, "grad_norm": 0.04283340275287628, "learning_rate": 0.01, "loss": 2.0052, "step": 39264 }, { "epoch": 4.0282109150595, "grad_norm": 0.09303436428308487, "learning_rate": 0.01, "loss": 2.003, "step": 39267 }, { "epoch": 4.028518670496512, "grad_norm": 0.04981641098856926, "learning_rate": 0.01, "loss": 1.9977, "step": 39270 }, { "epoch": 4.0288264259335245, "grad_norm": 0.05858089402318001, "learning_rate": 0.01, "loss": 2.0025, "step": 39273 }, { "epoch": 4.029134181370537, "grad_norm": 0.05271727591753006, "learning_rate": 0.01, "loss": 1.9937, "step": 39276 }, { "epoch": 4.02944193680755, "grad_norm": 0.0625569149851799, "learning_rate": 0.01, "loss": 2.0077, "step": 39279 }, { "epoch": 4.029749692244563, "grad_norm": 0.06783869862556458, "learning_rate": 0.01, "loss": 2.0183, "step": 39282 }, { "epoch": 4.030057447681576, "grad_norm": 0.05175579711794853, "learning_rate": 0.01, "loss": 2.0017, "step": 39285 }, { "epoch": 4.030365203118588, "grad_norm": 0.12327279895544052, "learning_rate": 0.01, "loss": 2.0061, "step": 39288 }, { "epoch": 4.030672958555601, "grad_norm": 0.09904535114765167, "learning_rate": 0.01, "loss": 2.0286, "step": 39291 }, { "epoch": 4.030980713992614, "grad_norm": 0.06502281874418259, "learning_rate": 0.01, "loss": 2.0119, "step": 39294 }, { "epoch": 4.031288469429627, "grad_norm": 0.05546945706009865, "learning_rate": 0.01, "loss": 2.0186, "step": 39297 }, { "epoch": 4.0315962248666395, "grad_norm": 0.03637825697660446, "learning_rate": 0.01, "loss": 1.9894, "step": 39300 }, { "epoch": 4.031903980303652, "grad_norm": 0.03696468472480774, "learning_rate": 0.01, "loss": 2.0207, "step": 39303 }, { "epoch": 4.032211735740665, "grad_norm": 0.11666533350944519, "learning_rate": 0.01, "loss": 2.0067, "step": 39306 }, { "epoch": 4.032519491177678, "grad_norm": 0.10067463666200638, "learning_rate": 0.01, "loss": 2.0312, "step": 39309 }, { "epoch": 4.032827246614691, "grad_norm": 0.0598643533885479, "learning_rate": 0.01, "loss": 2.0021, "step": 39312 }, { "epoch": 4.0331350020517025, "grad_norm": 0.057127151638269424, "learning_rate": 0.01, "loss": 1.9996, "step": 39315 }, { "epoch": 4.033442757488715, "grad_norm": 0.05786604434251785, "learning_rate": 0.01, "loss": 2.0131, "step": 39318 }, { "epoch": 4.033750512925728, "grad_norm": 0.040237389504909515, "learning_rate": 0.01, "loss": 2.0022, "step": 39321 }, { "epoch": 4.034058268362741, "grad_norm": 0.09420931339263916, "learning_rate": 0.01, "loss": 1.9952, "step": 39324 }, { "epoch": 4.034366023799754, "grad_norm": 0.11208292096853256, "learning_rate": 0.01, "loss": 2.0142, "step": 39327 }, { "epoch": 4.034673779236766, "grad_norm": 0.07000657171010971, "learning_rate": 0.01, "loss": 2.0025, "step": 39330 }, { "epoch": 4.034981534673779, "grad_norm": 0.05524434149265289, "learning_rate": 0.01, "loss": 2.0297, "step": 39333 }, { "epoch": 4.035289290110792, "grad_norm": 0.03453601896762848, "learning_rate": 0.01, "loss": 2.0086, "step": 39336 }, { "epoch": 4.035597045547805, "grad_norm": 0.046687569469213486, "learning_rate": 0.01, "loss": 1.9913, "step": 39339 }, { "epoch": 4.0359048009848175, "grad_norm": 0.04349486157298088, "learning_rate": 0.01, "loss": 2.0224, "step": 39342 }, { "epoch": 4.03621255642183, "grad_norm": 0.08332299441099167, "learning_rate": 0.01, "loss": 2.011, "step": 39345 }, { "epoch": 4.036520311858843, "grad_norm": 0.07960904389619827, "learning_rate": 0.01, "loss": 2.0046, "step": 39348 }, { "epoch": 4.036828067295856, "grad_norm": 0.06175240874290466, "learning_rate": 0.01, "loss": 2.0159, "step": 39351 }, { "epoch": 4.037135822732869, "grad_norm": 0.03728936240077019, "learning_rate": 0.01, "loss": 2.0074, "step": 39354 }, { "epoch": 4.037443578169881, "grad_norm": 0.046342454850673676, "learning_rate": 0.01, "loss": 2.0055, "step": 39357 }, { "epoch": 4.037751333606893, "grad_norm": 0.059486132115125656, "learning_rate": 0.01, "loss": 2.0171, "step": 39360 }, { "epoch": 4.038059089043906, "grad_norm": 0.09025575965642929, "learning_rate": 0.01, "loss": 2.0306, "step": 39363 }, { "epoch": 4.038366844480919, "grad_norm": 0.0599956177175045, "learning_rate": 0.01, "loss": 1.9936, "step": 39366 }, { "epoch": 4.038674599917932, "grad_norm": 0.06386277079582214, "learning_rate": 0.01, "loss": 2.0194, "step": 39369 }, { "epoch": 4.038982355354944, "grad_norm": 0.10979870706796646, "learning_rate": 0.01, "loss": 1.9965, "step": 39372 }, { "epoch": 4.039290110791957, "grad_norm": 0.13358891010284424, "learning_rate": 0.01, "loss": 1.9985, "step": 39375 }, { "epoch": 4.03959786622897, "grad_norm": 0.07506405562162399, "learning_rate": 0.01, "loss": 2.0069, "step": 39378 }, { "epoch": 4.039905621665983, "grad_norm": 0.03969530388712883, "learning_rate": 0.01, "loss": 2.0134, "step": 39381 }, { "epoch": 4.0402133771029956, "grad_norm": 0.06700876355171204, "learning_rate": 0.01, "loss": 2.005, "step": 39384 }, { "epoch": 4.040521132540008, "grad_norm": 0.06389278918504715, "learning_rate": 0.01, "loss": 2.015, "step": 39387 }, { "epoch": 4.040828887977021, "grad_norm": 0.04753594473004341, "learning_rate": 0.01, "loss": 2.0097, "step": 39390 }, { "epoch": 4.041136643414034, "grad_norm": 0.0517578050494194, "learning_rate": 0.01, "loss": 1.979, "step": 39393 }, { "epoch": 4.041444398851047, "grad_norm": 0.043198052793741226, "learning_rate": 0.01, "loss": 2.0231, "step": 39396 }, { "epoch": 4.0417521542880595, "grad_norm": 0.04130061715841293, "learning_rate": 0.01, "loss": 2.0426, "step": 39399 }, { "epoch": 4.042059909725072, "grad_norm": 0.037592917680740356, "learning_rate": 0.01, "loss": 2.0027, "step": 39402 }, { "epoch": 4.042367665162084, "grad_norm": 0.07257888466119766, "learning_rate": 0.01, "loss": 2.0057, "step": 39405 }, { "epoch": 4.042675420599097, "grad_norm": 0.08611748367547989, "learning_rate": 0.01, "loss": 2.0175, "step": 39408 }, { "epoch": 4.04298317603611, "grad_norm": 0.11179140955209732, "learning_rate": 0.01, "loss": 2.0172, "step": 39411 }, { "epoch": 4.0432909314731225, "grad_norm": 0.09140360355377197, "learning_rate": 0.01, "loss": 2.0149, "step": 39414 }, { "epoch": 4.043598686910135, "grad_norm": 0.040217410773038864, "learning_rate": 0.01, "loss": 2.0294, "step": 39417 }, { "epoch": 4.043906442347148, "grad_norm": 0.04423901066184044, "learning_rate": 0.01, "loss": 1.9824, "step": 39420 }, { "epoch": 4.044214197784161, "grad_norm": 0.047180719673633575, "learning_rate": 0.01, "loss": 1.9865, "step": 39423 }, { "epoch": 4.044521953221174, "grad_norm": 0.03836076334118843, "learning_rate": 0.01, "loss": 2.0081, "step": 39426 }, { "epoch": 4.044829708658186, "grad_norm": 0.08214177936315536, "learning_rate": 0.01, "loss": 2.0078, "step": 39429 }, { "epoch": 4.045137464095199, "grad_norm": 0.04551496356725693, "learning_rate": 0.01, "loss": 2.0025, "step": 39432 }, { "epoch": 4.045445219532212, "grad_norm": 0.08608072996139526, "learning_rate": 0.01, "loss": 1.9935, "step": 39435 }, { "epoch": 4.045752974969225, "grad_norm": 0.03998774662613869, "learning_rate": 0.01, "loss": 2.0074, "step": 39438 }, { "epoch": 4.0460607304062375, "grad_norm": 0.06219779700040817, "learning_rate": 0.01, "loss": 2.001, "step": 39441 }, { "epoch": 4.04636848584325, "grad_norm": 0.12329383194446564, "learning_rate": 0.01, "loss": 1.9988, "step": 39444 }, { "epoch": 4.046676241280263, "grad_norm": 0.10584307461977005, "learning_rate": 0.01, "loss": 1.9929, "step": 39447 }, { "epoch": 4.046983996717275, "grad_norm": 0.06407187879085541, "learning_rate": 0.01, "loss": 2.023, "step": 39450 }, { "epoch": 4.047291752154288, "grad_norm": 0.057736970484256744, "learning_rate": 0.01, "loss": 2.0166, "step": 39453 }, { "epoch": 4.0475995075913005, "grad_norm": 0.062264811247587204, "learning_rate": 0.01, "loss": 1.9873, "step": 39456 }, { "epoch": 4.047907263028313, "grad_norm": 0.0482734851539135, "learning_rate": 0.01, "loss": 1.9886, "step": 39459 }, { "epoch": 4.048215018465326, "grad_norm": 0.03942275047302246, "learning_rate": 0.01, "loss": 2.0209, "step": 39462 }, { "epoch": 4.048522773902339, "grad_norm": 0.08987545222043991, "learning_rate": 0.01, "loss": 2.0103, "step": 39465 }, { "epoch": 4.048830529339352, "grad_norm": 0.08133430033922195, "learning_rate": 0.01, "loss": 2.0039, "step": 39468 }, { "epoch": 4.049138284776364, "grad_norm": 0.03588191419839859, "learning_rate": 0.01, "loss": 2.0078, "step": 39471 }, { "epoch": 4.049446040213377, "grad_norm": 0.058000437915325165, "learning_rate": 0.01, "loss": 1.9893, "step": 39474 }, { "epoch": 4.04975379565039, "grad_norm": 0.08807062357664108, "learning_rate": 0.01, "loss": 2.0289, "step": 39477 }, { "epoch": 4.050061551087403, "grad_norm": 0.05153276026248932, "learning_rate": 0.01, "loss": 2.0242, "step": 39480 }, { "epoch": 4.0503693065244155, "grad_norm": 0.08411730825901031, "learning_rate": 0.01, "loss": 1.9963, "step": 39483 }, { "epoch": 4.050677061961428, "grad_norm": 0.09271445870399475, "learning_rate": 0.01, "loss": 2.0005, "step": 39486 }, { "epoch": 4.050984817398441, "grad_norm": 0.07173626124858856, "learning_rate": 0.01, "loss": 2.0146, "step": 39489 }, { "epoch": 4.051292572835454, "grad_norm": 0.058220986276865005, "learning_rate": 0.01, "loss": 2.0296, "step": 39492 }, { "epoch": 4.051600328272466, "grad_norm": 0.09059619903564453, "learning_rate": 0.01, "loss": 2.0276, "step": 39495 }, { "epoch": 4.0519080837094785, "grad_norm": 0.11159291118383408, "learning_rate": 0.01, "loss": 1.9806, "step": 39498 }, { "epoch": 4.052215839146491, "grad_norm": 0.03904344514012337, "learning_rate": 0.01, "loss": 2.0083, "step": 39501 }, { "epoch": 4.052523594583504, "grad_norm": 0.12682397663593292, "learning_rate": 0.01, "loss": 2.0227, "step": 39504 }, { "epoch": 4.052831350020517, "grad_norm": 0.06277670711278915, "learning_rate": 0.01, "loss": 1.9906, "step": 39507 }, { "epoch": 4.05313910545753, "grad_norm": 0.03622843325138092, "learning_rate": 0.01, "loss": 1.9835, "step": 39510 }, { "epoch": 4.053446860894542, "grad_norm": 0.056741926819086075, "learning_rate": 0.01, "loss": 2.0041, "step": 39513 }, { "epoch": 4.053754616331555, "grad_norm": 0.0654342994093895, "learning_rate": 0.01, "loss": 2.0023, "step": 39516 }, { "epoch": 4.054062371768568, "grad_norm": 0.05300883948802948, "learning_rate": 0.01, "loss": 2.0084, "step": 39519 }, { "epoch": 4.054370127205581, "grad_norm": 0.1103016659617424, "learning_rate": 0.01, "loss": 1.9686, "step": 39522 }, { "epoch": 4.0546778826425935, "grad_norm": 0.10315825045108795, "learning_rate": 0.01, "loss": 2.0048, "step": 39525 }, { "epoch": 4.054985638079606, "grad_norm": 0.11007767170667648, "learning_rate": 0.01, "loss": 1.9977, "step": 39528 }, { "epoch": 4.055293393516619, "grad_norm": 0.053403954952955246, "learning_rate": 0.01, "loss": 1.9443, "step": 39531 }, { "epoch": 4.055601148953632, "grad_norm": 0.05486089363694191, "learning_rate": 0.01, "loss": 2.0179, "step": 39534 }, { "epoch": 4.055908904390645, "grad_norm": 0.05680430680513382, "learning_rate": 0.01, "loss": 1.9991, "step": 39537 }, { "epoch": 4.056216659827657, "grad_norm": 0.07202226668596268, "learning_rate": 0.01, "loss": 2.0184, "step": 39540 }, { "epoch": 4.056524415264669, "grad_norm": 0.050710614770650864, "learning_rate": 0.01, "loss": 2.0005, "step": 39543 }, { "epoch": 4.056832170701682, "grad_norm": 0.05018226429820061, "learning_rate": 0.01, "loss": 2.0119, "step": 39546 }, { "epoch": 4.057139926138695, "grad_norm": 0.04539674147963524, "learning_rate": 0.01, "loss": 2.0258, "step": 39549 }, { "epoch": 4.057447681575708, "grad_norm": 0.0405915230512619, "learning_rate": 0.01, "loss": 2.0315, "step": 39552 }, { "epoch": 4.0577554370127205, "grad_norm": 0.04220306873321533, "learning_rate": 0.01, "loss": 2.0191, "step": 39555 }, { "epoch": 4.058063192449733, "grad_norm": 0.06802038848400116, "learning_rate": 0.01, "loss": 2.0144, "step": 39558 }, { "epoch": 4.058370947886746, "grad_norm": 0.16860038042068481, "learning_rate": 0.01, "loss": 1.9748, "step": 39561 }, { "epoch": 4.058678703323759, "grad_norm": 0.13039590418338776, "learning_rate": 0.01, "loss": 2.0167, "step": 39564 }, { "epoch": 4.058986458760772, "grad_norm": 0.05722019821405411, "learning_rate": 0.01, "loss": 1.9917, "step": 39567 }, { "epoch": 4.059294214197784, "grad_norm": 0.05984622612595558, "learning_rate": 0.01, "loss": 1.9915, "step": 39570 }, { "epoch": 4.059601969634797, "grad_norm": 0.04732242226600647, "learning_rate": 0.01, "loss": 1.9961, "step": 39573 }, { "epoch": 4.05990972507181, "grad_norm": 0.04328930750489235, "learning_rate": 0.01, "loss": 2.0046, "step": 39576 }, { "epoch": 4.060217480508823, "grad_norm": 0.06630559265613556, "learning_rate": 0.01, "loss": 2.0459, "step": 39579 }, { "epoch": 4.0605252359458355, "grad_norm": 0.06271739304065704, "learning_rate": 0.01, "loss": 2.0283, "step": 39582 }, { "epoch": 4.060832991382847, "grad_norm": 0.05598871782422066, "learning_rate": 0.01, "loss": 2.0322, "step": 39585 }, { "epoch": 4.06114074681986, "grad_norm": 0.04648594930768013, "learning_rate": 0.01, "loss": 2.0021, "step": 39588 }, { "epoch": 4.061448502256873, "grad_norm": 0.06579215824604034, "learning_rate": 0.01, "loss": 2.0039, "step": 39591 }, { "epoch": 4.061756257693886, "grad_norm": 0.05206866189837456, "learning_rate": 0.01, "loss": 2.0298, "step": 39594 }, { "epoch": 4.0620640131308985, "grad_norm": 0.06146420165896416, "learning_rate": 0.01, "loss": 1.998, "step": 39597 }, { "epoch": 4.062371768567911, "grad_norm": 0.09550274908542633, "learning_rate": 0.01, "loss": 2.023, "step": 39600 }, { "epoch": 4.062679524004924, "grad_norm": 0.041216351091861725, "learning_rate": 0.01, "loss": 2.0012, "step": 39603 }, { "epoch": 4.062987279441937, "grad_norm": 0.050457730889320374, "learning_rate": 0.01, "loss": 1.9998, "step": 39606 }, { "epoch": 4.06329503487895, "grad_norm": 0.03148980066180229, "learning_rate": 0.01, "loss": 2.0121, "step": 39609 }, { "epoch": 4.063602790315962, "grad_norm": 0.03415573388338089, "learning_rate": 0.01, "loss": 1.9832, "step": 39612 }, { "epoch": 4.063910545752975, "grad_norm": 0.038664404302835464, "learning_rate": 0.01, "loss": 1.9781, "step": 39615 }, { "epoch": 4.064218301189988, "grad_norm": 0.10088005661964417, "learning_rate": 0.01, "loss": 2.0241, "step": 39618 }, { "epoch": 4.064526056627001, "grad_norm": 0.05986418575048447, "learning_rate": 0.01, "loss": 1.9719, "step": 39621 }, { "epoch": 4.0648338120640135, "grad_norm": 0.04586298391222954, "learning_rate": 0.01, "loss": 2.0231, "step": 39624 }, { "epoch": 4.065141567501026, "grad_norm": 0.09720548987388611, "learning_rate": 0.01, "loss": 2.0234, "step": 39627 }, { "epoch": 4.065449322938038, "grad_norm": 0.1310441493988037, "learning_rate": 0.01, "loss": 2.0267, "step": 39630 }, { "epoch": 4.065757078375051, "grad_norm": 0.0445309616625309, "learning_rate": 0.01, "loss": 2.0295, "step": 39633 }, { "epoch": 4.066064833812064, "grad_norm": 0.05492424964904785, "learning_rate": 0.01, "loss": 2.0201, "step": 39636 }, { "epoch": 4.0663725892490765, "grad_norm": 0.09137557446956635, "learning_rate": 0.01, "loss": 1.9959, "step": 39639 }, { "epoch": 4.066680344686089, "grad_norm": 0.07394784688949585, "learning_rate": 0.01, "loss": 1.9933, "step": 39642 }, { "epoch": 4.066988100123102, "grad_norm": 0.0890335887670517, "learning_rate": 0.01, "loss": 2.0079, "step": 39645 }, { "epoch": 4.067295855560115, "grad_norm": 0.06005888432264328, "learning_rate": 0.01, "loss": 1.997, "step": 39648 }, { "epoch": 4.067603610997128, "grad_norm": 0.08164118975400925, "learning_rate": 0.01, "loss": 1.995, "step": 39651 }, { "epoch": 4.06791136643414, "grad_norm": 0.041462332010269165, "learning_rate": 0.01, "loss": 1.999, "step": 39654 }, { "epoch": 4.068219121871153, "grad_norm": 0.14200016856193542, "learning_rate": 0.01, "loss": 2.0083, "step": 39657 }, { "epoch": 4.068526877308166, "grad_norm": 0.04974464327096939, "learning_rate": 0.01, "loss": 2.0038, "step": 39660 }, { "epoch": 4.068834632745179, "grad_norm": 0.05781686678528786, "learning_rate": 0.01, "loss": 2.0022, "step": 39663 }, { "epoch": 4.0691423881821915, "grad_norm": 0.050336726009845734, "learning_rate": 0.01, "loss": 1.9971, "step": 39666 }, { "epoch": 4.069450143619204, "grad_norm": 0.0692603662610054, "learning_rate": 0.01, "loss": 2.024, "step": 39669 }, { "epoch": 4.069757899056217, "grad_norm": 0.052262064069509506, "learning_rate": 0.01, "loss": 2.0063, "step": 39672 }, { "epoch": 4.070065654493229, "grad_norm": 0.0743674710392952, "learning_rate": 0.01, "loss": 2.0361, "step": 39675 }, { "epoch": 4.070373409930242, "grad_norm": 0.03609304502606392, "learning_rate": 0.01, "loss": 1.9913, "step": 39678 }, { "epoch": 4.0706811653672546, "grad_norm": 0.05369256064295769, "learning_rate": 0.01, "loss": 2.027, "step": 39681 }, { "epoch": 4.070988920804267, "grad_norm": 0.04112347960472107, "learning_rate": 0.01, "loss": 2.0202, "step": 39684 }, { "epoch": 4.07129667624128, "grad_norm": 0.05583459138870239, "learning_rate": 0.01, "loss": 2.0233, "step": 39687 }, { "epoch": 4.071604431678293, "grad_norm": 0.06682361662387848, "learning_rate": 0.01, "loss": 2.0344, "step": 39690 }, { "epoch": 4.071912187115306, "grad_norm": 0.08554589748382568, "learning_rate": 0.01, "loss": 2.0021, "step": 39693 }, { "epoch": 4.0722199425523185, "grad_norm": 0.07087216526269913, "learning_rate": 0.01, "loss": 1.9897, "step": 39696 }, { "epoch": 4.072527697989331, "grad_norm": 0.06562238186597824, "learning_rate": 0.01, "loss": 2.0118, "step": 39699 }, { "epoch": 4.072835453426344, "grad_norm": 0.036247964948415756, "learning_rate": 0.01, "loss": 1.9787, "step": 39702 }, { "epoch": 4.073143208863357, "grad_norm": 0.08515971153974533, "learning_rate": 0.01, "loss": 1.9845, "step": 39705 }, { "epoch": 4.07345096430037, "grad_norm": 0.07769300043582916, "learning_rate": 0.01, "loss": 2.0183, "step": 39708 }, { "epoch": 4.073758719737382, "grad_norm": 0.0872216522693634, "learning_rate": 0.01, "loss": 1.9853, "step": 39711 }, { "epoch": 4.074066475174395, "grad_norm": 0.06590529531240463, "learning_rate": 0.01, "loss": 1.9917, "step": 39714 }, { "epoch": 4.074374230611408, "grad_norm": 0.059110917150974274, "learning_rate": 0.01, "loss": 2.0192, "step": 39717 }, { "epoch": 4.07468198604842, "grad_norm": 0.04299427196383476, "learning_rate": 0.01, "loss": 2.0017, "step": 39720 }, { "epoch": 4.074989741485433, "grad_norm": 0.10769655555486679, "learning_rate": 0.01, "loss": 1.9979, "step": 39723 }, { "epoch": 4.075297496922445, "grad_norm": 0.08587364107370377, "learning_rate": 0.01, "loss": 2.004, "step": 39726 }, { "epoch": 4.075605252359458, "grad_norm": 0.08979383856058121, "learning_rate": 0.01, "loss": 2.011, "step": 39729 }, { "epoch": 4.075913007796471, "grad_norm": 0.12115131318569183, "learning_rate": 0.01, "loss": 2.0182, "step": 39732 }, { "epoch": 4.076220763233484, "grad_norm": 0.1939251571893692, "learning_rate": 0.01, "loss": 1.9981, "step": 39735 }, { "epoch": 4.0765285186704965, "grad_norm": 0.05781332403421402, "learning_rate": 0.01, "loss": 2.0088, "step": 39738 }, { "epoch": 4.076836274107509, "grad_norm": 0.03456675633788109, "learning_rate": 0.01, "loss": 1.9861, "step": 39741 }, { "epoch": 4.077144029544522, "grad_norm": 0.040457479655742645, "learning_rate": 0.01, "loss": 1.9973, "step": 39744 }, { "epoch": 4.077451784981535, "grad_norm": 0.06412683427333832, "learning_rate": 0.01, "loss": 2.0057, "step": 39747 }, { "epoch": 4.077759540418548, "grad_norm": 0.05500893294811249, "learning_rate": 0.01, "loss": 2.0056, "step": 39750 }, { "epoch": 4.07806729585556, "grad_norm": 0.05995875224471092, "learning_rate": 0.01, "loss": 1.9968, "step": 39753 }, { "epoch": 4.078375051292573, "grad_norm": 0.1099732369184494, "learning_rate": 0.01, "loss": 2.0105, "step": 39756 }, { "epoch": 4.078682806729586, "grad_norm": 0.11458323895931244, "learning_rate": 0.01, "loss": 2.0063, "step": 39759 }, { "epoch": 4.078990562166599, "grad_norm": 0.07978501915931702, "learning_rate": 0.01, "loss": 2.0074, "step": 39762 }, { "epoch": 4.079298317603611, "grad_norm": 0.04269685968756676, "learning_rate": 0.01, "loss": 2.0154, "step": 39765 }, { "epoch": 4.079606073040623, "grad_norm": 0.051762815564870834, "learning_rate": 0.01, "loss": 2.0067, "step": 39768 }, { "epoch": 4.079913828477636, "grad_norm": 0.030949685722589493, "learning_rate": 0.01, "loss": 2.0025, "step": 39771 }, { "epoch": 4.080221583914649, "grad_norm": 0.1207941323518753, "learning_rate": 0.01, "loss": 2.0009, "step": 39774 }, { "epoch": 4.080529339351662, "grad_norm": 0.12475111335515976, "learning_rate": 0.01, "loss": 1.9991, "step": 39777 }, { "epoch": 4.0808370947886745, "grad_norm": 0.10487036406993866, "learning_rate": 0.01, "loss": 2.0025, "step": 39780 }, { "epoch": 4.081144850225687, "grad_norm": 0.088489830493927, "learning_rate": 0.01, "loss": 2.0399, "step": 39783 }, { "epoch": 4.0814526056627, "grad_norm": 0.06538698077201843, "learning_rate": 0.01, "loss": 1.9969, "step": 39786 }, { "epoch": 4.081760361099713, "grad_norm": 0.05029508098959923, "learning_rate": 0.01, "loss": 2.0022, "step": 39789 }, { "epoch": 4.082068116536726, "grad_norm": 0.04905729368329048, "learning_rate": 0.01, "loss": 2.005, "step": 39792 }, { "epoch": 4.082375871973738, "grad_norm": 0.060302965342998505, "learning_rate": 0.01, "loss": 1.9986, "step": 39795 }, { "epoch": 4.082683627410751, "grad_norm": 0.06065473333001137, "learning_rate": 0.01, "loss": 1.9804, "step": 39798 }, { "epoch": 4.082991382847764, "grad_norm": 0.04160602018237114, "learning_rate": 0.01, "loss": 2.0136, "step": 39801 }, { "epoch": 4.083299138284777, "grad_norm": 0.04311453923583031, "learning_rate": 0.01, "loss": 1.9955, "step": 39804 }, { "epoch": 4.0836068937217895, "grad_norm": 0.03628389909863472, "learning_rate": 0.01, "loss": 1.9935, "step": 39807 }, { "epoch": 4.083914649158801, "grad_norm": 0.09799020737409592, "learning_rate": 0.01, "loss": 1.9933, "step": 39810 }, { "epoch": 4.084222404595814, "grad_norm": 0.07026764005422592, "learning_rate": 0.01, "loss": 1.989, "step": 39813 }, { "epoch": 4.084530160032827, "grad_norm": 0.11306377500295639, "learning_rate": 0.01, "loss": 2.0241, "step": 39816 }, { "epoch": 4.08483791546984, "grad_norm": 0.13824355602264404, "learning_rate": 0.01, "loss": 2.0319, "step": 39819 }, { "epoch": 4.0851456709068525, "grad_norm": 0.07312388718128204, "learning_rate": 0.01, "loss": 2.0323, "step": 39822 }, { "epoch": 4.085453426343865, "grad_norm": 0.06378597021102905, "learning_rate": 0.01, "loss": 1.9952, "step": 39825 }, { "epoch": 4.085761181780878, "grad_norm": 0.04329591616988182, "learning_rate": 0.01, "loss": 2.0342, "step": 39828 }, { "epoch": 4.086068937217891, "grad_norm": 0.05057251825928688, "learning_rate": 0.01, "loss": 1.9695, "step": 39831 }, { "epoch": 4.086376692654904, "grad_norm": 0.047054242342710495, "learning_rate": 0.01, "loss": 2.0329, "step": 39834 }, { "epoch": 4.086684448091916, "grad_norm": 0.06537993997335434, "learning_rate": 0.01, "loss": 1.987, "step": 39837 }, { "epoch": 4.086992203528929, "grad_norm": 0.08527065068483353, "learning_rate": 0.01, "loss": 2.0031, "step": 39840 }, { "epoch": 4.087299958965942, "grad_norm": 0.05401263013482094, "learning_rate": 0.01, "loss": 1.9946, "step": 39843 }, { "epoch": 4.087607714402955, "grad_norm": 0.04258432239294052, "learning_rate": 0.01, "loss": 2.0098, "step": 39846 }, { "epoch": 4.087915469839968, "grad_norm": 0.08178570866584778, "learning_rate": 0.01, "loss": 1.9965, "step": 39849 }, { "epoch": 4.08822322527698, "grad_norm": 0.06067502498626709, "learning_rate": 0.01, "loss": 2.0209, "step": 39852 }, { "epoch": 4.088530980713992, "grad_norm": 0.04289801046252251, "learning_rate": 0.01, "loss": 1.9943, "step": 39855 }, { "epoch": 4.088838736151005, "grad_norm": 0.10091704875230789, "learning_rate": 0.01, "loss": 2.0113, "step": 39858 }, { "epoch": 4.089146491588018, "grad_norm": 0.05211932212114334, "learning_rate": 0.01, "loss": 2.0202, "step": 39861 }, { "epoch": 4.089454247025031, "grad_norm": 0.08183594793081284, "learning_rate": 0.01, "loss": 2.0139, "step": 39864 }, { "epoch": 4.089762002462043, "grad_norm": 0.13209478557109833, "learning_rate": 0.01, "loss": 2.0115, "step": 39867 }, { "epoch": 4.090069757899056, "grad_norm": 0.08384159207344055, "learning_rate": 0.01, "loss": 1.9652, "step": 39870 }, { "epoch": 4.090377513336069, "grad_norm": 0.10875227302312851, "learning_rate": 0.01, "loss": 1.9956, "step": 39873 }, { "epoch": 4.090685268773082, "grad_norm": 0.09350070357322693, "learning_rate": 0.01, "loss": 1.9902, "step": 39876 }, { "epoch": 4.0909930242100945, "grad_norm": 0.0466117262840271, "learning_rate": 0.01, "loss": 2.0039, "step": 39879 }, { "epoch": 4.091300779647107, "grad_norm": 0.09274325519800186, "learning_rate": 0.01, "loss": 1.9977, "step": 39882 }, { "epoch": 4.09160853508412, "grad_norm": 0.05788525938987732, "learning_rate": 0.01, "loss": 2.0027, "step": 39885 }, { "epoch": 4.091916290521133, "grad_norm": 0.07033047825098038, "learning_rate": 0.01, "loss": 2.0231, "step": 39888 }, { "epoch": 4.092224045958146, "grad_norm": 0.07227031141519547, "learning_rate": 0.01, "loss": 2.0085, "step": 39891 }, { "epoch": 4.092531801395158, "grad_norm": 0.08519124984741211, "learning_rate": 0.01, "loss": 2.0037, "step": 39894 }, { "epoch": 4.09283955683217, "grad_norm": 0.038923412561416626, "learning_rate": 0.01, "loss": 2.0254, "step": 39897 }, { "epoch": 4.093147312269183, "grad_norm": 0.03778720274567604, "learning_rate": 0.01, "loss": 1.9985, "step": 39900 }, { "epoch": 4.093455067706196, "grad_norm": 0.046422503888607025, "learning_rate": 0.01, "loss": 1.9981, "step": 39903 }, { "epoch": 4.093762823143209, "grad_norm": 0.13946807384490967, "learning_rate": 0.01, "loss": 2.0215, "step": 39906 }, { "epoch": 4.094070578580221, "grad_norm": 0.07924194633960724, "learning_rate": 0.01, "loss": 2.0005, "step": 39909 }, { "epoch": 4.094378334017234, "grad_norm": 0.04913933575153351, "learning_rate": 0.01, "loss": 1.9827, "step": 39912 }, { "epoch": 4.094686089454247, "grad_norm": 0.03534455969929695, "learning_rate": 0.01, "loss": 2.0171, "step": 39915 }, { "epoch": 4.09499384489126, "grad_norm": 0.03871719539165497, "learning_rate": 0.01, "loss": 1.9864, "step": 39918 }, { "epoch": 4.0953016003282725, "grad_norm": 0.05986125394701958, "learning_rate": 0.01, "loss": 2.01, "step": 39921 }, { "epoch": 4.095609355765285, "grad_norm": 0.06470568478107452, "learning_rate": 0.01, "loss": 2.0015, "step": 39924 }, { "epoch": 4.095917111202298, "grad_norm": 0.05725434049963951, "learning_rate": 0.01, "loss": 2.0056, "step": 39927 }, { "epoch": 4.096224866639311, "grad_norm": 0.05102056264877319, "learning_rate": 0.01, "loss": 2.0173, "step": 39930 }, { "epoch": 4.096532622076324, "grad_norm": 0.04540586844086647, "learning_rate": 0.01, "loss": 2.0176, "step": 39933 }, { "epoch": 4.096840377513336, "grad_norm": 0.08318594843149185, "learning_rate": 0.01, "loss": 2.0327, "step": 39936 }, { "epoch": 4.097148132950349, "grad_norm": 0.0833197757601738, "learning_rate": 0.01, "loss": 1.9842, "step": 39939 }, { "epoch": 4.097455888387361, "grad_norm": 0.06058152765035629, "learning_rate": 0.01, "loss": 2.0175, "step": 39942 }, { "epoch": 4.097763643824374, "grad_norm": 0.09669670462608337, "learning_rate": 0.01, "loss": 2.0037, "step": 39945 }, { "epoch": 4.098071399261387, "grad_norm": 0.06633847951889038, "learning_rate": 0.01, "loss": 2.0179, "step": 39948 }, { "epoch": 4.098379154698399, "grad_norm": 0.1021406352519989, "learning_rate": 0.01, "loss": 2.021, "step": 39951 }, { "epoch": 4.098686910135412, "grad_norm": 0.047858573496341705, "learning_rate": 0.01, "loss": 1.9945, "step": 39954 }, { "epoch": 4.098994665572425, "grad_norm": 0.04331503435969353, "learning_rate": 0.01, "loss": 2.0304, "step": 39957 }, { "epoch": 4.099302421009438, "grad_norm": 0.0399680957198143, "learning_rate": 0.01, "loss": 1.9915, "step": 39960 }, { "epoch": 4.0996101764464505, "grad_norm": 0.09427163749933243, "learning_rate": 0.01, "loss": 1.997, "step": 39963 }, { "epoch": 4.099917931883463, "grad_norm": 0.06199577823281288, "learning_rate": 0.01, "loss": 2.0176, "step": 39966 }, { "epoch": 4.100225687320476, "grad_norm": 0.04445228725671768, "learning_rate": 0.01, "loss": 1.98, "step": 39969 }, { "epoch": 4.100533442757489, "grad_norm": 0.04458901658654213, "learning_rate": 0.01, "loss": 1.9895, "step": 39972 }, { "epoch": 4.100841198194502, "grad_norm": 0.05720875784754753, "learning_rate": 0.01, "loss": 2.0225, "step": 39975 }, { "epoch": 4.101148953631514, "grad_norm": 0.0806422233581543, "learning_rate": 0.01, "loss": 2.0104, "step": 39978 }, { "epoch": 4.101456709068527, "grad_norm": 0.07191216200590134, "learning_rate": 0.01, "loss": 1.9938, "step": 39981 }, { "epoch": 4.10176446450554, "grad_norm": 0.04921800270676613, "learning_rate": 0.01, "loss": 2.0137, "step": 39984 }, { "epoch": 4.102072219942552, "grad_norm": 0.09171293675899506, "learning_rate": 0.01, "loss": 2.0173, "step": 39987 }, { "epoch": 4.102379975379565, "grad_norm": 0.08604893833398819, "learning_rate": 0.01, "loss": 1.998, "step": 39990 }, { "epoch": 4.1026877308165774, "grad_norm": 0.0489855632185936, "learning_rate": 0.01, "loss": 2.0014, "step": 39993 }, { "epoch": 4.10299548625359, "grad_norm": 0.09026241302490234, "learning_rate": 0.01, "loss": 2.0229, "step": 39996 }, { "epoch": 4.103303241690603, "grad_norm": 0.03529585152864456, "learning_rate": 0.01, "loss": 1.989, "step": 39999 }, { "epoch": 4.103610997127616, "grad_norm": 0.10228559374809265, "learning_rate": 0.01, "loss": 2.0266, "step": 40002 }, { "epoch": 4.103918752564629, "grad_norm": 0.04634955897927284, "learning_rate": 0.01, "loss": 1.9951, "step": 40005 }, { "epoch": 4.104226508001641, "grad_norm": 0.059635523706674576, "learning_rate": 0.01, "loss": 1.9998, "step": 40008 }, { "epoch": 4.104534263438654, "grad_norm": 0.07889647036790848, "learning_rate": 0.01, "loss": 2.0126, "step": 40011 }, { "epoch": 4.104842018875667, "grad_norm": 0.056453004479408264, "learning_rate": 0.01, "loss": 1.9949, "step": 40014 }, { "epoch": 4.10514977431268, "grad_norm": 0.038360368460416794, "learning_rate": 0.01, "loss": 2.0026, "step": 40017 }, { "epoch": 4.1054575297496925, "grad_norm": 0.08849389106035233, "learning_rate": 0.01, "loss": 2.0166, "step": 40020 }, { "epoch": 4.105765285186705, "grad_norm": 0.06063401326537132, "learning_rate": 0.01, "loss": 2.011, "step": 40023 }, { "epoch": 4.106073040623718, "grad_norm": 0.08102670311927795, "learning_rate": 0.01, "loss": 2.0157, "step": 40026 }, { "epoch": 4.106380796060731, "grad_norm": 0.12359301745891571, "learning_rate": 0.01, "loss": 2.0092, "step": 40029 }, { "epoch": 4.106688551497743, "grad_norm": 0.04933517053723335, "learning_rate": 0.01, "loss": 2.0297, "step": 40032 }, { "epoch": 4.1069963069347555, "grad_norm": 0.06319262832403183, "learning_rate": 0.01, "loss": 1.9731, "step": 40035 }, { "epoch": 4.107304062371768, "grad_norm": 0.1100512370467186, "learning_rate": 0.01, "loss": 1.9963, "step": 40038 }, { "epoch": 4.107611817808781, "grad_norm": 0.04997371882200241, "learning_rate": 0.01, "loss": 2.0149, "step": 40041 }, { "epoch": 4.107919573245794, "grad_norm": 0.034478992223739624, "learning_rate": 0.01, "loss": 2.0092, "step": 40044 }, { "epoch": 4.108227328682807, "grad_norm": 0.07332266122102737, "learning_rate": 0.01, "loss": 1.987, "step": 40047 }, { "epoch": 4.108535084119819, "grad_norm": 0.07530300319194794, "learning_rate": 0.01, "loss": 2.0238, "step": 40050 }, { "epoch": 4.108842839556832, "grad_norm": 0.05256710946559906, "learning_rate": 0.01, "loss": 2.0044, "step": 40053 }, { "epoch": 4.109150594993845, "grad_norm": 0.0342395044863224, "learning_rate": 0.01, "loss": 2.0046, "step": 40056 }, { "epoch": 4.109458350430858, "grad_norm": 0.05822393298149109, "learning_rate": 0.01, "loss": 1.9793, "step": 40059 }, { "epoch": 4.1097661058678705, "grad_norm": 0.11184973269701004, "learning_rate": 0.01, "loss": 2.0095, "step": 40062 }, { "epoch": 4.110073861304883, "grad_norm": 0.10424233227968216, "learning_rate": 0.01, "loss": 2.0096, "step": 40065 }, { "epoch": 4.110381616741896, "grad_norm": 0.07766477763652802, "learning_rate": 0.01, "loss": 2.0173, "step": 40068 }, { "epoch": 4.110689372178909, "grad_norm": 0.05194033682346344, "learning_rate": 0.01, "loss": 1.9849, "step": 40071 }, { "epoch": 4.110997127615922, "grad_norm": 0.059180594980716705, "learning_rate": 0.01, "loss": 2.0041, "step": 40074 }, { "epoch": 4.1113048830529335, "grad_norm": 0.056441277265548706, "learning_rate": 0.01, "loss": 2.0308, "step": 40077 }, { "epoch": 4.111612638489946, "grad_norm": 0.10722373425960541, "learning_rate": 0.01, "loss": 1.9856, "step": 40080 }, { "epoch": 4.111920393926959, "grad_norm": 0.05328166112303734, "learning_rate": 0.01, "loss": 2.0163, "step": 40083 }, { "epoch": 4.112228149363972, "grad_norm": 0.0920194610953331, "learning_rate": 0.01, "loss": 2.0242, "step": 40086 }, { "epoch": 4.112535904800985, "grad_norm": 0.09387510269880295, "learning_rate": 0.01, "loss": 1.9928, "step": 40089 }, { "epoch": 4.112843660237997, "grad_norm": 0.05598600208759308, "learning_rate": 0.01, "loss": 2.0063, "step": 40092 }, { "epoch": 4.11315141567501, "grad_norm": 0.07474517822265625, "learning_rate": 0.01, "loss": 2.026, "step": 40095 }, { "epoch": 4.113459171112023, "grad_norm": 0.05041923001408577, "learning_rate": 0.01, "loss": 2.0211, "step": 40098 }, { "epoch": 4.113766926549036, "grad_norm": 0.09958838671445847, "learning_rate": 0.01, "loss": 2.007, "step": 40101 }, { "epoch": 4.1140746819860485, "grad_norm": 0.07080104202032089, "learning_rate": 0.01, "loss": 2.0061, "step": 40104 }, { "epoch": 4.114382437423061, "grad_norm": 0.04018507897853851, "learning_rate": 0.01, "loss": 2.0279, "step": 40107 }, { "epoch": 4.114690192860074, "grad_norm": 0.034507866948843, "learning_rate": 0.01, "loss": 1.995, "step": 40110 }, { "epoch": 4.114997948297087, "grad_norm": 0.09373819082975388, "learning_rate": 0.01, "loss": 1.9992, "step": 40113 }, { "epoch": 4.1153057037341, "grad_norm": 0.09473507106304169, "learning_rate": 0.01, "loss": 2.0374, "step": 40116 }, { "epoch": 4.115613459171112, "grad_norm": 0.06728008389472961, "learning_rate": 0.01, "loss": 2.0208, "step": 40119 }, { "epoch": 4.115921214608124, "grad_norm": 0.03551316261291504, "learning_rate": 0.01, "loss": 2.0162, "step": 40122 }, { "epoch": 4.116228970045137, "grad_norm": 0.048464663326740265, "learning_rate": 0.01, "loss": 1.9957, "step": 40125 }, { "epoch": 4.11653672548215, "grad_norm": 0.06069257855415344, "learning_rate": 0.01, "loss": 2.0113, "step": 40128 }, { "epoch": 4.116844480919163, "grad_norm": 0.04196292161941528, "learning_rate": 0.01, "loss": 2.0046, "step": 40131 }, { "epoch": 4.117152236356175, "grad_norm": 0.056623879820108414, "learning_rate": 0.01, "loss": 1.9986, "step": 40134 }, { "epoch": 4.117459991793188, "grad_norm": 0.04838422313332558, "learning_rate": 0.01, "loss": 2.0206, "step": 40137 }, { "epoch": 4.117767747230201, "grad_norm": 0.05446647107601166, "learning_rate": 0.01, "loss": 2.0209, "step": 40140 }, { "epoch": 4.118075502667214, "grad_norm": 0.04145834967494011, "learning_rate": 0.01, "loss": 2.0121, "step": 40143 }, { "epoch": 4.118383258104227, "grad_norm": 0.07565242797136307, "learning_rate": 0.01, "loss": 2.0105, "step": 40146 }, { "epoch": 4.118691013541239, "grad_norm": 0.16024605929851532, "learning_rate": 0.01, "loss": 1.9793, "step": 40149 }, { "epoch": 4.118998768978252, "grad_norm": 0.10673293471336365, "learning_rate": 0.01, "loss": 2.0035, "step": 40152 }, { "epoch": 4.119306524415265, "grad_norm": 0.04822041094303131, "learning_rate": 0.01, "loss": 2.0003, "step": 40155 }, { "epoch": 4.119614279852278, "grad_norm": 0.05074264481663704, "learning_rate": 0.01, "loss": 2.0096, "step": 40158 }, { "epoch": 4.1199220352892905, "grad_norm": 0.0467289499938488, "learning_rate": 0.01, "loss": 1.9832, "step": 40161 }, { "epoch": 4.120229790726303, "grad_norm": 0.06745675206184387, "learning_rate": 0.01, "loss": 2.0095, "step": 40164 }, { "epoch": 4.120537546163315, "grad_norm": 0.05463431030511856, "learning_rate": 0.01, "loss": 2.0028, "step": 40167 }, { "epoch": 4.120845301600328, "grad_norm": 0.057621899992227554, "learning_rate": 0.01, "loss": 2.0032, "step": 40170 }, { "epoch": 4.121153057037341, "grad_norm": 0.03955928236246109, "learning_rate": 0.01, "loss": 1.9945, "step": 40173 }, { "epoch": 4.1214608124743535, "grad_norm": 0.09852177649736404, "learning_rate": 0.01, "loss": 2.0124, "step": 40176 }, { "epoch": 4.121768567911366, "grad_norm": 0.12283693999052048, "learning_rate": 0.01, "loss": 1.9923, "step": 40179 }, { "epoch": 4.122076323348379, "grad_norm": 0.046991802752017975, "learning_rate": 0.01, "loss": 2.0032, "step": 40182 }, { "epoch": 4.122384078785392, "grad_norm": 0.032669685781002045, "learning_rate": 0.01, "loss": 2.005, "step": 40185 }, { "epoch": 4.122691834222405, "grad_norm": 0.029325902462005615, "learning_rate": 0.01, "loss": 2.0086, "step": 40188 }, { "epoch": 4.122999589659417, "grad_norm": 0.05086469277739525, "learning_rate": 0.01, "loss": 1.986, "step": 40191 }, { "epoch": 4.12330734509643, "grad_norm": 0.05128968507051468, "learning_rate": 0.01, "loss": 1.983, "step": 40194 }, { "epoch": 4.123615100533443, "grad_norm": 0.06649978458881378, "learning_rate": 0.01, "loss": 2.0385, "step": 40197 }, { "epoch": 4.123922855970456, "grad_norm": 0.07910530269145966, "learning_rate": 0.01, "loss": 2.0116, "step": 40200 }, { "epoch": 4.1242306114074685, "grad_norm": 0.07179386913776398, "learning_rate": 0.01, "loss": 1.9867, "step": 40203 }, { "epoch": 4.124538366844481, "grad_norm": 0.04028356075286865, "learning_rate": 0.01, "loss": 1.9831, "step": 40206 }, { "epoch": 4.124846122281494, "grad_norm": 0.14858920872211456, "learning_rate": 0.01, "loss": 1.9975, "step": 40209 }, { "epoch": 4.125153877718506, "grad_norm": 0.0865345299243927, "learning_rate": 0.01, "loss": 1.9879, "step": 40212 }, { "epoch": 4.125461633155519, "grad_norm": 0.04412895813584328, "learning_rate": 0.01, "loss": 1.9943, "step": 40215 }, { "epoch": 4.1257693885925315, "grad_norm": 0.0482075996696949, "learning_rate": 0.01, "loss": 2.013, "step": 40218 }, { "epoch": 4.126077144029544, "grad_norm": 0.0381084606051445, "learning_rate": 0.01, "loss": 1.9939, "step": 40221 }, { "epoch": 4.126384899466557, "grad_norm": 0.03551500290632248, "learning_rate": 0.01, "loss": 2.0002, "step": 40224 }, { "epoch": 4.12669265490357, "grad_norm": 0.029879622161388397, "learning_rate": 0.01, "loss": 1.9924, "step": 40227 }, { "epoch": 4.127000410340583, "grad_norm": 0.04461509361863136, "learning_rate": 0.01, "loss": 2.006, "step": 40230 }, { "epoch": 4.127308165777595, "grad_norm": 0.08034146577119827, "learning_rate": 0.01, "loss": 1.9929, "step": 40233 }, { "epoch": 4.127615921214608, "grad_norm": 0.03752557560801506, "learning_rate": 0.01, "loss": 1.9943, "step": 40236 }, { "epoch": 4.127923676651621, "grad_norm": 0.09391690045595169, "learning_rate": 0.01, "loss": 2.0141, "step": 40239 }, { "epoch": 4.128231432088634, "grad_norm": 0.10475057363510132, "learning_rate": 0.01, "loss": 2.0169, "step": 40242 }, { "epoch": 4.1285391875256465, "grad_norm": 0.05833762139081955, "learning_rate": 0.01, "loss": 2.0309, "step": 40245 }, { "epoch": 4.128846942962659, "grad_norm": 0.07527395337820053, "learning_rate": 0.01, "loss": 2.0051, "step": 40248 }, { "epoch": 4.129154698399672, "grad_norm": 0.037238337099552155, "learning_rate": 0.01, "loss": 2.0569, "step": 40251 }, { "epoch": 4.129462453836685, "grad_norm": 0.08547326922416687, "learning_rate": 0.01, "loss": 1.9589, "step": 40254 }, { "epoch": 4.129770209273697, "grad_norm": 0.04352400079369545, "learning_rate": 0.01, "loss": 2.0079, "step": 40257 }, { "epoch": 4.1300779647107095, "grad_norm": 0.03715146332979202, "learning_rate": 0.01, "loss": 2.005, "step": 40260 }, { "epoch": 4.130385720147722, "grad_norm": 0.03312180936336517, "learning_rate": 0.01, "loss": 1.9865, "step": 40263 }, { "epoch": 4.130693475584735, "grad_norm": 0.14868606626987457, "learning_rate": 0.01, "loss": 2.016, "step": 40266 }, { "epoch": 4.131001231021748, "grad_norm": 0.0412895530462265, "learning_rate": 0.01, "loss": 2.0146, "step": 40269 }, { "epoch": 4.131308986458761, "grad_norm": 0.059335898607969284, "learning_rate": 0.01, "loss": 2.0028, "step": 40272 }, { "epoch": 4.131616741895773, "grad_norm": 0.08401087671518326, "learning_rate": 0.01, "loss": 2.0027, "step": 40275 }, { "epoch": 4.131924497332786, "grad_norm": 0.09615478664636612, "learning_rate": 0.01, "loss": 2.0188, "step": 40278 }, { "epoch": 4.132232252769799, "grad_norm": 0.06387398391962051, "learning_rate": 0.01, "loss": 2.0117, "step": 40281 }, { "epoch": 4.132540008206812, "grad_norm": 0.07762457430362701, "learning_rate": 0.01, "loss": 2.0155, "step": 40284 }, { "epoch": 4.132847763643825, "grad_norm": 0.08931776881217957, "learning_rate": 0.01, "loss": 2.0027, "step": 40287 }, { "epoch": 4.133155519080837, "grad_norm": 0.03763844072818756, "learning_rate": 0.01, "loss": 1.9951, "step": 40290 }, { "epoch": 4.13346327451785, "grad_norm": 0.06352074444293976, "learning_rate": 0.01, "loss": 2.0431, "step": 40293 }, { "epoch": 4.133771029954863, "grad_norm": 0.04778370261192322, "learning_rate": 0.01, "loss": 1.9819, "step": 40296 }, { "epoch": 4.134078785391876, "grad_norm": 0.06776099652051926, "learning_rate": 0.01, "loss": 1.9962, "step": 40299 }, { "epoch": 4.134386540828888, "grad_norm": 0.072423554956913, "learning_rate": 0.01, "loss": 2.0301, "step": 40302 }, { "epoch": 4.1346942962659, "grad_norm": 0.10224346071481705, "learning_rate": 0.01, "loss": 2.0101, "step": 40305 }, { "epoch": 4.135002051702913, "grad_norm": 0.045054350048303604, "learning_rate": 0.01, "loss": 1.9851, "step": 40308 }, { "epoch": 4.135309807139926, "grad_norm": 0.10343952476978302, "learning_rate": 0.01, "loss": 2.0008, "step": 40311 }, { "epoch": 4.135617562576939, "grad_norm": 0.12315492331981659, "learning_rate": 0.01, "loss": 2.0195, "step": 40314 }, { "epoch": 4.1359253180139515, "grad_norm": 0.06620458513498306, "learning_rate": 0.01, "loss": 2.0019, "step": 40317 }, { "epoch": 4.136233073450964, "grad_norm": 0.06378698348999023, "learning_rate": 0.01, "loss": 2.0046, "step": 40320 }, { "epoch": 4.136540828887977, "grad_norm": 0.06669528782367706, "learning_rate": 0.01, "loss": 2.022, "step": 40323 }, { "epoch": 4.13684858432499, "grad_norm": 0.07347302883863449, "learning_rate": 0.01, "loss": 2.0038, "step": 40326 }, { "epoch": 4.137156339762003, "grad_norm": 0.06786450743675232, "learning_rate": 0.01, "loss": 2.0327, "step": 40329 }, { "epoch": 4.137464095199015, "grad_norm": 0.10124550014734268, "learning_rate": 0.01, "loss": 2.0072, "step": 40332 }, { "epoch": 4.137771850636028, "grad_norm": 0.07563018798828125, "learning_rate": 0.01, "loss": 1.9818, "step": 40335 }, { "epoch": 4.138079606073041, "grad_norm": 0.08920261263847351, "learning_rate": 0.01, "loss": 2.032, "step": 40338 }, { "epoch": 4.138387361510054, "grad_norm": 0.07678209245204926, "learning_rate": 0.01, "loss": 2.0099, "step": 40341 }, { "epoch": 4.1386951169470665, "grad_norm": 0.03569749370217323, "learning_rate": 0.01, "loss": 2.0312, "step": 40344 }, { "epoch": 4.139002872384078, "grad_norm": 0.046265408396720886, "learning_rate": 0.01, "loss": 2.0071, "step": 40347 }, { "epoch": 4.139310627821091, "grad_norm": 0.04766137897968292, "learning_rate": 0.01, "loss": 2.0042, "step": 40350 }, { "epoch": 4.139618383258104, "grad_norm": 0.09092257916927338, "learning_rate": 0.01, "loss": 2.0095, "step": 40353 }, { "epoch": 4.139926138695117, "grad_norm": 0.05511738359928131, "learning_rate": 0.01, "loss": 2.019, "step": 40356 }, { "epoch": 4.1402338941321295, "grad_norm": 0.04944039136171341, "learning_rate": 0.01, "loss": 1.999, "step": 40359 }, { "epoch": 4.140541649569142, "grad_norm": 0.04905549809336662, "learning_rate": 0.01, "loss": 2.025, "step": 40362 }, { "epoch": 4.140849405006155, "grad_norm": 0.07323266565799713, "learning_rate": 0.01, "loss": 2.0044, "step": 40365 }, { "epoch": 4.141157160443168, "grad_norm": 0.07263099402189255, "learning_rate": 0.01, "loss": 2.016, "step": 40368 }, { "epoch": 4.141464915880181, "grad_norm": 0.05902627855539322, "learning_rate": 0.01, "loss": 1.9856, "step": 40371 }, { "epoch": 4.141772671317193, "grad_norm": 0.0597088448703289, "learning_rate": 0.01, "loss": 2.0105, "step": 40374 }, { "epoch": 4.142080426754206, "grad_norm": 0.04829247295856476, "learning_rate": 0.01, "loss": 2.0114, "step": 40377 }, { "epoch": 4.142388182191219, "grad_norm": 0.10093509405851364, "learning_rate": 0.01, "loss": 2.0092, "step": 40380 }, { "epoch": 4.142695937628232, "grad_norm": 0.06805883347988129, "learning_rate": 0.01, "loss": 1.9984, "step": 40383 }, { "epoch": 4.1430036930652445, "grad_norm": 0.05700362101197243, "learning_rate": 0.01, "loss": 2.0166, "step": 40386 }, { "epoch": 4.143311448502257, "grad_norm": 0.10143036395311356, "learning_rate": 0.01, "loss": 2.0133, "step": 40389 }, { "epoch": 4.143619203939269, "grad_norm": 0.0752461776137352, "learning_rate": 0.01, "loss": 2.0011, "step": 40392 }, { "epoch": 4.143926959376282, "grad_norm": 0.10534628480672836, "learning_rate": 0.01, "loss": 1.9837, "step": 40395 }, { "epoch": 4.144234714813295, "grad_norm": 0.042690735310316086, "learning_rate": 0.01, "loss": 1.9985, "step": 40398 }, { "epoch": 4.1445424702503075, "grad_norm": 0.04265071824193001, "learning_rate": 0.01, "loss": 1.9943, "step": 40401 }, { "epoch": 4.14485022568732, "grad_norm": 0.04449966922402382, "learning_rate": 0.01, "loss": 1.9964, "step": 40404 }, { "epoch": 4.145157981124333, "grad_norm": 0.06278623640537262, "learning_rate": 0.01, "loss": 1.9997, "step": 40407 }, { "epoch": 4.145465736561346, "grad_norm": 0.09633596986532211, "learning_rate": 0.01, "loss": 2.0184, "step": 40410 }, { "epoch": 4.145773491998359, "grad_norm": 0.06465546041727066, "learning_rate": 0.01, "loss": 2.0004, "step": 40413 }, { "epoch": 4.146081247435371, "grad_norm": 0.07929039746522903, "learning_rate": 0.01, "loss": 2.0275, "step": 40416 }, { "epoch": 4.146389002872384, "grad_norm": 0.04692156985402107, "learning_rate": 0.01, "loss": 1.9896, "step": 40419 }, { "epoch": 4.146696758309397, "grad_norm": 0.09933728724718094, "learning_rate": 0.01, "loss": 2.0155, "step": 40422 }, { "epoch": 4.14700451374641, "grad_norm": 0.07595301419496536, "learning_rate": 0.01, "loss": 2.0262, "step": 40425 }, { "epoch": 4.1473122691834226, "grad_norm": 0.06487558037042618, "learning_rate": 0.01, "loss": 2.0018, "step": 40428 }, { "epoch": 4.147620024620435, "grad_norm": 0.06612923741340637, "learning_rate": 0.01, "loss": 1.9956, "step": 40431 }, { "epoch": 4.147927780057448, "grad_norm": 0.08177967369556427, "learning_rate": 0.01, "loss": 2.0085, "step": 40434 }, { "epoch": 4.14823553549446, "grad_norm": 0.12479262799024582, "learning_rate": 0.01, "loss": 2.0037, "step": 40437 }, { "epoch": 4.148543290931473, "grad_norm": 0.053598951548337936, "learning_rate": 0.01, "loss": 2.0029, "step": 40440 }, { "epoch": 4.148851046368486, "grad_norm": 0.05604923143982887, "learning_rate": 0.01, "loss": 1.9985, "step": 40443 }, { "epoch": 4.149158801805498, "grad_norm": 0.044350240379571915, "learning_rate": 0.01, "loss": 2.0065, "step": 40446 }, { "epoch": 4.149466557242511, "grad_norm": 0.03988419473171234, "learning_rate": 0.01, "loss": 1.9988, "step": 40449 }, { "epoch": 4.149774312679524, "grad_norm": 0.03739091381430626, "learning_rate": 0.01, "loss": 2.0002, "step": 40452 }, { "epoch": 4.150082068116537, "grad_norm": 0.0570409819483757, "learning_rate": 0.01, "loss": 2.0044, "step": 40455 }, { "epoch": 4.1503898235535495, "grad_norm": 0.11358576267957687, "learning_rate": 0.01, "loss": 2.0115, "step": 40458 }, { "epoch": 4.150697578990562, "grad_norm": 0.08568841218948364, "learning_rate": 0.01, "loss": 2.0213, "step": 40461 }, { "epoch": 4.151005334427575, "grad_norm": 0.10045678168535233, "learning_rate": 0.01, "loss": 2.0108, "step": 40464 }, { "epoch": 4.151313089864588, "grad_norm": 0.0720805749297142, "learning_rate": 0.01, "loss": 2.0028, "step": 40467 }, { "epoch": 4.151620845301601, "grad_norm": 0.136969655752182, "learning_rate": 0.01, "loss": 2.0117, "step": 40470 }, { "epoch": 4.151928600738613, "grad_norm": 0.1128631979227066, "learning_rate": 0.01, "loss": 2.0143, "step": 40473 }, { "epoch": 4.152236356175626, "grad_norm": 0.06798527389764786, "learning_rate": 0.01, "loss": 1.9935, "step": 40476 }, { "epoch": 4.152544111612638, "grad_norm": 0.04990516975522041, "learning_rate": 0.01, "loss": 2.0104, "step": 40479 }, { "epoch": 4.152851867049651, "grad_norm": 0.0540931336581707, "learning_rate": 0.01, "loss": 2.0031, "step": 40482 }, { "epoch": 4.153159622486664, "grad_norm": 0.06866136193275452, "learning_rate": 0.01, "loss": 1.9815, "step": 40485 }, { "epoch": 4.153467377923676, "grad_norm": 0.05682525411248207, "learning_rate": 0.01, "loss": 2.009, "step": 40488 }, { "epoch": 4.153775133360689, "grad_norm": 0.07948753237724304, "learning_rate": 0.01, "loss": 1.9954, "step": 40491 }, { "epoch": 4.154082888797702, "grad_norm": 0.10209072381258011, "learning_rate": 0.01, "loss": 2.0219, "step": 40494 }, { "epoch": 4.154390644234715, "grad_norm": 0.04499402642250061, "learning_rate": 0.01, "loss": 2.0006, "step": 40497 }, { "epoch": 4.1546983996717275, "grad_norm": 0.03882760554552078, "learning_rate": 0.01, "loss": 1.9914, "step": 40500 }, { "epoch": 4.15500615510874, "grad_norm": 0.1203235313296318, "learning_rate": 0.01, "loss": 2.0149, "step": 40503 }, { "epoch": 4.155313910545753, "grad_norm": 0.10118594765663147, "learning_rate": 0.01, "loss": 2.0209, "step": 40506 }, { "epoch": 4.155621665982766, "grad_norm": 0.057894591242074966, "learning_rate": 0.01, "loss": 1.9982, "step": 40509 }, { "epoch": 4.155929421419779, "grad_norm": 0.09510418772697449, "learning_rate": 0.01, "loss": 2.0122, "step": 40512 }, { "epoch": 4.156237176856791, "grad_norm": 0.06964008510112762, "learning_rate": 0.01, "loss": 1.9887, "step": 40515 }, { "epoch": 4.156544932293804, "grad_norm": 0.04545210301876068, "learning_rate": 0.01, "loss": 2.0019, "step": 40518 }, { "epoch": 4.156852687730817, "grad_norm": 0.03616971895098686, "learning_rate": 0.01, "loss": 1.9781, "step": 40521 }, { "epoch": 4.157160443167829, "grad_norm": 0.038192588835954666, "learning_rate": 0.01, "loss": 1.9969, "step": 40524 }, { "epoch": 4.157468198604842, "grad_norm": 0.046076469123363495, "learning_rate": 0.01, "loss": 2.0271, "step": 40527 }, { "epoch": 4.157775954041854, "grad_norm": 0.08816591650247574, "learning_rate": 0.01, "loss": 2.0141, "step": 40530 }, { "epoch": 4.158083709478867, "grad_norm": 0.10513858497142792, "learning_rate": 0.01, "loss": 1.9838, "step": 40533 }, { "epoch": 4.15839146491588, "grad_norm": 0.05273423343896866, "learning_rate": 0.01, "loss": 1.9984, "step": 40536 }, { "epoch": 4.158699220352893, "grad_norm": 0.11179885268211365, "learning_rate": 0.01, "loss": 2.0221, "step": 40539 }, { "epoch": 4.1590069757899055, "grad_norm": 0.049401313066482544, "learning_rate": 0.01, "loss": 2.0208, "step": 40542 }, { "epoch": 4.159314731226918, "grad_norm": 0.04798784479498863, "learning_rate": 0.01, "loss": 1.9949, "step": 40545 }, { "epoch": 4.159622486663931, "grad_norm": 0.04317957162857056, "learning_rate": 0.01, "loss": 2.018, "step": 40548 }, { "epoch": 4.159930242100944, "grad_norm": 0.031536102294921875, "learning_rate": 0.01, "loss": 2.0026, "step": 40551 }, { "epoch": 4.160237997537957, "grad_norm": 0.09016025066375732, "learning_rate": 0.01, "loss": 2.0033, "step": 40554 }, { "epoch": 4.160545752974969, "grad_norm": 0.048575472086668015, "learning_rate": 0.01, "loss": 1.9979, "step": 40557 }, { "epoch": 4.160853508411982, "grad_norm": 0.1236988827586174, "learning_rate": 0.01, "loss": 2.0054, "step": 40560 }, { "epoch": 4.161161263848995, "grad_norm": 0.13077518343925476, "learning_rate": 0.01, "loss": 2.0178, "step": 40563 }, { "epoch": 4.161469019286008, "grad_norm": 0.10104335844516754, "learning_rate": 0.01, "loss": 1.998, "step": 40566 }, { "epoch": 4.16177677472302, "grad_norm": 0.08807270973920822, "learning_rate": 0.01, "loss": 2.0259, "step": 40569 }, { "epoch": 4.162084530160032, "grad_norm": 0.05569664016366005, "learning_rate": 0.01, "loss": 2.0218, "step": 40572 }, { "epoch": 4.162392285597045, "grad_norm": 0.04396286606788635, "learning_rate": 0.01, "loss": 2.0121, "step": 40575 }, { "epoch": 4.162700041034058, "grad_norm": 0.041279278695583344, "learning_rate": 0.01, "loss": 2.0012, "step": 40578 }, { "epoch": 4.163007796471071, "grad_norm": 0.05588890612125397, "learning_rate": 0.01, "loss": 1.9789, "step": 40581 }, { "epoch": 4.163315551908084, "grad_norm": 0.03629462048411369, "learning_rate": 0.01, "loss": 1.9929, "step": 40584 }, { "epoch": 4.163623307345096, "grad_norm": 0.05996616184711456, "learning_rate": 0.01, "loss": 1.9962, "step": 40587 }, { "epoch": 4.163931062782109, "grad_norm": 0.0385168232023716, "learning_rate": 0.01, "loss": 2.019, "step": 40590 }, { "epoch": 4.164238818219122, "grad_norm": 0.04890631511807442, "learning_rate": 0.01, "loss": 1.9967, "step": 40593 }, { "epoch": 4.164546573656135, "grad_norm": 0.06312946230173111, "learning_rate": 0.01, "loss": 2.0192, "step": 40596 }, { "epoch": 4.1648543290931475, "grad_norm": 0.05250309780240059, "learning_rate": 0.01, "loss": 2.0032, "step": 40599 }, { "epoch": 4.16516208453016, "grad_norm": 0.05581989511847496, "learning_rate": 0.01, "loss": 2.0196, "step": 40602 }, { "epoch": 4.165469839967173, "grad_norm": 0.033916786313056946, "learning_rate": 0.01, "loss": 1.9937, "step": 40605 }, { "epoch": 4.165777595404186, "grad_norm": 0.11886032670736313, "learning_rate": 0.01, "loss": 2.0084, "step": 40608 }, { "epoch": 4.166085350841199, "grad_norm": 0.06368937343358994, "learning_rate": 0.01, "loss": 2.014, "step": 40611 }, { "epoch": 4.1663931062782105, "grad_norm": 0.04808172211050987, "learning_rate": 0.01, "loss": 2.0084, "step": 40614 }, { "epoch": 4.166700861715223, "grad_norm": 0.03308833763003349, "learning_rate": 0.01, "loss": 1.9924, "step": 40617 }, { "epoch": 4.167008617152236, "grad_norm": 0.041934408247470856, "learning_rate": 0.01, "loss": 1.9891, "step": 40620 }, { "epoch": 4.167316372589249, "grad_norm": 0.0798778086900711, "learning_rate": 0.01, "loss": 2.0101, "step": 40623 }, { "epoch": 4.167624128026262, "grad_norm": 0.055231209844350815, "learning_rate": 0.01, "loss": 1.9722, "step": 40626 }, { "epoch": 4.167931883463274, "grad_norm": 0.04620687663555145, "learning_rate": 0.01, "loss": 1.9981, "step": 40629 }, { "epoch": 4.168239638900287, "grad_norm": 0.09074298292398453, "learning_rate": 0.01, "loss": 2.0001, "step": 40632 }, { "epoch": 4.1685473943373, "grad_norm": 0.09173654764890671, "learning_rate": 0.01, "loss": 2.017, "step": 40635 }, { "epoch": 4.168855149774313, "grad_norm": 0.052037082612514496, "learning_rate": 0.01, "loss": 2.0113, "step": 40638 }, { "epoch": 4.1691629052113255, "grad_norm": 0.07793330401182175, "learning_rate": 0.01, "loss": 2.0135, "step": 40641 }, { "epoch": 4.169470660648338, "grad_norm": 0.12099308520555496, "learning_rate": 0.01, "loss": 1.9946, "step": 40644 }, { "epoch": 4.169778416085351, "grad_norm": 0.08884571492671967, "learning_rate": 0.01, "loss": 2.015, "step": 40647 }, { "epoch": 4.170086171522364, "grad_norm": 0.07165110856294632, "learning_rate": 0.01, "loss": 1.9748, "step": 40650 }, { "epoch": 4.170393926959377, "grad_norm": 0.10293866693973541, "learning_rate": 0.01, "loss": 2.004, "step": 40653 }, { "epoch": 4.170701682396389, "grad_norm": 0.09834318608045578, "learning_rate": 0.01, "loss": 2.0313, "step": 40656 }, { "epoch": 4.171009437833401, "grad_norm": 0.047723252326250076, "learning_rate": 0.01, "loss": 1.9854, "step": 40659 }, { "epoch": 4.171317193270414, "grad_norm": 0.05522621423006058, "learning_rate": 0.01, "loss": 2.0193, "step": 40662 }, { "epoch": 4.171624948707427, "grad_norm": 0.03939025476574898, "learning_rate": 0.01, "loss": 1.9989, "step": 40665 }, { "epoch": 4.17193270414444, "grad_norm": 0.03266485407948494, "learning_rate": 0.01, "loss": 2.0121, "step": 40668 }, { "epoch": 4.172240459581452, "grad_norm": 0.033200979232788086, "learning_rate": 0.01, "loss": 1.9716, "step": 40671 }, { "epoch": 4.172548215018465, "grad_norm": 0.07104014605283737, "learning_rate": 0.01, "loss": 1.9833, "step": 40674 }, { "epoch": 4.172855970455478, "grad_norm": 0.0823940560221672, "learning_rate": 0.01, "loss": 1.9914, "step": 40677 }, { "epoch": 4.173163725892491, "grad_norm": 0.062255583703517914, "learning_rate": 0.01, "loss": 2.0182, "step": 40680 }, { "epoch": 4.1734714813295035, "grad_norm": 0.13120077550411224, "learning_rate": 0.01, "loss": 1.9878, "step": 40683 }, { "epoch": 4.173779236766516, "grad_norm": 0.07655888795852661, "learning_rate": 0.01, "loss": 2.0142, "step": 40686 }, { "epoch": 4.174086992203529, "grad_norm": 0.07436554878950119, "learning_rate": 0.01, "loss": 1.9829, "step": 40689 }, { "epoch": 4.174394747640542, "grad_norm": 0.052044034004211426, "learning_rate": 0.01, "loss": 2.0017, "step": 40692 }, { "epoch": 4.174702503077555, "grad_norm": 0.0385587215423584, "learning_rate": 0.01, "loss": 1.9918, "step": 40695 }, { "epoch": 4.175010258514567, "grad_norm": 0.04062971845269203, "learning_rate": 0.01, "loss": 2.0106, "step": 40698 }, { "epoch": 4.17531801395158, "grad_norm": 0.05078468099236488, "learning_rate": 0.01, "loss": 2.0044, "step": 40701 }, { "epoch": 4.175625769388592, "grad_norm": 0.05820399895310402, "learning_rate": 0.01, "loss": 1.9979, "step": 40704 }, { "epoch": 4.175933524825605, "grad_norm": 0.05630197748541832, "learning_rate": 0.01, "loss": 2.0046, "step": 40707 }, { "epoch": 4.176241280262618, "grad_norm": 0.047048747539520264, "learning_rate": 0.01, "loss": 2.0068, "step": 40710 }, { "epoch": 4.17654903569963, "grad_norm": 0.09604250639677048, "learning_rate": 0.01, "loss": 2.0011, "step": 40713 }, { "epoch": 4.176856791136643, "grad_norm": 0.06629175692796707, "learning_rate": 0.01, "loss": 1.9967, "step": 40716 }, { "epoch": 4.177164546573656, "grad_norm": 0.1041264608502388, "learning_rate": 0.01, "loss": 2.0302, "step": 40719 }, { "epoch": 4.177472302010669, "grad_norm": 0.10279016941785812, "learning_rate": 0.01, "loss": 1.9811, "step": 40722 }, { "epoch": 4.1777800574476815, "grad_norm": 0.047386229038238525, "learning_rate": 0.01, "loss": 1.9901, "step": 40725 }, { "epoch": 4.178087812884694, "grad_norm": 0.03940087929368019, "learning_rate": 0.01, "loss": 2.0454, "step": 40728 }, { "epoch": 4.178395568321707, "grad_norm": 0.1065608337521553, "learning_rate": 0.01, "loss": 2.0117, "step": 40731 }, { "epoch": 4.17870332375872, "grad_norm": 0.03307456895709038, "learning_rate": 0.01, "loss": 1.9962, "step": 40734 }, { "epoch": 4.179011079195733, "grad_norm": 0.044952236115932465, "learning_rate": 0.01, "loss": 1.9888, "step": 40737 }, { "epoch": 4.1793188346327454, "grad_norm": 0.04699942097067833, "learning_rate": 0.01, "loss": 1.9779, "step": 40740 }, { "epoch": 4.179626590069758, "grad_norm": 0.04086962714791298, "learning_rate": 0.01, "loss": 2.0037, "step": 40743 }, { "epoch": 4.179934345506771, "grad_norm": 0.04364610090851784, "learning_rate": 0.01, "loss": 2.0134, "step": 40746 }, { "epoch": 4.180242100943783, "grad_norm": 0.13256701827049255, "learning_rate": 0.01, "loss": 2.0185, "step": 40749 }, { "epoch": 4.180549856380796, "grad_norm": 0.1418839991092682, "learning_rate": 0.01, "loss": 1.9959, "step": 40752 }, { "epoch": 4.1808576118178085, "grad_norm": 0.10980424284934998, "learning_rate": 0.01, "loss": 2.0026, "step": 40755 }, { "epoch": 4.181165367254821, "grad_norm": 0.11712696403265, "learning_rate": 0.01, "loss": 2.0136, "step": 40758 }, { "epoch": 4.181473122691834, "grad_norm": 0.04676150158047676, "learning_rate": 0.01, "loss": 2.0204, "step": 40761 }, { "epoch": 4.181780878128847, "grad_norm": 0.06010957807302475, "learning_rate": 0.01, "loss": 1.9897, "step": 40764 }, { "epoch": 4.18208863356586, "grad_norm": 0.04058792442083359, "learning_rate": 0.01, "loss": 1.9985, "step": 40767 }, { "epoch": 4.182396389002872, "grad_norm": 0.05905113369226456, "learning_rate": 0.01, "loss": 1.9825, "step": 40770 }, { "epoch": 4.182704144439885, "grad_norm": 0.031559672206640244, "learning_rate": 0.01, "loss": 1.9935, "step": 40773 }, { "epoch": 4.183011899876898, "grad_norm": 0.03369845449924469, "learning_rate": 0.01, "loss": 2.0148, "step": 40776 }, { "epoch": 4.183319655313911, "grad_norm": 0.03323826193809509, "learning_rate": 0.01, "loss": 2.0067, "step": 40779 }, { "epoch": 4.1836274107509235, "grad_norm": 0.09272177517414093, "learning_rate": 0.01, "loss": 1.9888, "step": 40782 }, { "epoch": 4.183935166187936, "grad_norm": 0.12024440616369247, "learning_rate": 0.01, "loss": 1.9638, "step": 40785 }, { "epoch": 4.184242921624949, "grad_norm": 0.06225709244608879, "learning_rate": 0.01, "loss": 1.9923, "step": 40788 }, { "epoch": 4.184550677061962, "grad_norm": 0.07221471518278122, "learning_rate": 0.01, "loss": 2.015, "step": 40791 }, { "epoch": 4.184858432498974, "grad_norm": 0.05865592509508133, "learning_rate": 0.01, "loss": 2.026, "step": 40794 }, { "epoch": 4.1851661879359865, "grad_norm": 0.05323261767625809, "learning_rate": 0.01, "loss": 1.9944, "step": 40797 }, { "epoch": 4.185473943372999, "grad_norm": 0.0738186463713646, "learning_rate": 0.01, "loss": 1.9985, "step": 40800 }, { "epoch": 4.185781698810012, "grad_norm": 0.05065621808171272, "learning_rate": 0.01, "loss": 2.0006, "step": 40803 }, { "epoch": 4.186089454247025, "grad_norm": 0.07014346867799759, "learning_rate": 0.01, "loss": 1.9919, "step": 40806 }, { "epoch": 4.186397209684038, "grad_norm": 0.09202650934457779, "learning_rate": 0.01, "loss": 2.0173, "step": 40809 }, { "epoch": 4.18670496512105, "grad_norm": 0.07297877967357635, "learning_rate": 0.01, "loss": 2.0011, "step": 40812 }, { "epoch": 4.187012720558063, "grad_norm": 0.11443314701318741, "learning_rate": 0.01, "loss": 2.0166, "step": 40815 }, { "epoch": 4.187320475995076, "grad_norm": 0.10481325536966324, "learning_rate": 0.01, "loss": 1.989, "step": 40818 }, { "epoch": 4.187628231432089, "grad_norm": 0.03423392400145531, "learning_rate": 0.01, "loss": 1.9982, "step": 40821 }, { "epoch": 4.1879359868691015, "grad_norm": 0.03760581091046333, "learning_rate": 0.01, "loss": 2.0011, "step": 40824 }, { "epoch": 4.188243742306114, "grad_norm": 0.04355360567569733, "learning_rate": 0.01, "loss": 1.9884, "step": 40827 }, { "epoch": 4.188551497743127, "grad_norm": 0.07982810586690903, "learning_rate": 0.01, "loss": 2.0474, "step": 40830 }, { "epoch": 4.18885925318014, "grad_norm": 0.17527154088020325, "learning_rate": 0.01, "loss": 2.0216, "step": 40833 }, { "epoch": 4.189167008617153, "grad_norm": 0.09842000156641006, "learning_rate": 0.01, "loss": 2.0119, "step": 40836 }, { "epoch": 4.1894747640541645, "grad_norm": 0.04522009938955307, "learning_rate": 0.01, "loss": 2.0005, "step": 40839 }, { "epoch": 4.189782519491177, "grad_norm": 0.03711741417646408, "learning_rate": 0.01, "loss": 2.014, "step": 40842 }, { "epoch": 4.19009027492819, "grad_norm": 0.06014026701450348, "learning_rate": 0.01, "loss": 2.007, "step": 40845 }, { "epoch": 4.190398030365203, "grad_norm": 0.06271639466285706, "learning_rate": 0.01, "loss": 1.9824, "step": 40848 }, { "epoch": 4.190705785802216, "grad_norm": 0.09300049394369125, "learning_rate": 0.01, "loss": 1.9865, "step": 40851 }, { "epoch": 4.191013541239228, "grad_norm": 0.05301731079816818, "learning_rate": 0.01, "loss": 1.9758, "step": 40854 }, { "epoch": 4.191321296676241, "grad_norm": 0.031089186668395996, "learning_rate": 0.01, "loss": 1.9994, "step": 40857 }, { "epoch": 4.191629052113254, "grad_norm": 0.04351799562573433, "learning_rate": 0.01, "loss": 2.0165, "step": 40860 }, { "epoch": 4.191936807550267, "grad_norm": 0.03952482342720032, "learning_rate": 0.01, "loss": 1.9941, "step": 40863 }, { "epoch": 4.1922445629872795, "grad_norm": 0.09664411842823029, "learning_rate": 0.01, "loss": 2.0385, "step": 40866 }, { "epoch": 4.192552318424292, "grad_norm": 0.11290398985147476, "learning_rate": 0.01, "loss": 1.9979, "step": 40869 }, { "epoch": 4.192860073861305, "grad_norm": 0.03346327319741249, "learning_rate": 0.01, "loss": 1.9883, "step": 40872 }, { "epoch": 4.193167829298318, "grad_norm": 0.07427312433719635, "learning_rate": 0.01, "loss": 1.981, "step": 40875 }, { "epoch": 4.193475584735331, "grad_norm": 0.053705740720033646, "learning_rate": 0.01, "loss": 1.9965, "step": 40878 }, { "epoch": 4.193783340172343, "grad_norm": 0.0523017942905426, "learning_rate": 0.01, "loss": 1.9998, "step": 40881 }, { "epoch": 4.194091095609355, "grad_norm": 0.056644510477781296, "learning_rate": 0.01, "loss": 2.0161, "step": 40884 }, { "epoch": 4.194398851046368, "grad_norm": 0.03145689144730568, "learning_rate": 0.01, "loss": 1.9953, "step": 40887 }, { "epoch": 4.194706606483381, "grad_norm": 0.05200685188174248, "learning_rate": 0.01, "loss": 2.0054, "step": 40890 }, { "epoch": 4.195014361920394, "grad_norm": 0.06475922465324402, "learning_rate": 0.01, "loss": 2.0003, "step": 40893 }, { "epoch": 4.1953221173574065, "grad_norm": 0.0805899053812027, "learning_rate": 0.01, "loss": 1.9886, "step": 40896 }, { "epoch": 4.195629872794419, "grad_norm": 0.09969832003116608, "learning_rate": 0.01, "loss": 2.0236, "step": 40899 }, { "epoch": 4.195937628231432, "grad_norm": 0.0634358674287796, "learning_rate": 0.01, "loss": 2.0091, "step": 40902 }, { "epoch": 4.196245383668445, "grad_norm": 0.09012358635663986, "learning_rate": 0.01, "loss": 2.0019, "step": 40905 }, { "epoch": 4.196553139105458, "grad_norm": 0.06891899555921555, "learning_rate": 0.01, "loss": 2.0084, "step": 40908 }, { "epoch": 4.19686089454247, "grad_norm": 0.07446330040693283, "learning_rate": 0.01, "loss": 2.0082, "step": 40911 }, { "epoch": 4.197168649979483, "grad_norm": 0.03457944840192795, "learning_rate": 0.01, "loss": 1.9777, "step": 40914 }, { "epoch": 4.197476405416496, "grad_norm": 0.03754154592752457, "learning_rate": 0.01, "loss": 1.9845, "step": 40917 }, { "epoch": 4.197784160853509, "grad_norm": 0.0626758560538292, "learning_rate": 0.01, "loss": 2.0333, "step": 40920 }, { "epoch": 4.1980919162905215, "grad_norm": 0.0431663803756237, "learning_rate": 0.01, "loss": 1.9997, "step": 40923 }, { "epoch": 4.198399671727534, "grad_norm": 0.059978071600198746, "learning_rate": 0.01, "loss": 2.0219, "step": 40926 }, { "epoch": 4.198707427164546, "grad_norm": 0.045393504202365875, "learning_rate": 0.01, "loss": 2.0355, "step": 40929 }, { "epoch": 4.199015182601559, "grad_norm": 0.1481604129076004, "learning_rate": 0.01, "loss": 1.9887, "step": 40932 }, { "epoch": 4.199322938038572, "grad_norm": 0.08962929248809814, "learning_rate": 0.01, "loss": 1.9766, "step": 40935 }, { "epoch": 4.1996306934755845, "grad_norm": 0.10029556602239609, "learning_rate": 0.01, "loss": 2.0261, "step": 40938 }, { "epoch": 4.199938448912597, "grad_norm": 0.060972318053245544, "learning_rate": 0.01, "loss": 1.9932, "step": 40941 }, { "epoch": 4.20024620434961, "grad_norm": 0.08175275474786758, "learning_rate": 0.01, "loss": 2.0086, "step": 40944 }, { "epoch": 4.200553959786623, "grad_norm": 0.07742544263601303, "learning_rate": 0.01, "loss": 1.9752, "step": 40947 }, { "epoch": 4.200861715223636, "grad_norm": 0.06145945191383362, "learning_rate": 0.01, "loss": 2.0089, "step": 40950 }, { "epoch": 4.201169470660648, "grad_norm": 0.03785976395010948, "learning_rate": 0.01, "loss": 1.9858, "step": 40953 }, { "epoch": 4.201477226097661, "grad_norm": 0.049219708889722824, "learning_rate": 0.01, "loss": 2.0119, "step": 40956 }, { "epoch": 4.201784981534674, "grad_norm": 0.05865636467933655, "learning_rate": 0.01, "loss": 1.9859, "step": 40959 }, { "epoch": 4.202092736971687, "grad_norm": 0.04115782678127289, "learning_rate": 0.01, "loss": 2.0084, "step": 40962 }, { "epoch": 4.2024004924086995, "grad_norm": 0.07156416773796082, "learning_rate": 0.01, "loss": 2.0183, "step": 40965 }, { "epoch": 4.202708247845712, "grad_norm": 0.11241970211267471, "learning_rate": 0.01, "loss": 2.0136, "step": 40968 }, { "epoch": 4.203016003282725, "grad_norm": 0.052014194428920746, "learning_rate": 0.01, "loss": 2.0316, "step": 40971 }, { "epoch": 4.203323758719737, "grad_norm": 0.03571850433945656, "learning_rate": 0.01, "loss": 2.006, "step": 40974 }, { "epoch": 4.20363151415675, "grad_norm": 0.03782523795962334, "learning_rate": 0.01, "loss": 2.0127, "step": 40977 }, { "epoch": 4.2039392695937625, "grad_norm": 0.05531090870499611, "learning_rate": 0.01, "loss": 1.991, "step": 40980 }, { "epoch": 4.204247025030775, "grad_norm": 0.04620610550045967, "learning_rate": 0.01, "loss": 1.9929, "step": 40983 }, { "epoch": 4.204554780467788, "grad_norm": 0.034513216465711594, "learning_rate": 0.01, "loss": 2.004, "step": 40986 }, { "epoch": 4.204862535904801, "grad_norm": 0.12451375275850296, "learning_rate": 0.01, "loss": 1.9979, "step": 40989 }, { "epoch": 4.205170291341814, "grad_norm": 0.038147494196891785, "learning_rate": 0.01, "loss": 2.0012, "step": 40992 }, { "epoch": 4.205478046778826, "grad_norm": 0.0428970605134964, "learning_rate": 0.01, "loss": 1.9953, "step": 40995 }, { "epoch": 4.205785802215839, "grad_norm": 0.04861719533801079, "learning_rate": 0.01, "loss": 1.9732, "step": 40998 }, { "epoch": 4.206093557652852, "grad_norm": 0.05677000805735588, "learning_rate": 0.01, "loss": 1.9984, "step": 41001 }, { "epoch": 4.206401313089865, "grad_norm": 0.040025245398283005, "learning_rate": 0.01, "loss": 2.0046, "step": 41004 }, { "epoch": 4.2067090685268775, "grad_norm": 0.09284701943397522, "learning_rate": 0.01, "loss": 1.9852, "step": 41007 }, { "epoch": 4.20701682396389, "grad_norm": 0.06293929368257523, "learning_rate": 0.01, "loss": 1.9807, "step": 41010 }, { "epoch": 4.207324579400903, "grad_norm": 0.10422204434871674, "learning_rate": 0.01, "loss": 2.0103, "step": 41013 }, { "epoch": 4.207632334837916, "grad_norm": 0.05504370108246803, "learning_rate": 0.01, "loss": 2.0027, "step": 41016 }, { "epoch": 4.207940090274928, "grad_norm": 0.03916056081652641, "learning_rate": 0.01, "loss": 1.9898, "step": 41019 }, { "epoch": 4.2082478457119405, "grad_norm": 0.05535780265927315, "learning_rate": 0.01, "loss": 1.9761, "step": 41022 }, { "epoch": 4.208555601148953, "grad_norm": 0.11614353209733963, "learning_rate": 0.01, "loss": 2.0394, "step": 41025 }, { "epoch": 4.208863356585966, "grad_norm": 0.054821841418743134, "learning_rate": 0.01, "loss": 2.0171, "step": 41028 }, { "epoch": 4.209171112022979, "grad_norm": 0.0761130154132843, "learning_rate": 0.01, "loss": 2.0194, "step": 41031 }, { "epoch": 4.209478867459992, "grad_norm": 0.05529718101024628, "learning_rate": 0.01, "loss": 2.0132, "step": 41034 }, { "epoch": 4.2097866228970044, "grad_norm": 0.05739063769578934, "learning_rate": 0.01, "loss": 2.0408, "step": 41037 }, { "epoch": 4.210094378334017, "grad_norm": 0.03629623353481293, "learning_rate": 0.01, "loss": 2.0132, "step": 41040 }, { "epoch": 4.21040213377103, "grad_norm": 0.09579025954008102, "learning_rate": 0.01, "loss": 2.0051, "step": 41043 }, { "epoch": 4.210709889208043, "grad_norm": 0.09709680080413818, "learning_rate": 0.01, "loss": 2.0082, "step": 41046 }, { "epoch": 4.211017644645056, "grad_norm": 0.06710288673639297, "learning_rate": 0.01, "loss": 2.0092, "step": 41049 }, { "epoch": 4.211325400082068, "grad_norm": 0.17285798490047455, "learning_rate": 0.01, "loss": 2.0525, "step": 41052 }, { "epoch": 4.211633155519081, "grad_norm": 0.13886108994483948, "learning_rate": 0.01, "loss": 2.0024, "step": 41055 }, { "epoch": 4.211940910956094, "grad_norm": 0.08146315068006516, "learning_rate": 0.01, "loss": 2.0194, "step": 41058 }, { "epoch": 4.212248666393107, "grad_norm": 0.041313815861940384, "learning_rate": 0.01, "loss": 1.9926, "step": 41061 }, { "epoch": 4.212556421830119, "grad_norm": 0.07856039702892303, "learning_rate": 0.01, "loss": 1.9945, "step": 41064 }, { "epoch": 4.212864177267131, "grad_norm": 0.03942018374800682, "learning_rate": 0.01, "loss": 1.9853, "step": 41067 }, { "epoch": 4.213171932704144, "grad_norm": 0.06405628472566605, "learning_rate": 0.01, "loss": 2.0234, "step": 41070 }, { "epoch": 4.213479688141157, "grad_norm": 0.11233191937208176, "learning_rate": 0.01, "loss": 2.0014, "step": 41073 }, { "epoch": 4.21378744357817, "grad_norm": 0.061680734157562256, "learning_rate": 0.01, "loss": 1.9946, "step": 41076 }, { "epoch": 4.2140951990151825, "grad_norm": 0.049544453620910645, "learning_rate": 0.01, "loss": 2.0133, "step": 41079 }, { "epoch": 4.214402954452195, "grad_norm": 0.03959604352712631, "learning_rate": 0.01, "loss": 1.9868, "step": 41082 }, { "epoch": 4.214710709889208, "grad_norm": 0.03029470518231392, "learning_rate": 0.01, "loss": 1.9844, "step": 41085 }, { "epoch": 4.215018465326221, "grad_norm": 0.04742530733346939, "learning_rate": 0.01, "loss": 2.0083, "step": 41088 }, { "epoch": 4.215326220763234, "grad_norm": 0.11508273333311081, "learning_rate": 0.01, "loss": 2.0078, "step": 41091 }, { "epoch": 4.215633976200246, "grad_norm": 0.16011863946914673, "learning_rate": 0.01, "loss": 2.0077, "step": 41094 }, { "epoch": 4.215941731637259, "grad_norm": 0.0831206887960434, "learning_rate": 0.01, "loss": 2.0086, "step": 41097 }, { "epoch": 4.216249487074272, "grad_norm": 0.05117999389767647, "learning_rate": 0.01, "loss": 2.0104, "step": 41100 }, { "epoch": 4.216557242511285, "grad_norm": 0.08137553930282593, "learning_rate": 0.01, "loss": 2.0067, "step": 41103 }, { "epoch": 4.2168649979482975, "grad_norm": 0.04084109142422676, "learning_rate": 0.01, "loss": 1.9989, "step": 41106 }, { "epoch": 4.217172753385309, "grad_norm": 0.11708495765924454, "learning_rate": 0.01, "loss": 2.0223, "step": 41109 }, { "epoch": 4.217480508822322, "grad_norm": 0.036096177995204926, "learning_rate": 0.01, "loss": 2.0058, "step": 41112 }, { "epoch": 4.217788264259335, "grad_norm": 0.03205428272485733, "learning_rate": 0.01, "loss": 2.0139, "step": 41115 }, { "epoch": 4.218096019696348, "grad_norm": 0.05120716243982315, "learning_rate": 0.01, "loss": 1.9981, "step": 41118 }, { "epoch": 4.2184037751333605, "grad_norm": 0.08021282404661179, "learning_rate": 0.01, "loss": 1.9965, "step": 41121 }, { "epoch": 4.218711530570373, "grad_norm": 0.05003154277801514, "learning_rate": 0.01, "loss": 2.0134, "step": 41124 }, { "epoch": 4.219019286007386, "grad_norm": 0.0867641344666481, "learning_rate": 0.01, "loss": 2.01, "step": 41127 }, { "epoch": 4.219327041444399, "grad_norm": 0.0697675347328186, "learning_rate": 0.01, "loss": 1.9933, "step": 41130 }, { "epoch": 4.219634796881412, "grad_norm": 0.06696760654449463, "learning_rate": 0.01, "loss": 1.9863, "step": 41133 }, { "epoch": 4.219942552318424, "grad_norm": 0.09599725902080536, "learning_rate": 0.01, "loss": 1.9817, "step": 41136 }, { "epoch": 4.220250307755437, "grad_norm": 0.04233787953853607, "learning_rate": 0.01, "loss": 1.9715, "step": 41139 }, { "epoch": 4.22055806319245, "grad_norm": 0.08935974538326263, "learning_rate": 0.01, "loss": 2.0081, "step": 41142 }, { "epoch": 4.220865818629463, "grad_norm": 0.049136627465486526, "learning_rate": 0.01, "loss": 1.993, "step": 41145 }, { "epoch": 4.2211735740664755, "grad_norm": 0.0316915288567543, "learning_rate": 0.01, "loss": 2.006, "step": 41148 }, { "epoch": 4.221481329503488, "grad_norm": 0.04271787405014038, "learning_rate": 0.01, "loss": 2.002, "step": 41151 }, { "epoch": 4.2217890849405, "grad_norm": 0.03403954207897186, "learning_rate": 0.01, "loss": 2.0159, "step": 41154 }, { "epoch": 4.222096840377513, "grad_norm": 0.07457596808671951, "learning_rate": 0.01, "loss": 1.9975, "step": 41157 }, { "epoch": 4.222404595814526, "grad_norm": 0.06875099986791611, "learning_rate": 0.01, "loss": 1.9668, "step": 41160 }, { "epoch": 4.2227123512515385, "grad_norm": 0.05186415836215019, "learning_rate": 0.01, "loss": 1.9955, "step": 41163 }, { "epoch": 4.223020106688551, "grad_norm": 0.051673002541065216, "learning_rate": 0.01, "loss": 1.9831, "step": 41166 }, { "epoch": 4.223327862125564, "grad_norm": 0.03935972973704338, "learning_rate": 0.01, "loss": 1.9826, "step": 41169 }, { "epoch": 4.223635617562577, "grad_norm": 0.07746358215808868, "learning_rate": 0.01, "loss": 1.9943, "step": 41172 }, { "epoch": 4.22394337299959, "grad_norm": 0.14719370007514954, "learning_rate": 0.01, "loss": 2.0213, "step": 41175 }, { "epoch": 4.224251128436602, "grad_norm": 0.08811825513839722, "learning_rate": 0.01, "loss": 2.0064, "step": 41178 }, { "epoch": 4.224558883873615, "grad_norm": 0.04378504678606987, "learning_rate": 0.01, "loss": 2.0103, "step": 41181 }, { "epoch": 4.224866639310628, "grad_norm": 0.03978777304291725, "learning_rate": 0.01, "loss": 2.006, "step": 41184 }, { "epoch": 4.225174394747641, "grad_norm": 0.03860542178153992, "learning_rate": 0.01, "loss": 2.009, "step": 41187 }, { "epoch": 4.225482150184654, "grad_norm": 0.06839700043201447, "learning_rate": 0.01, "loss": 2.0195, "step": 41190 }, { "epoch": 4.225789905621666, "grad_norm": 0.09107422828674316, "learning_rate": 0.01, "loss": 2.0095, "step": 41193 }, { "epoch": 4.226097661058679, "grad_norm": 0.08443713933229446, "learning_rate": 0.01, "loss": 1.9715, "step": 41196 }, { "epoch": 4.226405416495691, "grad_norm": 0.07428136467933655, "learning_rate": 0.01, "loss": 1.9926, "step": 41199 }, { "epoch": 4.226713171932704, "grad_norm": 0.07102616876363754, "learning_rate": 0.01, "loss": 2.0161, "step": 41202 }, { "epoch": 4.227020927369717, "grad_norm": 0.06131945922970772, "learning_rate": 0.01, "loss": 1.9965, "step": 41205 }, { "epoch": 4.227328682806729, "grad_norm": 0.07921816408634186, "learning_rate": 0.01, "loss": 2.021, "step": 41208 }, { "epoch": 4.227636438243742, "grad_norm": 0.05274220556020737, "learning_rate": 0.01, "loss": 1.9706, "step": 41211 }, { "epoch": 4.227944193680755, "grad_norm": 0.12561160326004028, "learning_rate": 0.01, "loss": 2.0009, "step": 41214 }, { "epoch": 4.228251949117768, "grad_norm": 0.053055427968502045, "learning_rate": 0.01, "loss": 2.0263, "step": 41217 }, { "epoch": 4.2285597045547805, "grad_norm": 0.04999428242444992, "learning_rate": 0.01, "loss": 2.0028, "step": 41220 }, { "epoch": 4.228867459991793, "grad_norm": 0.06127345934510231, "learning_rate": 0.01, "loss": 2.0279, "step": 41223 }, { "epoch": 4.229175215428806, "grad_norm": 0.05634259432554245, "learning_rate": 0.01, "loss": 2.0321, "step": 41226 }, { "epoch": 4.229482970865819, "grad_norm": 0.030200546607375145, "learning_rate": 0.01, "loss": 1.9793, "step": 41229 }, { "epoch": 4.229790726302832, "grad_norm": 0.12201087921857834, "learning_rate": 0.01, "loss": 1.9938, "step": 41232 }, { "epoch": 4.230098481739844, "grad_norm": 0.07482577860355377, "learning_rate": 0.01, "loss": 2.0076, "step": 41235 }, { "epoch": 4.230406237176857, "grad_norm": 0.10311067849397659, "learning_rate": 0.01, "loss": 2.0183, "step": 41238 }, { "epoch": 4.23071399261387, "grad_norm": 0.09040312469005585, "learning_rate": 0.01, "loss": 1.9846, "step": 41241 }, { "epoch": 4.231021748050882, "grad_norm": 0.06239837408065796, "learning_rate": 0.01, "loss": 2.0053, "step": 41244 }, { "epoch": 4.231329503487895, "grad_norm": 0.033743564039468765, "learning_rate": 0.01, "loss": 2.0273, "step": 41247 }, { "epoch": 4.231637258924907, "grad_norm": 0.046916600316762924, "learning_rate": 0.01, "loss": 1.9828, "step": 41250 }, { "epoch": 4.23194501436192, "grad_norm": 0.06271935999393463, "learning_rate": 0.01, "loss": 2.0086, "step": 41253 }, { "epoch": 4.232252769798933, "grad_norm": 0.05794825777411461, "learning_rate": 0.01, "loss": 1.9867, "step": 41256 }, { "epoch": 4.232560525235946, "grad_norm": 0.0886342003941536, "learning_rate": 0.01, "loss": 2.011, "step": 41259 }, { "epoch": 4.2328682806729585, "grad_norm": 0.06492005288600922, "learning_rate": 0.01, "loss": 1.994, "step": 41262 }, { "epoch": 4.233176036109971, "grad_norm": 0.1275957226753235, "learning_rate": 0.01, "loss": 2.0202, "step": 41265 }, { "epoch": 4.233483791546984, "grad_norm": 0.08362898975610733, "learning_rate": 0.01, "loss": 2.0203, "step": 41268 }, { "epoch": 4.233791546983997, "grad_norm": 0.0909474641084671, "learning_rate": 0.01, "loss": 2.0047, "step": 41271 }, { "epoch": 4.23409930242101, "grad_norm": 0.04275381565093994, "learning_rate": 0.01, "loss": 2.0163, "step": 41274 }, { "epoch": 4.234407057858022, "grad_norm": 0.04790336266160011, "learning_rate": 0.01, "loss": 2.0001, "step": 41277 }, { "epoch": 4.234714813295035, "grad_norm": 0.03371073678135872, "learning_rate": 0.01, "loss": 1.9926, "step": 41280 }, { "epoch": 4.235022568732048, "grad_norm": 0.05681711062788963, "learning_rate": 0.01, "loss": 2.0195, "step": 41283 }, { "epoch": 4.235330324169061, "grad_norm": 0.040656253695487976, "learning_rate": 0.01, "loss": 2.0215, "step": 41286 }, { "epoch": 4.235638079606073, "grad_norm": 0.04097260907292366, "learning_rate": 0.01, "loss": 2.0042, "step": 41289 }, { "epoch": 4.235945835043085, "grad_norm": 0.042979929596185684, "learning_rate": 0.01, "loss": 1.9833, "step": 41292 }, { "epoch": 4.236253590480098, "grad_norm": 0.03832445293664932, "learning_rate": 0.01, "loss": 1.9915, "step": 41295 }, { "epoch": 4.236561345917111, "grad_norm": 0.04340042173862457, "learning_rate": 0.01, "loss": 2.0301, "step": 41298 }, { "epoch": 4.236869101354124, "grad_norm": 0.10501760244369507, "learning_rate": 0.01, "loss": 2.0058, "step": 41301 }, { "epoch": 4.2371768567911365, "grad_norm": 0.09777186065912247, "learning_rate": 0.01, "loss": 2.0283, "step": 41304 }, { "epoch": 4.237484612228149, "grad_norm": 0.06624646484851837, "learning_rate": 0.01, "loss": 2.0187, "step": 41307 }, { "epoch": 4.237792367665162, "grad_norm": 0.055278148502111435, "learning_rate": 0.01, "loss": 1.9958, "step": 41310 }, { "epoch": 4.238100123102175, "grad_norm": 0.11712317168712616, "learning_rate": 0.01, "loss": 1.9903, "step": 41313 }, { "epoch": 4.238407878539188, "grad_norm": 0.06384153664112091, "learning_rate": 0.01, "loss": 2.015, "step": 41316 }, { "epoch": 4.2387156339762, "grad_norm": 0.0486673079431057, "learning_rate": 0.01, "loss": 1.9864, "step": 41319 }, { "epoch": 4.239023389413213, "grad_norm": 0.04492638260126114, "learning_rate": 0.01, "loss": 2.0086, "step": 41322 }, { "epoch": 4.239331144850226, "grad_norm": 0.034185852855443954, "learning_rate": 0.01, "loss": 2.0209, "step": 41325 }, { "epoch": 4.239638900287239, "grad_norm": 0.04562927410006523, "learning_rate": 0.01, "loss": 2.0047, "step": 41328 }, { "epoch": 4.239946655724252, "grad_norm": 0.1411084234714508, "learning_rate": 0.01, "loss": 1.9964, "step": 41331 }, { "epoch": 4.2402544111612634, "grad_norm": 0.17035670578479767, "learning_rate": 0.01, "loss": 1.9915, "step": 41334 }, { "epoch": 4.240562166598276, "grad_norm": 0.06952272355556488, "learning_rate": 0.01, "loss": 2.0187, "step": 41337 }, { "epoch": 4.240869922035289, "grad_norm": 0.08368392288684845, "learning_rate": 0.01, "loss": 1.9897, "step": 41340 }, { "epoch": 4.241177677472302, "grad_norm": 0.04599005728960037, "learning_rate": 0.01, "loss": 2.0073, "step": 41343 }, { "epoch": 4.241485432909315, "grad_norm": 0.045579466968774796, "learning_rate": 0.01, "loss": 1.9885, "step": 41346 }, { "epoch": 4.241793188346327, "grad_norm": 0.049636196345090866, "learning_rate": 0.01, "loss": 1.9875, "step": 41349 }, { "epoch": 4.24210094378334, "grad_norm": 0.047992490231990814, "learning_rate": 0.01, "loss": 1.9881, "step": 41352 }, { "epoch": 4.242408699220353, "grad_norm": 0.05155181884765625, "learning_rate": 0.01, "loss": 1.9802, "step": 41355 }, { "epoch": 4.242716454657366, "grad_norm": 0.04805142432451248, "learning_rate": 0.01, "loss": 2.0093, "step": 41358 }, { "epoch": 4.2430242100943785, "grad_norm": 0.09783121943473816, "learning_rate": 0.01, "loss": 1.9703, "step": 41361 }, { "epoch": 4.243331965531391, "grad_norm": 0.05536942929029465, "learning_rate": 0.01, "loss": 1.9836, "step": 41364 }, { "epoch": 4.243639720968404, "grad_norm": 0.1335456669330597, "learning_rate": 0.01, "loss": 1.9989, "step": 41367 }, { "epoch": 4.243947476405417, "grad_norm": 0.05100298300385475, "learning_rate": 0.01, "loss": 2.0223, "step": 41370 }, { "epoch": 4.24425523184243, "grad_norm": 0.04158430173993111, "learning_rate": 0.01, "loss": 2.0127, "step": 41373 }, { "epoch": 4.244562987279442, "grad_norm": 0.03490443900227547, "learning_rate": 0.01, "loss": 2.0035, "step": 41376 }, { "epoch": 4.244870742716454, "grad_norm": 0.03529435768723488, "learning_rate": 0.01, "loss": 2.0055, "step": 41379 }, { "epoch": 4.245178498153467, "grad_norm": 0.03825982287526131, "learning_rate": 0.01, "loss": 2.0138, "step": 41382 }, { "epoch": 4.24548625359048, "grad_norm": 0.11641440540552139, "learning_rate": 0.01, "loss": 2.0487, "step": 41385 }, { "epoch": 4.245794009027493, "grad_norm": 0.043827321380376816, "learning_rate": 0.01, "loss": 2.009, "step": 41388 }, { "epoch": 4.246101764464505, "grad_norm": 0.060956139117479324, "learning_rate": 0.01, "loss": 1.9972, "step": 41391 }, { "epoch": 4.246409519901518, "grad_norm": 0.11849243938922882, "learning_rate": 0.01, "loss": 1.9968, "step": 41394 }, { "epoch": 4.246717275338531, "grad_norm": 0.08758600056171417, "learning_rate": 0.01, "loss": 1.9596, "step": 41397 }, { "epoch": 4.247025030775544, "grad_norm": 0.0848364606499672, "learning_rate": 0.01, "loss": 2.0016, "step": 41400 }, { "epoch": 4.2473327862125565, "grad_norm": 0.08009137958288193, "learning_rate": 0.01, "loss": 1.9934, "step": 41403 }, { "epoch": 4.247640541649569, "grad_norm": 0.06123640015721321, "learning_rate": 0.01, "loss": 1.9974, "step": 41406 }, { "epoch": 4.247948297086582, "grad_norm": 0.042272213846445084, "learning_rate": 0.01, "loss": 2.0196, "step": 41409 }, { "epoch": 4.248256052523595, "grad_norm": 0.051934532821178436, "learning_rate": 0.01, "loss": 2.0142, "step": 41412 }, { "epoch": 4.248563807960608, "grad_norm": 0.04881110414862633, "learning_rate": 0.01, "loss": 1.9795, "step": 41415 }, { "epoch": 4.24887156339762, "grad_norm": 0.03183252364397049, "learning_rate": 0.01, "loss": 1.9986, "step": 41418 }, { "epoch": 4.249179318834633, "grad_norm": 0.1036367192864418, "learning_rate": 0.01, "loss": 1.976, "step": 41421 }, { "epoch": 4.249487074271645, "grad_norm": 0.08779038488864899, "learning_rate": 0.01, "loss": 2.0271, "step": 41424 }, { "epoch": 4.249794829708658, "grad_norm": 0.07684400677680969, "learning_rate": 0.01, "loss": 1.9796, "step": 41427 }, { "epoch": 4.250102585145671, "grad_norm": 0.08273176103830338, "learning_rate": 0.01, "loss": 1.975, "step": 41430 }, { "epoch": 4.250410340582683, "grad_norm": 0.04036150500178337, "learning_rate": 0.01, "loss": 1.9984, "step": 41433 }, { "epoch": 4.250718096019696, "grad_norm": 0.04532729461789131, "learning_rate": 0.01, "loss": 1.9905, "step": 41436 }, { "epoch": 4.251025851456709, "grad_norm": 0.045257970690727234, "learning_rate": 0.01, "loss": 2.0002, "step": 41439 }, { "epoch": 4.251333606893722, "grad_norm": 0.11603814363479614, "learning_rate": 0.01, "loss": 1.9832, "step": 41442 }, { "epoch": 4.2516413623307345, "grad_norm": 0.07269345223903656, "learning_rate": 0.01, "loss": 1.9901, "step": 41445 }, { "epoch": 4.251949117767747, "grad_norm": 0.06193891167640686, "learning_rate": 0.01, "loss": 2.0064, "step": 41448 }, { "epoch": 4.25225687320476, "grad_norm": 0.09230732172727585, "learning_rate": 0.01, "loss": 2.0053, "step": 41451 }, { "epoch": 4.252564628641773, "grad_norm": 0.04278898611664772, "learning_rate": 0.01, "loss": 1.9902, "step": 41454 }, { "epoch": 4.252872384078786, "grad_norm": 0.04253445938229561, "learning_rate": 0.01, "loss": 2.0249, "step": 41457 }, { "epoch": 4.253180139515798, "grad_norm": 0.07245481014251709, "learning_rate": 0.01, "loss": 2.0126, "step": 41460 }, { "epoch": 4.253487894952811, "grad_norm": 0.10126817971467972, "learning_rate": 0.01, "loss": 1.9852, "step": 41463 }, { "epoch": 4.253795650389824, "grad_norm": 0.04528803005814552, "learning_rate": 0.01, "loss": 2.0018, "step": 41466 }, { "epoch": 4.254103405826836, "grad_norm": 0.04987543821334839, "learning_rate": 0.01, "loss": 1.9757, "step": 41469 }, { "epoch": 4.254411161263849, "grad_norm": 0.05521645396947861, "learning_rate": 0.01, "loss": 2.0067, "step": 41472 }, { "epoch": 4.254718916700861, "grad_norm": 0.06709643453359604, "learning_rate": 0.01, "loss": 1.9811, "step": 41475 }, { "epoch": 4.255026672137874, "grad_norm": 0.07900220155715942, "learning_rate": 0.01, "loss": 2.0332, "step": 41478 }, { "epoch": 4.255334427574887, "grad_norm": 0.06128143519163132, "learning_rate": 0.01, "loss": 2.0061, "step": 41481 }, { "epoch": 4.2556421830119, "grad_norm": 0.044023312628269196, "learning_rate": 0.01, "loss": 2.0242, "step": 41484 }, { "epoch": 4.255949938448913, "grad_norm": 0.034501735121011734, "learning_rate": 0.01, "loss": 1.9939, "step": 41487 }, { "epoch": 4.256257693885925, "grad_norm": 0.08301688730716705, "learning_rate": 0.01, "loss": 1.9939, "step": 41490 }, { "epoch": 4.256565449322938, "grad_norm": 0.05008229240775108, "learning_rate": 0.01, "loss": 1.9912, "step": 41493 }, { "epoch": 4.256873204759951, "grad_norm": 0.08899793028831482, "learning_rate": 0.01, "loss": 2.0225, "step": 41496 }, { "epoch": 4.257180960196964, "grad_norm": 0.055265627801418304, "learning_rate": 0.01, "loss": 2.0065, "step": 41499 }, { "epoch": 4.2574887156339765, "grad_norm": 0.04912308603525162, "learning_rate": 0.01, "loss": 1.9914, "step": 41502 }, { "epoch": 4.257796471070989, "grad_norm": 0.057400181889534, "learning_rate": 0.01, "loss": 1.9995, "step": 41505 }, { "epoch": 4.258104226508002, "grad_norm": 0.13875073194503784, "learning_rate": 0.01, "loss": 2.0468, "step": 41508 }, { "epoch": 4.258411981945015, "grad_norm": 0.10987017303705215, "learning_rate": 0.01, "loss": 1.9992, "step": 41511 }, { "epoch": 4.258719737382027, "grad_norm": 0.06881477683782578, "learning_rate": 0.01, "loss": 2.0203, "step": 41514 }, { "epoch": 4.2590274928190395, "grad_norm": 0.06515093892812729, "learning_rate": 0.01, "loss": 2.0139, "step": 41517 }, { "epoch": 4.259335248256052, "grad_norm": 0.05030984431505203, "learning_rate": 0.01, "loss": 1.9992, "step": 41520 }, { "epoch": 4.259643003693065, "grad_norm": 0.061900872737169266, "learning_rate": 0.01, "loss": 2.0136, "step": 41523 }, { "epoch": 4.259950759130078, "grad_norm": 0.052526991814374924, "learning_rate": 0.01, "loss": 2.0008, "step": 41526 }, { "epoch": 4.260258514567091, "grad_norm": 0.07355368137359619, "learning_rate": 0.01, "loss": 2.0165, "step": 41529 }, { "epoch": 4.260566270004103, "grad_norm": 0.09191373735666275, "learning_rate": 0.01, "loss": 2.0091, "step": 41532 }, { "epoch": 4.260874025441116, "grad_norm": 0.04627171531319618, "learning_rate": 0.01, "loss": 1.9867, "step": 41535 }, { "epoch": 4.261181780878129, "grad_norm": 0.04639091342687607, "learning_rate": 0.01, "loss": 2.002, "step": 41538 }, { "epoch": 4.261489536315142, "grad_norm": 0.0914679765701294, "learning_rate": 0.01, "loss": 1.9957, "step": 41541 }, { "epoch": 4.2617972917521545, "grad_norm": 0.08489304035902023, "learning_rate": 0.01, "loss": 1.9837, "step": 41544 }, { "epoch": 4.262105047189167, "grad_norm": 0.03301496058702469, "learning_rate": 0.01, "loss": 1.9908, "step": 41547 }, { "epoch": 4.26241280262618, "grad_norm": 0.08343908190727234, "learning_rate": 0.01, "loss": 2.0061, "step": 41550 }, { "epoch": 4.262720558063193, "grad_norm": 0.05923202261328697, "learning_rate": 0.01, "loss": 1.9929, "step": 41553 }, { "epoch": 4.263028313500206, "grad_norm": 0.044505976140499115, "learning_rate": 0.01, "loss": 2.0007, "step": 41556 }, { "epoch": 4.2633360689372175, "grad_norm": 0.05136754736304283, "learning_rate": 0.01, "loss": 1.9989, "step": 41559 }, { "epoch": 4.26364382437423, "grad_norm": 0.0830690786242485, "learning_rate": 0.01, "loss": 2.011, "step": 41562 }, { "epoch": 4.263951579811243, "grad_norm": 0.11335346102714539, "learning_rate": 0.01, "loss": 1.9812, "step": 41565 }, { "epoch": 4.264259335248256, "grad_norm": 0.0629742443561554, "learning_rate": 0.01, "loss": 2.0121, "step": 41568 }, { "epoch": 4.264567090685269, "grad_norm": 0.09297124296426773, "learning_rate": 0.01, "loss": 2.0126, "step": 41571 }, { "epoch": 4.264874846122281, "grad_norm": 0.05171617120504379, "learning_rate": 0.01, "loss": 1.9528, "step": 41574 }, { "epoch": 4.265182601559294, "grad_norm": 0.09433134645223618, "learning_rate": 0.01, "loss": 2.005, "step": 41577 }, { "epoch": 4.265490356996307, "grad_norm": 0.058073755353689194, "learning_rate": 0.01, "loss": 2.0063, "step": 41580 }, { "epoch": 4.26579811243332, "grad_norm": 0.037740156054496765, "learning_rate": 0.01, "loss": 2.0153, "step": 41583 }, { "epoch": 4.2661058678703325, "grad_norm": 0.12522292137145996, "learning_rate": 0.01, "loss": 1.9934, "step": 41586 }, { "epoch": 4.266413623307345, "grad_norm": 0.07263893634080887, "learning_rate": 0.01, "loss": 2.0016, "step": 41589 }, { "epoch": 4.266721378744358, "grad_norm": 0.08800274133682251, "learning_rate": 0.01, "loss": 1.9993, "step": 41592 }, { "epoch": 4.267029134181371, "grad_norm": 0.04018472880125046, "learning_rate": 0.01, "loss": 1.9909, "step": 41595 }, { "epoch": 4.267336889618384, "grad_norm": 0.04245147481560707, "learning_rate": 0.01, "loss": 1.9882, "step": 41598 }, { "epoch": 4.267644645055396, "grad_norm": 0.03626253455877304, "learning_rate": 0.01, "loss": 2.0083, "step": 41601 }, { "epoch": 4.267952400492408, "grad_norm": 0.1148434430360794, "learning_rate": 0.01, "loss": 1.9856, "step": 41604 }, { "epoch": 4.268260155929421, "grad_norm": 0.03992197662591934, "learning_rate": 0.01, "loss": 1.9999, "step": 41607 }, { "epoch": 4.268567911366434, "grad_norm": 0.07781147211790085, "learning_rate": 0.01, "loss": 2.0197, "step": 41610 }, { "epoch": 4.268875666803447, "grad_norm": 0.08838597685098648, "learning_rate": 0.01, "loss": 1.9812, "step": 41613 }, { "epoch": 4.269183422240459, "grad_norm": 0.07930974662303925, "learning_rate": 0.01, "loss": 1.9941, "step": 41616 }, { "epoch": 4.269491177677472, "grad_norm": 0.08094513416290283, "learning_rate": 0.01, "loss": 1.9861, "step": 41619 }, { "epoch": 4.269798933114485, "grad_norm": 0.06766554713249207, "learning_rate": 0.01, "loss": 1.9917, "step": 41622 }, { "epoch": 4.270106688551498, "grad_norm": 0.07384651899337769, "learning_rate": 0.01, "loss": 1.9754, "step": 41625 }, { "epoch": 4.2704144439885106, "grad_norm": 0.043780550360679626, "learning_rate": 0.01, "loss": 2.0226, "step": 41628 }, { "epoch": 4.270722199425523, "grad_norm": 0.06705281883478165, "learning_rate": 0.01, "loss": 2.008, "step": 41631 }, { "epoch": 4.271029954862536, "grad_norm": 0.051532238721847534, "learning_rate": 0.01, "loss": 1.9987, "step": 41634 }, { "epoch": 4.271337710299549, "grad_norm": 0.09668219834566116, "learning_rate": 0.01, "loss": 2.0226, "step": 41637 }, { "epoch": 4.271645465736562, "grad_norm": 0.0774909183382988, "learning_rate": 0.01, "loss": 2.0307, "step": 41640 }, { "epoch": 4.2719532211735745, "grad_norm": 0.11848165839910507, "learning_rate": 0.01, "loss": 2.0057, "step": 41643 }, { "epoch": 4.272260976610587, "grad_norm": 0.09055308997631073, "learning_rate": 0.01, "loss": 1.9871, "step": 41646 }, { "epoch": 4.272568732047599, "grad_norm": 0.06557660549879074, "learning_rate": 0.01, "loss": 1.9897, "step": 41649 }, { "epoch": 4.272876487484612, "grad_norm": 0.055074457079172134, "learning_rate": 0.01, "loss": 1.9692, "step": 41652 }, { "epoch": 4.273184242921625, "grad_norm": 0.07604243606328964, "learning_rate": 0.01, "loss": 1.9998, "step": 41655 }, { "epoch": 4.2734919983586375, "grad_norm": 0.07098755240440369, "learning_rate": 0.01, "loss": 2.0312, "step": 41658 }, { "epoch": 4.27379975379565, "grad_norm": 0.11538244038820267, "learning_rate": 0.01, "loss": 1.9713, "step": 41661 }, { "epoch": 4.274107509232663, "grad_norm": 0.03773383051156998, "learning_rate": 0.01, "loss": 1.999, "step": 41664 }, { "epoch": 4.274415264669676, "grad_norm": 0.03163829445838928, "learning_rate": 0.01, "loss": 1.9699, "step": 41667 }, { "epoch": 4.274723020106689, "grad_norm": 0.0678187757730484, "learning_rate": 0.01, "loss": 1.9944, "step": 41670 }, { "epoch": 4.275030775543701, "grad_norm": 0.0774695947766304, "learning_rate": 0.01, "loss": 1.9878, "step": 41673 }, { "epoch": 4.275338530980714, "grad_norm": 0.05830325931310654, "learning_rate": 0.01, "loss": 2.0059, "step": 41676 }, { "epoch": 4.275646286417727, "grad_norm": 0.11642345786094666, "learning_rate": 0.01, "loss": 1.993, "step": 41679 }, { "epoch": 4.27595404185474, "grad_norm": 0.12688972055912018, "learning_rate": 0.01, "loss": 1.9887, "step": 41682 }, { "epoch": 4.2762617972917525, "grad_norm": 0.11521381884813309, "learning_rate": 0.01, "loss": 2.0166, "step": 41685 }, { "epoch": 4.276569552728765, "grad_norm": 0.1203092560172081, "learning_rate": 0.01, "loss": 2.0148, "step": 41688 }, { "epoch": 4.276877308165778, "grad_norm": 0.09710691124200821, "learning_rate": 0.01, "loss": 2.0192, "step": 41691 }, { "epoch": 4.27718506360279, "grad_norm": 0.07945617288351059, "learning_rate": 0.01, "loss": 2.0106, "step": 41694 }, { "epoch": 4.277492819039803, "grad_norm": 0.07407965511083603, "learning_rate": 0.01, "loss": 2.0017, "step": 41697 }, { "epoch": 4.2778005744768155, "grad_norm": 0.06321244686841965, "learning_rate": 0.01, "loss": 1.9992, "step": 41700 }, { "epoch": 4.278108329913828, "grad_norm": 0.056950200349092484, "learning_rate": 0.01, "loss": 2.0218, "step": 41703 }, { "epoch": 4.278416085350841, "grad_norm": 0.039905134588479996, "learning_rate": 0.01, "loss": 1.9876, "step": 41706 }, { "epoch": 4.278723840787854, "grad_norm": 0.05050415173172951, "learning_rate": 0.01, "loss": 2.0038, "step": 41709 }, { "epoch": 4.279031596224867, "grad_norm": 0.05510304123163223, "learning_rate": 0.01, "loss": 2.0015, "step": 41712 }, { "epoch": 4.279339351661879, "grad_norm": 0.05586516857147217, "learning_rate": 0.01, "loss": 2.0227, "step": 41715 }, { "epoch": 4.279647107098892, "grad_norm": 0.05404721572995186, "learning_rate": 0.01, "loss": 1.985, "step": 41718 }, { "epoch": 4.279954862535905, "grad_norm": 0.04340701550245285, "learning_rate": 0.01, "loss": 2.0015, "step": 41721 }, { "epoch": 4.280262617972918, "grad_norm": 0.053679756820201874, "learning_rate": 0.01, "loss": 1.9983, "step": 41724 }, { "epoch": 4.2805703734099305, "grad_norm": 0.1286962479352951, "learning_rate": 0.01, "loss": 2.0077, "step": 41727 }, { "epoch": 4.280878128846943, "grad_norm": 0.07245181500911713, "learning_rate": 0.01, "loss": 1.9974, "step": 41730 }, { "epoch": 4.281185884283956, "grad_norm": 0.08518802374601364, "learning_rate": 0.01, "loss": 2.0305, "step": 41733 }, { "epoch": 4.281493639720969, "grad_norm": 0.07493653893470764, "learning_rate": 0.01, "loss": 1.9955, "step": 41736 }, { "epoch": 4.281801395157981, "grad_norm": 0.05031900852918625, "learning_rate": 0.01, "loss": 1.981, "step": 41739 }, { "epoch": 4.2821091505949935, "grad_norm": 0.07814056426286697, "learning_rate": 0.01, "loss": 1.9944, "step": 41742 }, { "epoch": 4.282416906032006, "grad_norm": 0.047507043927907944, "learning_rate": 0.01, "loss": 2.0129, "step": 41745 }, { "epoch": 4.282724661469019, "grad_norm": 0.06547687947750092, "learning_rate": 0.01, "loss": 2.0094, "step": 41748 }, { "epoch": 4.283032416906032, "grad_norm": 0.059194277971982956, "learning_rate": 0.01, "loss": 2.003, "step": 41751 }, { "epoch": 4.283340172343045, "grad_norm": 0.04526950791478157, "learning_rate": 0.01, "loss": 1.9877, "step": 41754 }, { "epoch": 4.283647927780057, "grad_norm": 0.031040268018841743, "learning_rate": 0.01, "loss": 1.9665, "step": 41757 }, { "epoch": 4.28395568321707, "grad_norm": 0.09988913685083389, "learning_rate": 0.01, "loss": 2.0104, "step": 41760 }, { "epoch": 4.284263438654083, "grad_norm": 0.04122784361243248, "learning_rate": 0.01, "loss": 1.9875, "step": 41763 }, { "epoch": 4.284571194091096, "grad_norm": 0.055508848279714584, "learning_rate": 0.01, "loss": 2.0457, "step": 41766 }, { "epoch": 4.2848789495281085, "grad_norm": 0.10151500999927521, "learning_rate": 0.01, "loss": 1.9868, "step": 41769 }, { "epoch": 4.285186704965121, "grad_norm": 0.041592396795749664, "learning_rate": 0.01, "loss": 1.9835, "step": 41772 }, { "epoch": 4.285494460402134, "grad_norm": 0.06108968332409859, "learning_rate": 0.01, "loss": 2.0195, "step": 41775 }, { "epoch": 4.285802215839147, "grad_norm": 0.06336821615695953, "learning_rate": 0.01, "loss": 1.9743, "step": 41778 }, { "epoch": 4.28610997127616, "grad_norm": 0.03348274528980255, "learning_rate": 0.01, "loss": 1.9822, "step": 41781 }, { "epoch": 4.286417726713172, "grad_norm": 0.04698178544640541, "learning_rate": 0.01, "loss": 2.0001, "step": 41784 }, { "epoch": 4.286725482150184, "grad_norm": 0.04394696652889252, "learning_rate": 0.01, "loss": 2.0148, "step": 41787 }, { "epoch": 4.287033237587197, "grad_norm": 0.15938106179237366, "learning_rate": 0.01, "loss": 2.012, "step": 41790 }, { "epoch": 4.28734099302421, "grad_norm": 0.08870629966259003, "learning_rate": 0.01, "loss": 1.9863, "step": 41793 }, { "epoch": 4.287648748461223, "grad_norm": 0.051104262471199036, "learning_rate": 0.01, "loss": 1.9954, "step": 41796 }, { "epoch": 4.2879565038982355, "grad_norm": 0.03226442262530327, "learning_rate": 0.01, "loss": 2.0087, "step": 41799 }, { "epoch": 4.288264259335248, "grad_norm": 0.033240124583244324, "learning_rate": 0.01, "loss": 2.0012, "step": 41802 }, { "epoch": 4.288572014772261, "grad_norm": 0.04799087345600128, "learning_rate": 0.01, "loss": 1.9862, "step": 41805 }, { "epoch": 4.288879770209274, "grad_norm": 0.04805191978812218, "learning_rate": 0.01, "loss": 1.998, "step": 41808 }, { "epoch": 4.289187525646287, "grad_norm": 0.10758639872074127, "learning_rate": 0.01, "loss": 2.0237, "step": 41811 }, { "epoch": 4.289495281083299, "grad_norm": 0.06141864135861397, "learning_rate": 0.01, "loss": 1.9953, "step": 41814 }, { "epoch": 4.289803036520312, "grad_norm": 0.057809922844171524, "learning_rate": 0.01, "loss": 1.998, "step": 41817 }, { "epoch": 4.290110791957325, "grad_norm": 0.06831464916467667, "learning_rate": 0.01, "loss": 2.0054, "step": 41820 }, { "epoch": 4.290418547394338, "grad_norm": 0.08271254599094391, "learning_rate": 0.01, "loss": 1.9823, "step": 41823 }, { "epoch": 4.2907263028313505, "grad_norm": 0.04244139790534973, "learning_rate": 0.01, "loss": 1.9897, "step": 41826 }, { "epoch": 4.291034058268362, "grad_norm": 0.04482168331742287, "learning_rate": 0.01, "loss": 2.0228, "step": 41829 }, { "epoch": 4.291341813705375, "grad_norm": 0.04742707312107086, "learning_rate": 0.01, "loss": 2.0172, "step": 41832 }, { "epoch": 4.291649569142388, "grad_norm": 0.12863384187221527, "learning_rate": 0.01, "loss": 1.996, "step": 41835 }, { "epoch": 4.291957324579401, "grad_norm": 0.12212485820055008, "learning_rate": 0.01, "loss": 1.9976, "step": 41838 }, { "epoch": 4.2922650800164135, "grad_norm": 0.0465460866689682, "learning_rate": 0.01, "loss": 2.0078, "step": 41841 }, { "epoch": 4.292572835453426, "grad_norm": 0.052605509757995605, "learning_rate": 0.01, "loss": 2.0019, "step": 41844 }, { "epoch": 4.292880590890439, "grad_norm": 0.046292662620544434, "learning_rate": 0.01, "loss": 2.0077, "step": 41847 }, { "epoch": 4.293188346327452, "grad_norm": 0.04217211529612541, "learning_rate": 0.01, "loss": 2.0, "step": 41850 }, { "epoch": 4.293496101764465, "grad_norm": 0.08655425906181335, "learning_rate": 0.01, "loss": 1.9707, "step": 41853 }, { "epoch": 4.293803857201477, "grad_norm": 0.0645185261964798, "learning_rate": 0.01, "loss": 2.0036, "step": 41856 }, { "epoch": 4.29411161263849, "grad_norm": 0.06806469708681107, "learning_rate": 0.01, "loss": 2.0034, "step": 41859 }, { "epoch": 4.294419368075503, "grad_norm": 0.05178931728005409, "learning_rate": 0.01, "loss": 1.9923, "step": 41862 }, { "epoch": 4.294727123512516, "grad_norm": 0.0440845787525177, "learning_rate": 0.01, "loss": 2.0183, "step": 41865 }, { "epoch": 4.2950348789495285, "grad_norm": 0.04800264537334442, "learning_rate": 0.01, "loss": 1.9908, "step": 41868 }, { "epoch": 4.295342634386541, "grad_norm": 0.03780617192387581, "learning_rate": 0.01, "loss": 2.0051, "step": 41871 }, { "epoch": 4.295650389823553, "grad_norm": 0.0536942295730114, "learning_rate": 0.01, "loss": 1.999, "step": 41874 }, { "epoch": 4.295958145260566, "grad_norm": 0.09988999366760254, "learning_rate": 0.01, "loss": 1.9926, "step": 41877 }, { "epoch": 4.296265900697579, "grad_norm": 0.06863691657781601, "learning_rate": 0.01, "loss": 2.0076, "step": 41880 }, { "epoch": 4.2965736561345915, "grad_norm": 0.04894992709159851, "learning_rate": 0.01, "loss": 1.9836, "step": 41883 }, { "epoch": 4.296881411571604, "grad_norm": 0.041689950972795486, "learning_rate": 0.01, "loss": 2.0101, "step": 41886 }, { "epoch": 4.297189167008617, "grad_norm": 0.10208067297935486, "learning_rate": 0.01, "loss": 1.9957, "step": 41889 }, { "epoch": 4.29749692244563, "grad_norm": 0.030840028077363968, "learning_rate": 0.01, "loss": 1.991, "step": 41892 }, { "epoch": 4.297804677882643, "grad_norm": 0.04242958873510361, "learning_rate": 0.01, "loss": 1.9992, "step": 41895 }, { "epoch": 4.298112433319655, "grad_norm": 0.11578787118196487, "learning_rate": 0.01, "loss": 2.0139, "step": 41898 }, { "epoch": 4.298420188756668, "grad_norm": 0.059238482266664505, "learning_rate": 0.01, "loss": 2.0087, "step": 41901 }, { "epoch": 4.298727944193681, "grad_norm": 0.03906838968396187, "learning_rate": 0.01, "loss": 2.0162, "step": 41904 }, { "epoch": 4.299035699630694, "grad_norm": 0.06461112201213837, "learning_rate": 0.01, "loss": 1.9819, "step": 41907 }, { "epoch": 4.2993434550677065, "grad_norm": 0.1491340696811676, "learning_rate": 0.01, "loss": 2.0193, "step": 41910 }, { "epoch": 4.299651210504719, "grad_norm": 0.06621528416872025, "learning_rate": 0.01, "loss": 1.9906, "step": 41913 }, { "epoch": 4.299958965941732, "grad_norm": 0.06322132796049118, "learning_rate": 0.01, "loss": 1.9737, "step": 41916 }, { "epoch": 4.300266721378744, "grad_norm": 0.04899013042449951, "learning_rate": 0.01, "loss": 1.996, "step": 41919 }, { "epoch": 4.300574476815757, "grad_norm": 0.04941810667514801, "learning_rate": 0.01, "loss": 1.9674, "step": 41922 }, { "epoch": 4.3008822322527696, "grad_norm": 0.04341074824333191, "learning_rate": 0.01, "loss": 1.9842, "step": 41925 }, { "epoch": 4.301189987689782, "grad_norm": 0.08827680349349976, "learning_rate": 0.01, "loss": 2.0119, "step": 41928 }, { "epoch": 4.301497743126795, "grad_norm": 0.05221998319029808, "learning_rate": 0.01, "loss": 2.0007, "step": 41931 }, { "epoch": 4.301805498563808, "grad_norm": 0.10630286484956741, "learning_rate": 0.01, "loss": 2.0174, "step": 41934 }, { "epoch": 4.302113254000821, "grad_norm": 0.043958306312561035, "learning_rate": 0.01, "loss": 1.9803, "step": 41937 }, { "epoch": 4.3024210094378335, "grad_norm": 0.057621411979198456, "learning_rate": 0.01, "loss": 2.01, "step": 41940 }, { "epoch": 4.302728764874846, "grad_norm": 0.09467203170061111, "learning_rate": 0.01, "loss": 2.0104, "step": 41943 }, { "epoch": 4.303036520311859, "grad_norm": 0.06720858812332153, "learning_rate": 0.01, "loss": 1.9772, "step": 41946 }, { "epoch": 4.303344275748872, "grad_norm": 0.06861304491758347, "learning_rate": 0.01, "loss": 1.9955, "step": 41949 }, { "epoch": 4.303652031185885, "grad_norm": 0.038296621292829514, "learning_rate": 0.01, "loss": 1.991, "step": 41952 }, { "epoch": 4.303959786622897, "grad_norm": 0.05808268114924431, "learning_rate": 0.01, "loss": 2.0097, "step": 41955 }, { "epoch": 4.30426754205991, "grad_norm": 0.05521778017282486, "learning_rate": 0.01, "loss": 1.995, "step": 41958 }, { "epoch": 4.304575297496923, "grad_norm": 0.05509946867823601, "learning_rate": 0.01, "loss": 2.0079, "step": 41961 }, { "epoch": 4.304883052933935, "grad_norm": 0.04729253426194191, "learning_rate": 0.01, "loss": 1.9928, "step": 41964 }, { "epoch": 4.305190808370948, "grad_norm": 0.06389732658863068, "learning_rate": 0.01, "loss": 1.9875, "step": 41967 }, { "epoch": 4.30549856380796, "grad_norm": 0.04609325900673866, "learning_rate": 0.01, "loss": 2.0058, "step": 41970 }, { "epoch": 4.305806319244973, "grad_norm": 0.06793837994337082, "learning_rate": 0.01, "loss": 1.9955, "step": 41973 }, { "epoch": 4.306114074681986, "grad_norm": 0.0791841521859169, "learning_rate": 0.01, "loss": 1.9659, "step": 41976 }, { "epoch": 4.306421830118999, "grad_norm": 0.07721048593521118, "learning_rate": 0.01, "loss": 1.9936, "step": 41979 }, { "epoch": 4.3067295855560115, "grad_norm": 0.04971477389335632, "learning_rate": 0.01, "loss": 1.971, "step": 41982 }, { "epoch": 4.307037340993024, "grad_norm": 0.11230115592479706, "learning_rate": 0.01, "loss": 2.013, "step": 41985 }, { "epoch": 4.307345096430037, "grad_norm": 0.08187992870807648, "learning_rate": 0.01, "loss": 2.0102, "step": 41988 }, { "epoch": 4.30765285186705, "grad_norm": 0.07407639175653458, "learning_rate": 0.01, "loss": 1.9617, "step": 41991 }, { "epoch": 4.307960607304063, "grad_norm": 0.051022280007600784, "learning_rate": 0.01, "loss": 1.9964, "step": 41994 }, { "epoch": 4.308268362741075, "grad_norm": 0.041618864983320236, "learning_rate": 0.01, "loss": 1.9961, "step": 41997 }, { "epoch": 4.308576118178088, "grad_norm": 0.0507926307618618, "learning_rate": 0.01, "loss": 1.9752, "step": 42000 }, { "epoch": 4.308883873615101, "grad_norm": 0.07467113435268402, "learning_rate": 0.01, "loss": 2.0045, "step": 42003 }, { "epoch": 4.309191629052114, "grad_norm": 0.06563274562358856, "learning_rate": 0.01, "loss": 2.0079, "step": 42006 }, { "epoch": 4.309499384489126, "grad_norm": 0.05920867621898651, "learning_rate": 0.01, "loss": 1.9677, "step": 42009 }, { "epoch": 4.309807139926138, "grad_norm": 0.12882298231124878, "learning_rate": 0.01, "loss": 2.0038, "step": 42012 }, { "epoch": 4.310114895363151, "grad_norm": 0.07217466831207275, "learning_rate": 0.01, "loss": 2.018, "step": 42015 }, { "epoch": 4.310422650800164, "grad_norm": 0.06030949205160141, "learning_rate": 0.01, "loss": 2.0043, "step": 42018 }, { "epoch": 4.310730406237177, "grad_norm": 0.04477029666304588, "learning_rate": 0.01, "loss": 1.9672, "step": 42021 }, { "epoch": 4.3110381616741895, "grad_norm": 0.042469825595617294, "learning_rate": 0.01, "loss": 2.0094, "step": 42024 }, { "epoch": 4.311345917111202, "grad_norm": 0.061194293200969696, "learning_rate": 0.01, "loss": 1.9986, "step": 42027 }, { "epoch": 4.311653672548215, "grad_norm": 0.07249957323074341, "learning_rate": 0.01, "loss": 1.9978, "step": 42030 }, { "epoch": 4.311961427985228, "grad_norm": 0.1172809898853302, "learning_rate": 0.01, "loss": 1.9815, "step": 42033 }, { "epoch": 4.312269183422241, "grad_norm": 0.05327514931559563, "learning_rate": 0.01, "loss": 2.0115, "step": 42036 }, { "epoch": 4.312576938859253, "grad_norm": 0.06902623176574707, "learning_rate": 0.01, "loss": 1.9916, "step": 42039 }, { "epoch": 4.312884694296266, "grad_norm": 0.039753060787916183, "learning_rate": 0.01, "loss": 1.9833, "step": 42042 }, { "epoch": 4.313192449733279, "grad_norm": 0.047289080917835236, "learning_rate": 0.01, "loss": 2.0063, "step": 42045 }, { "epoch": 4.313500205170292, "grad_norm": 0.11222794651985168, "learning_rate": 0.01, "loss": 2.008, "step": 42048 }, { "epoch": 4.313807960607304, "grad_norm": 0.09836508333683014, "learning_rate": 0.01, "loss": 2.0148, "step": 42051 }, { "epoch": 4.314115716044316, "grad_norm": 0.06988558173179626, "learning_rate": 0.01, "loss": 2.007, "step": 42054 }, { "epoch": 4.314423471481329, "grad_norm": 0.05915610492229462, "learning_rate": 0.01, "loss": 2.0016, "step": 42057 }, { "epoch": 4.314731226918342, "grad_norm": 0.14300785958766937, "learning_rate": 0.01, "loss": 1.9995, "step": 42060 }, { "epoch": 4.315038982355355, "grad_norm": 0.055672645568847656, "learning_rate": 0.01, "loss": 1.9658, "step": 42063 }, { "epoch": 4.3153467377923675, "grad_norm": 0.07753617316484451, "learning_rate": 0.01, "loss": 1.9656, "step": 42066 }, { "epoch": 4.31565449322938, "grad_norm": 0.04788988083600998, "learning_rate": 0.01, "loss": 2.0013, "step": 42069 }, { "epoch": 4.315962248666393, "grad_norm": 0.04761343449354172, "learning_rate": 0.01, "loss": 2.0033, "step": 42072 }, { "epoch": 4.316270004103406, "grad_norm": 0.051400624215602875, "learning_rate": 0.01, "loss": 2.0074, "step": 42075 }, { "epoch": 4.316577759540419, "grad_norm": 0.050774481147527695, "learning_rate": 0.01, "loss": 1.9822, "step": 42078 }, { "epoch": 4.3168855149774314, "grad_norm": 0.08724766224622726, "learning_rate": 0.01, "loss": 1.9984, "step": 42081 }, { "epoch": 4.317193270414444, "grad_norm": 0.07128842920064926, "learning_rate": 0.01, "loss": 1.9701, "step": 42084 }, { "epoch": 4.317501025851457, "grad_norm": 0.16834615170955658, "learning_rate": 0.01, "loss": 1.9971, "step": 42087 }, { "epoch": 4.31780878128847, "grad_norm": 0.07254631817340851, "learning_rate": 0.01, "loss": 1.962, "step": 42090 }, { "epoch": 4.318116536725482, "grad_norm": 0.0355360321700573, "learning_rate": 0.01, "loss": 2.0011, "step": 42093 }, { "epoch": 4.3184242921624945, "grad_norm": 0.03480659797787666, "learning_rate": 0.01, "loss": 2.0197, "step": 42096 }, { "epoch": 4.318732047599507, "grad_norm": 0.04848271980881691, "learning_rate": 0.01, "loss": 2.003, "step": 42099 }, { "epoch": 4.31903980303652, "grad_norm": 0.047225676476955414, "learning_rate": 0.01, "loss": 2.0117, "step": 42102 }, { "epoch": 4.319347558473533, "grad_norm": 0.07466583698987961, "learning_rate": 0.01, "loss": 1.9881, "step": 42105 }, { "epoch": 4.319655313910546, "grad_norm": 0.03580273687839508, "learning_rate": 0.01, "loss": 2.0099, "step": 42108 }, { "epoch": 4.319963069347558, "grad_norm": 0.0513874776661396, "learning_rate": 0.01, "loss": 1.9978, "step": 42111 }, { "epoch": 4.320270824784571, "grad_norm": 0.08838716894388199, "learning_rate": 0.01, "loss": 2.0022, "step": 42114 }, { "epoch": 4.320578580221584, "grad_norm": 0.048829689621925354, "learning_rate": 0.01, "loss": 2.0228, "step": 42117 }, { "epoch": 4.320886335658597, "grad_norm": 0.0572056770324707, "learning_rate": 0.01, "loss": 2.0366, "step": 42120 }, { "epoch": 4.3211940910956095, "grad_norm": 0.0953284278512001, "learning_rate": 0.01, "loss": 1.99, "step": 42123 }, { "epoch": 4.321501846532622, "grad_norm": 0.08970024436712265, "learning_rate": 0.01, "loss": 2.0048, "step": 42126 }, { "epoch": 4.321809601969635, "grad_norm": 0.03790782392024994, "learning_rate": 0.01, "loss": 2.017, "step": 42129 }, { "epoch": 4.322117357406648, "grad_norm": 0.11770451813936234, "learning_rate": 0.01, "loss": 1.9909, "step": 42132 }, { "epoch": 4.322425112843661, "grad_norm": 0.08634433895349503, "learning_rate": 0.01, "loss": 2.0067, "step": 42135 }, { "epoch": 4.3227328682806725, "grad_norm": 0.08693015575408936, "learning_rate": 0.01, "loss": 1.994, "step": 42138 }, { "epoch": 4.323040623717685, "grad_norm": 0.07767558842897415, "learning_rate": 0.01, "loss": 2.0127, "step": 42141 }, { "epoch": 4.323348379154698, "grad_norm": 0.03783591464161873, "learning_rate": 0.01, "loss": 1.9699, "step": 42144 }, { "epoch": 4.323656134591711, "grad_norm": 0.036055248230695724, "learning_rate": 0.01, "loss": 2.0056, "step": 42147 }, { "epoch": 4.323963890028724, "grad_norm": 0.056252893060445786, "learning_rate": 0.01, "loss": 2.0124, "step": 42150 }, { "epoch": 4.324271645465736, "grad_norm": 0.07847966253757477, "learning_rate": 0.01, "loss": 2.007, "step": 42153 }, { "epoch": 4.324579400902749, "grad_norm": 0.07340630888938904, "learning_rate": 0.01, "loss": 2.0161, "step": 42156 }, { "epoch": 4.324887156339762, "grad_norm": 0.0631684884428978, "learning_rate": 0.01, "loss": 1.9991, "step": 42159 }, { "epoch": 4.325194911776775, "grad_norm": 0.03661903738975525, "learning_rate": 0.01, "loss": 2.0048, "step": 42162 }, { "epoch": 4.3255026672137875, "grad_norm": 0.06492697447538376, "learning_rate": 0.01, "loss": 1.9726, "step": 42165 }, { "epoch": 4.3258104226508, "grad_norm": 0.07288751751184464, "learning_rate": 0.01, "loss": 1.9985, "step": 42168 }, { "epoch": 4.326118178087813, "grad_norm": 0.04615149274468422, "learning_rate": 0.01, "loss": 1.9835, "step": 42171 }, { "epoch": 4.326425933524826, "grad_norm": 0.06735312938690186, "learning_rate": 0.01, "loss": 1.9959, "step": 42174 }, { "epoch": 4.326733688961839, "grad_norm": 0.05109792947769165, "learning_rate": 0.01, "loss": 2.0274, "step": 42177 }, { "epoch": 4.327041444398851, "grad_norm": 0.047006141394376755, "learning_rate": 0.01, "loss": 2.0182, "step": 42180 }, { "epoch": 4.327349199835863, "grad_norm": 0.11803962290287018, "learning_rate": 0.01, "loss": 2.0149, "step": 42183 }, { "epoch": 4.327656955272876, "grad_norm": 0.06282762438058853, "learning_rate": 0.01, "loss": 1.9997, "step": 42186 }, { "epoch": 4.327964710709889, "grad_norm": 0.07451837509870529, "learning_rate": 0.01, "loss": 1.9834, "step": 42189 }, { "epoch": 4.328272466146902, "grad_norm": 0.05453307554125786, "learning_rate": 0.01, "loss": 2.0096, "step": 42192 }, { "epoch": 4.328580221583914, "grad_norm": 0.03320490941405296, "learning_rate": 0.01, "loss": 1.97, "step": 42195 }, { "epoch": 4.328887977020927, "grad_norm": 0.03890087082982063, "learning_rate": 0.01, "loss": 2.0277, "step": 42198 }, { "epoch": 4.32919573245794, "grad_norm": 0.04567558318376541, "learning_rate": 0.01, "loss": 2.0203, "step": 42201 }, { "epoch": 4.329503487894953, "grad_norm": 0.11574912816286087, "learning_rate": 0.01, "loss": 2.0025, "step": 42204 }, { "epoch": 4.3298112433319655, "grad_norm": 0.1619519144296646, "learning_rate": 0.01, "loss": 2.0264, "step": 42207 }, { "epoch": 4.330118998768978, "grad_norm": 0.07246614992618561, "learning_rate": 0.01, "loss": 1.9717, "step": 42210 }, { "epoch": 4.330426754205991, "grad_norm": 0.06830492615699768, "learning_rate": 0.01, "loss": 2.0022, "step": 42213 }, { "epoch": 4.330734509643004, "grad_norm": 0.03900605067610741, "learning_rate": 0.01, "loss": 2.005, "step": 42216 }, { "epoch": 4.331042265080017, "grad_norm": 0.04035233333706856, "learning_rate": 0.01, "loss": 1.9802, "step": 42219 }, { "epoch": 4.331350020517029, "grad_norm": 0.08681115508079529, "learning_rate": 0.01, "loss": 1.9944, "step": 42222 }, { "epoch": 4.331657775954042, "grad_norm": 0.10521326214075089, "learning_rate": 0.01, "loss": 1.9983, "step": 42225 }, { "epoch": 4.331965531391054, "grad_norm": 0.05561526492238045, "learning_rate": 0.01, "loss": 1.9738, "step": 42228 }, { "epoch": 4.332273286828067, "grad_norm": 0.10138184577226639, "learning_rate": 0.01, "loss": 2.0263, "step": 42231 }, { "epoch": 4.33258104226508, "grad_norm": 0.05151226371526718, "learning_rate": 0.01, "loss": 1.997, "step": 42234 }, { "epoch": 4.3328887977020925, "grad_norm": 0.04308367893099785, "learning_rate": 0.01, "loss": 1.989, "step": 42237 }, { "epoch": 4.333196553139105, "grad_norm": 0.09749255329370499, "learning_rate": 0.01, "loss": 2.0007, "step": 42240 }, { "epoch": 4.333504308576118, "grad_norm": 0.078557088971138, "learning_rate": 0.01, "loss": 1.9958, "step": 42243 }, { "epoch": 4.333812064013131, "grad_norm": 0.05035284161567688, "learning_rate": 0.01, "loss": 1.9977, "step": 42246 }, { "epoch": 4.334119819450144, "grad_norm": 0.08703385293483734, "learning_rate": 0.01, "loss": 1.9876, "step": 42249 }, { "epoch": 4.334427574887156, "grad_norm": 0.06751354783773422, "learning_rate": 0.01, "loss": 1.9668, "step": 42252 }, { "epoch": 4.334735330324169, "grad_norm": 0.06264498084783554, "learning_rate": 0.01, "loss": 2.0163, "step": 42255 }, { "epoch": 4.335043085761182, "grad_norm": 0.08927709609270096, "learning_rate": 0.01, "loss": 1.9938, "step": 42258 }, { "epoch": 4.335350841198195, "grad_norm": 0.0882975161075592, "learning_rate": 0.01, "loss": 1.982, "step": 42261 }, { "epoch": 4.3356585966352075, "grad_norm": 0.08568523824214935, "learning_rate": 0.01, "loss": 1.9992, "step": 42264 }, { "epoch": 4.33596635207222, "grad_norm": 0.10143941640853882, "learning_rate": 0.01, "loss": 2.0098, "step": 42267 }, { "epoch": 4.336274107509233, "grad_norm": 0.04442453384399414, "learning_rate": 0.01, "loss": 1.9998, "step": 42270 }, { "epoch": 4.336581862946245, "grad_norm": 0.04202549159526825, "learning_rate": 0.01, "loss": 2.0177, "step": 42273 }, { "epoch": 4.336889618383258, "grad_norm": 0.07024986296892166, "learning_rate": 0.01, "loss": 1.9997, "step": 42276 }, { "epoch": 4.3371973738202705, "grad_norm": 0.0714881494641304, "learning_rate": 0.01, "loss": 1.9988, "step": 42279 }, { "epoch": 4.337505129257283, "grad_norm": 0.05677810311317444, "learning_rate": 0.01, "loss": 2.0163, "step": 42282 }, { "epoch": 4.337812884694296, "grad_norm": 0.08475086092948914, "learning_rate": 0.01, "loss": 1.9804, "step": 42285 }, { "epoch": 4.338120640131309, "grad_norm": 0.05561790242791176, "learning_rate": 0.01, "loss": 1.9952, "step": 42288 }, { "epoch": 4.338428395568322, "grad_norm": 0.0845925584435463, "learning_rate": 0.01, "loss": 1.9964, "step": 42291 }, { "epoch": 4.338736151005334, "grad_norm": 0.07267442345619202, "learning_rate": 0.01, "loss": 1.9967, "step": 42294 }, { "epoch": 4.339043906442347, "grad_norm": 0.06486202031373978, "learning_rate": 0.01, "loss": 1.9915, "step": 42297 }, { "epoch": 4.33935166187936, "grad_norm": 0.08498860895633698, "learning_rate": 0.01, "loss": 1.9849, "step": 42300 }, { "epoch": 4.339659417316373, "grad_norm": 0.05737947300076485, "learning_rate": 0.01, "loss": 2.0168, "step": 42303 }, { "epoch": 4.3399671727533855, "grad_norm": 0.1146705150604248, "learning_rate": 0.01, "loss": 1.9874, "step": 42306 }, { "epoch": 4.340274928190398, "grad_norm": 0.06821808218955994, "learning_rate": 0.01, "loss": 2.003, "step": 42309 }, { "epoch": 4.340582683627411, "grad_norm": 0.06554093956947327, "learning_rate": 0.01, "loss": 2.0051, "step": 42312 }, { "epoch": 4.340890439064424, "grad_norm": 0.055730611085891724, "learning_rate": 0.01, "loss": 1.9784, "step": 42315 }, { "epoch": 4.341198194501436, "grad_norm": 0.12713190913200378, "learning_rate": 0.01, "loss": 1.9957, "step": 42318 }, { "epoch": 4.3415059499384485, "grad_norm": 0.04995013028383255, "learning_rate": 0.01, "loss": 1.9892, "step": 42321 }, { "epoch": 4.341813705375461, "grad_norm": 0.05785641819238663, "learning_rate": 0.01, "loss": 2.001, "step": 42324 }, { "epoch": 4.342121460812474, "grad_norm": 0.06933962553739548, "learning_rate": 0.01, "loss": 1.9708, "step": 42327 }, { "epoch": 4.342429216249487, "grad_norm": 0.08787462115287781, "learning_rate": 0.01, "loss": 1.9808, "step": 42330 }, { "epoch": 4.3427369716865, "grad_norm": 0.03759448602795601, "learning_rate": 0.01, "loss": 2.0184, "step": 42333 }, { "epoch": 4.343044727123512, "grad_norm": 0.08787883818149567, "learning_rate": 0.01, "loss": 2.0153, "step": 42336 }, { "epoch": 4.343352482560525, "grad_norm": 0.07424912601709366, "learning_rate": 0.01, "loss": 2.0099, "step": 42339 }, { "epoch": 4.343660237997538, "grad_norm": 0.08268291503190994, "learning_rate": 0.01, "loss": 2.0116, "step": 42342 }, { "epoch": 4.343967993434551, "grad_norm": 0.09832719713449478, "learning_rate": 0.01, "loss": 2.0129, "step": 42345 }, { "epoch": 4.3442757488715635, "grad_norm": 0.08465267717838287, "learning_rate": 0.01, "loss": 1.9878, "step": 42348 }, { "epoch": 4.344583504308576, "grad_norm": 0.08722463995218277, "learning_rate": 0.01, "loss": 2.0249, "step": 42351 }, { "epoch": 4.344891259745589, "grad_norm": 0.03291288763284683, "learning_rate": 0.01, "loss": 1.9878, "step": 42354 }, { "epoch": 4.345199015182602, "grad_norm": 0.05389249697327614, "learning_rate": 0.01, "loss": 1.9989, "step": 42357 }, { "epoch": 4.345506770619615, "grad_norm": 0.057934414595365524, "learning_rate": 0.01, "loss": 2.0047, "step": 42360 }, { "epoch": 4.3458145260566265, "grad_norm": 0.09527049958705902, "learning_rate": 0.01, "loss": 1.996, "step": 42363 }, { "epoch": 4.346122281493639, "grad_norm": 0.09653705358505249, "learning_rate": 0.01, "loss": 2.0058, "step": 42366 }, { "epoch": 4.346430036930652, "grad_norm": 0.050867971032857895, "learning_rate": 0.01, "loss": 2.0043, "step": 42369 }, { "epoch": 4.346737792367665, "grad_norm": 0.17523349821567535, "learning_rate": 0.01, "loss": 1.9933, "step": 42372 }, { "epoch": 4.347045547804678, "grad_norm": 0.04011973738670349, "learning_rate": 0.01, "loss": 2.0147, "step": 42375 }, { "epoch": 4.3473533032416904, "grad_norm": 0.0476989708840847, "learning_rate": 0.01, "loss": 2.0088, "step": 42378 }, { "epoch": 4.347661058678703, "grad_norm": 0.062353190034627914, "learning_rate": 0.01, "loss": 2.0106, "step": 42381 }, { "epoch": 4.347968814115716, "grad_norm": 0.03900708258152008, "learning_rate": 0.01, "loss": 2.0074, "step": 42384 }, { "epoch": 4.348276569552729, "grad_norm": 0.05116381496191025, "learning_rate": 0.01, "loss": 2.0263, "step": 42387 }, { "epoch": 4.348584324989742, "grad_norm": 0.0740213617682457, "learning_rate": 0.01, "loss": 1.9911, "step": 42390 }, { "epoch": 4.348892080426754, "grad_norm": 0.04419323056936264, "learning_rate": 0.01, "loss": 1.9859, "step": 42393 }, { "epoch": 4.349199835863767, "grad_norm": 0.09228520840406418, "learning_rate": 0.01, "loss": 1.9902, "step": 42396 }, { "epoch": 4.34950759130078, "grad_norm": 0.06969837844371796, "learning_rate": 0.01, "loss": 2.0195, "step": 42399 }, { "epoch": 4.349815346737793, "grad_norm": 0.06493277847766876, "learning_rate": 0.01, "loss": 1.9871, "step": 42402 }, { "epoch": 4.3501231021748055, "grad_norm": 0.10470300912857056, "learning_rate": 0.01, "loss": 1.9829, "step": 42405 }, { "epoch": 4.350430857611817, "grad_norm": 0.055797114968299866, "learning_rate": 0.01, "loss": 1.9753, "step": 42408 }, { "epoch": 4.35073861304883, "grad_norm": 0.08295798301696777, "learning_rate": 0.01, "loss": 2.0024, "step": 42411 }, { "epoch": 4.351046368485843, "grad_norm": 0.03319832682609558, "learning_rate": 0.01, "loss": 1.9977, "step": 42414 }, { "epoch": 4.351354123922856, "grad_norm": 0.08333670347929001, "learning_rate": 0.01, "loss": 1.9832, "step": 42417 }, { "epoch": 4.3516618793598685, "grad_norm": 0.15636205673217773, "learning_rate": 0.01, "loss": 2.0047, "step": 42420 }, { "epoch": 4.351969634796881, "grad_norm": 0.04145883768796921, "learning_rate": 0.01, "loss": 1.9993, "step": 42423 }, { "epoch": 4.352277390233894, "grad_norm": 0.06319641321897507, "learning_rate": 0.01, "loss": 2.0072, "step": 42426 }, { "epoch": 4.352585145670907, "grad_norm": 0.10287382453680038, "learning_rate": 0.01, "loss": 2.0008, "step": 42429 }, { "epoch": 4.35289290110792, "grad_norm": 0.05073247849941254, "learning_rate": 0.01, "loss": 2.0024, "step": 42432 }, { "epoch": 4.353200656544932, "grad_norm": 0.05621569603681564, "learning_rate": 0.01, "loss": 2.006, "step": 42435 }, { "epoch": 4.353508411981945, "grad_norm": 0.044255949556827545, "learning_rate": 0.01, "loss": 2.0127, "step": 42438 }, { "epoch": 4.353816167418958, "grad_norm": 0.09396500885486603, "learning_rate": 0.01, "loss": 2.011, "step": 42441 }, { "epoch": 4.354123922855971, "grad_norm": 0.04211452975869179, "learning_rate": 0.01, "loss": 2.0056, "step": 42444 }, { "epoch": 4.3544316782929835, "grad_norm": 0.09612414985895157, "learning_rate": 0.01, "loss": 1.9703, "step": 42447 }, { "epoch": 4.354739433729996, "grad_norm": 0.07747933268547058, "learning_rate": 0.01, "loss": 2.005, "step": 42450 }, { "epoch": 4.355047189167008, "grad_norm": 0.06796756386756897, "learning_rate": 0.01, "loss": 2.006, "step": 42453 }, { "epoch": 4.355354944604021, "grad_norm": 0.052594736218452454, "learning_rate": 0.01, "loss": 2.0158, "step": 42456 }, { "epoch": 4.355662700041034, "grad_norm": 0.09094807505607605, "learning_rate": 0.01, "loss": 1.9884, "step": 42459 }, { "epoch": 4.3559704554780465, "grad_norm": 0.06846702843904495, "learning_rate": 0.01, "loss": 1.9743, "step": 42462 }, { "epoch": 4.356278210915059, "grad_norm": 0.059900566935539246, "learning_rate": 0.01, "loss": 1.9951, "step": 42465 }, { "epoch": 4.356585966352072, "grad_norm": 0.07737037539482117, "learning_rate": 0.01, "loss": 2.005, "step": 42468 }, { "epoch": 4.356893721789085, "grad_norm": 0.10885298252105713, "learning_rate": 0.01, "loss": 1.9983, "step": 42471 }, { "epoch": 4.357201477226098, "grad_norm": 0.12740680575370789, "learning_rate": 0.01, "loss": 2.007, "step": 42474 }, { "epoch": 4.35750923266311, "grad_norm": 0.05705080181360245, "learning_rate": 0.01, "loss": 2.0148, "step": 42477 }, { "epoch": 4.357816988100123, "grad_norm": 0.047689832746982574, "learning_rate": 0.01, "loss": 1.9935, "step": 42480 }, { "epoch": 4.358124743537136, "grad_norm": 0.06346622854471207, "learning_rate": 0.01, "loss": 2.0007, "step": 42483 }, { "epoch": 4.358432498974149, "grad_norm": 0.08776500821113586, "learning_rate": 0.01, "loss": 2.0056, "step": 42486 }, { "epoch": 4.3587402544111615, "grad_norm": 0.08766961097717285, "learning_rate": 0.01, "loss": 1.9841, "step": 42489 }, { "epoch": 4.359048009848174, "grad_norm": 0.045849695801734924, "learning_rate": 0.01, "loss": 1.9967, "step": 42492 }, { "epoch": 4.359355765285187, "grad_norm": 0.10003980994224548, "learning_rate": 0.01, "loss": 2.003, "step": 42495 }, { "epoch": 4.359663520722199, "grad_norm": 0.05734704062342644, "learning_rate": 0.01, "loss": 2.0167, "step": 42498 }, { "epoch": 4.359971276159212, "grad_norm": 0.05776417255401611, "learning_rate": 0.01, "loss": 2.0221, "step": 42501 }, { "epoch": 4.3602790315962245, "grad_norm": 0.03467059135437012, "learning_rate": 0.01, "loss": 1.9826, "step": 42504 }, { "epoch": 4.360586787033237, "grad_norm": 0.03380267322063446, "learning_rate": 0.01, "loss": 2.0056, "step": 42507 }, { "epoch": 4.36089454247025, "grad_norm": 0.06621702015399933, "learning_rate": 0.01, "loss": 2.0018, "step": 42510 }, { "epoch": 4.361202297907263, "grad_norm": 0.05189868062734604, "learning_rate": 0.01, "loss": 1.9983, "step": 42513 }, { "epoch": 4.361510053344276, "grad_norm": 0.05479085072875023, "learning_rate": 0.01, "loss": 1.9818, "step": 42516 }, { "epoch": 4.361817808781288, "grad_norm": 0.07013875991106033, "learning_rate": 0.01, "loss": 2.0069, "step": 42519 }, { "epoch": 4.362125564218301, "grad_norm": 0.07816500216722488, "learning_rate": 0.01, "loss": 1.9825, "step": 42522 }, { "epoch": 4.362433319655314, "grad_norm": 0.05446416139602661, "learning_rate": 0.01, "loss": 1.9871, "step": 42525 }, { "epoch": 4.362741075092327, "grad_norm": 0.05260547250509262, "learning_rate": 0.01, "loss": 1.9789, "step": 42528 }, { "epoch": 4.36304883052934, "grad_norm": 0.042641572654247284, "learning_rate": 0.01, "loss": 1.9998, "step": 42531 }, { "epoch": 4.363356585966352, "grad_norm": 0.05067918077111244, "learning_rate": 0.01, "loss": 1.977, "step": 42534 }, { "epoch": 4.363664341403365, "grad_norm": 0.05734257400035858, "learning_rate": 0.01, "loss": 1.9884, "step": 42537 }, { "epoch": 4.363972096840378, "grad_norm": 0.12982626259326935, "learning_rate": 0.01, "loss": 1.9998, "step": 42540 }, { "epoch": 4.36427985227739, "grad_norm": 0.06049492582678795, "learning_rate": 0.01, "loss": 1.9958, "step": 42543 }, { "epoch": 4.364587607714403, "grad_norm": 0.03683345392346382, "learning_rate": 0.01, "loss": 1.9857, "step": 42546 }, { "epoch": 4.364895363151415, "grad_norm": 0.055864546447992325, "learning_rate": 0.01, "loss": 2.0082, "step": 42549 }, { "epoch": 4.365203118588428, "grad_norm": 0.046534840017557144, "learning_rate": 0.01, "loss": 2.0027, "step": 42552 }, { "epoch": 4.365510874025441, "grad_norm": 0.07829025387763977, "learning_rate": 0.01, "loss": 1.9982, "step": 42555 }, { "epoch": 4.365818629462454, "grad_norm": 0.03704720363020897, "learning_rate": 0.01, "loss": 1.9873, "step": 42558 }, { "epoch": 4.3661263848994665, "grad_norm": 0.03933021053671837, "learning_rate": 0.01, "loss": 1.9813, "step": 42561 }, { "epoch": 4.366434140336479, "grad_norm": 0.05160917341709137, "learning_rate": 0.01, "loss": 1.9743, "step": 42564 }, { "epoch": 4.366741895773492, "grad_norm": 0.16336973011493683, "learning_rate": 0.01, "loss": 2.0207, "step": 42567 }, { "epoch": 4.367049651210505, "grad_norm": 0.15664827823638916, "learning_rate": 0.01, "loss": 1.9948, "step": 42570 }, { "epoch": 4.367357406647518, "grad_norm": 0.06535400450229645, "learning_rate": 0.01, "loss": 1.9707, "step": 42573 }, { "epoch": 4.36766516208453, "grad_norm": 0.10573652386665344, "learning_rate": 0.01, "loss": 2.0185, "step": 42576 }, { "epoch": 4.367972917521543, "grad_norm": 0.06016778200864792, "learning_rate": 0.01, "loss": 1.9967, "step": 42579 }, { "epoch": 4.368280672958556, "grad_norm": 0.07716617733240128, "learning_rate": 0.01, "loss": 2.012, "step": 42582 }, { "epoch": 4.368588428395569, "grad_norm": 0.0795513242483139, "learning_rate": 0.01, "loss": 1.9885, "step": 42585 }, { "epoch": 4.368896183832581, "grad_norm": 0.042184434831142426, "learning_rate": 0.01, "loss": 1.9473, "step": 42588 }, { "epoch": 4.369203939269593, "grad_norm": 0.10784424096345901, "learning_rate": 0.01, "loss": 2.0099, "step": 42591 }, { "epoch": 4.369511694706606, "grad_norm": 0.0906398668885231, "learning_rate": 0.01, "loss": 1.9985, "step": 42594 }, { "epoch": 4.369819450143619, "grad_norm": 0.07714588940143585, "learning_rate": 0.01, "loss": 2.0083, "step": 42597 }, { "epoch": 4.370127205580632, "grad_norm": 0.07262132316827774, "learning_rate": 0.01, "loss": 2.0107, "step": 42600 }, { "epoch": 4.3704349610176445, "grad_norm": 0.04687703400850296, "learning_rate": 0.01, "loss": 1.9916, "step": 42603 }, { "epoch": 4.370742716454657, "grad_norm": 0.03540663421154022, "learning_rate": 0.01, "loss": 1.9784, "step": 42606 }, { "epoch": 4.37105047189167, "grad_norm": 0.0655878484249115, "learning_rate": 0.01, "loss": 2.0104, "step": 42609 }, { "epoch": 4.371358227328683, "grad_norm": 0.14327290654182434, "learning_rate": 0.01, "loss": 1.9783, "step": 42612 }, { "epoch": 4.371665982765696, "grad_norm": 0.051352307200431824, "learning_rate": 0.01, "loss": 1.9952, "step": 42615 }, { "epoch": 4.371973738202708, "grad_norm": 0.03427935391664505, "learning_rate": 0.01, "loss": 1.995, "step": 42618 }, { "epoch": 4.372281493639721, "grad_norm": 0.06677401810884476, "learning_rate": 0.01, "loss": 2.0074, "step": 42621 }, { "epoch": 4.372589249076734, "grad_norm": 0.0740409791469574, "learning_rate": 0.01, "loss": 2.0128, "step": 42624 }, { "epoch": 4.372897004513747, "grad_norm": 0.07356753200292587, "learning_rate": 0.01, "loss": 1.9861, "step": 42627 }, { "epoch": 4.3732047599507595, "grad_norm": 0.0342116579413414, "learning_rate": 0.01, "loss": 2.0028, "step": 42630 }, { "epoch": 4.373512515387771, "grad_norm": 0.04127725213766098, "learning_rate": 0.01, "loss": 2.0058, "step": 42633 }, { "epoch": 4.373820270824784, "grad_norm": 0.05259817838668823, "learning_rate": 0.01, "loss": 1.9704, "step": 42636 }, { "epoch": 4.374128026261797, "grad_norm": 0.041953880339860916, "learning_rate": 0.01, "loss": 1.9922, "step": 42639 }, { "epoch": 4.37443578169881, "grad_norm": 0.042254798114299774, "learning_rate": 0.01, "loss": 1.9852, "step": 42642 }, { "epoch": 4.3747435371358225, "grad_norm": 0.0722968801856041, "learning_rate": 0.01, "loss": 1.9903, "step": 42645 }, { "epoch": 4.375051292572835, "grad_norm": 0.12318527698516846, "learning_rate": 0.01, "loss": 2.0048, "step": 42648 }, { "epoch": 4.375359048009848, "grad_norm": 0.048684168606996536, "learning_rate": 0.01, "loss": 1.9864, "step": 42651 }, { "epoch": 4.375666803446861, "grad_norm": 0.09488745033740997, "learning_rate": 0.01, "loss": 1.9645, "step": 42654 }, { "epoch": 4.375974558883874, "grad_norm": 0.06261547654867172, "learning_rate": 0.01, "loss": 2.0027, "step": 42657 }, { "epoch": 4.376282314320886, "grad_norm": 0.03739078342914581, "learning_rate": 0.01, "loss": 2.0081, "step": 42660 }, { "epoch": 4.376590069757899, "grad_norm": 0.03686247393488884, "learning_rate": 0.01, "loss": 1.9983, "step": 42663 }, { "epoch": 4.376897825194912, "grad_norm": 0.038870710879564285, "learning_rate": 0.01, "loss": 2.012, "step": 42666 }, { "epoch": 4.377205580631925, "grad_norm": 0.04153933376073837, "learning_rate": 0.01, "loss": 1.9836, "step": 42669 }, { "epoch": 4.3775133360689376, "grad_norm": 0.07403891533613205, "learning_rate": 0.01, "loss": 1.9581, "step": 42672 }, { "epoch": 4.37782109150595, "grad_norm": 0.12554992735385895, "learning_rate": 0.01, "loss": 2.0276, "step": 42675 }, { "epoch": 4.378128846942962, "grad_norm": 0.07833071053028107, "learning_rate": 0.01, "loss": 1.9745, "step": 42678 }, { "epoch": 4.378436602379975, "grad_norm": 0.05616781488060951, "learning_rate": 0.01, "loss": 2.0098, "step": 42681 }, { "epoch": 4.378744357816988, "grad_norm": 0.034405291080474854, "learning_rate": 0.01, "loss": 1.9853, "step": 42684 }, { "epoch": 4.379052113254001, "grad_norm": 0.03216303512454033, "learning_rate": 0.01, "loss": 2.0074, "step": 42687 }, { "epoch": 4.379359868691013, "grad_norm": 0.03294415399432182, "learning_rate": 0.01, "loss": 1.9821, "step": 42690 }, { "epoch": 4.379667624128026, "grad_norm": 0.07378646731376648, "learning_rate": 0.01, "loss": 1.9866, "step": 42693 }, { "epoch": 4.379975379565039, "grad_norm": 0.0922391265630722, "learning_rate": 0.01, "loss": 1.9799, "step": 42696 }, { "epoch": 4.380283135002052, "grad_norm": 0.06231202930212021, "learning_rate": 0.01, "loss": 1.9904, "step": 42699 }, { "epoch": 4.3805908904390645, "grad_norm": 0.07775771617889404, "learning_rate": 0.01, "loss": 2.0098, "step": 42702 }, { "epoch": 4.380898645876077, "grad_norm": 0.041460320353507996, "learning_rate": 0.01, "loss": 2.0052, "step": 42705 }, { "epoch": 4.38120640131309, "grad_norm": 0.10348492860794067, "learning_rate": 0.01, "loss": 1.9761, "step": 42708 }, { "epoch": 4.381514156750103, "grad_norm": 0.06981760263442993, "learning_rate": 0.01, "loss": 2.0003, "step": 42711 }, { "epoch": 4.381821912187116, "grad_norm": 0.05024630203843117, "learning_rate": 0.01, "loss": 1.9952, "step": 42714 }, { "epoch": 4.382129667624128, "grad_norm": 0.09696882963180542, "learning_rate": 0.01, "loss": 1.9854, "step": 42717 }, { "epoch": 4.382437423061141, "grad_norm": 0.08955802023410797, "learning_rate": 0.01, "loss": 2.0075, "step": 42720 }, { "epoch": 4.382745178498153, "grad_norm": 0.053882379084825516, "learning_rate": 0.01, "loss": 1.9908, "step": 42723 }, { "epoch": 4.383052933935166, "grad_norm": 0.09641711413860321, "learning_rate": 0.01, "loss": 1.9923, "step": 42726 }, { "epoch": 4.383360689372179, "grad_norm": 0.03148825094103813, "learning_rate": 0.01, "loss": 2.0041, "step": 42729 }, { "epoch": 4.383668444809191, "grad_norm": 0.13528583943843842, "learning_rate": 0.01, "loss": 1.973, "step": 42732 }, { "epoch": 4.383976200246204, "grad_norm": 0.08340415358543396, "learning_rate": 0.01, "loss": 2.0033, "step": 42735 }, { "epoch": 4.384283955683217, "grad_norm": 0.04938989877700806, "learning_rate": 0.01, "loss": 2.0224, "step": 42738 }, { "epoch": 4.38459171112023, "grad_norm": 0.03983978554606438, "learning_rate": 0.01, "loss": 1.9745, "step": 42741 }, { "epoch": 4.3848994665572425, "grad_norm": 0.04310479387640953, "learning_rate": 0.01, "loss": 2.0133, "step": 42744 }, { "epoch": 4.385207221994255, "grad_norm": 0.047569889575242996, "learning_rate": 0.01, "loss": 2.0138, "step": 42747 }, { "epoch": 4.385514977431268, "grad_norm": 0.04927491769194603, "learning_rate": 0.01, "loss": 2.0133, "step": 42750 }, { "epoch": 4.385822732868281, "grad_norm": 0.17855384945869446, "learning_rate": 0.01, "loss": 2.0052, "step": 42753 }, { "epoch": 4.386130488305294, "grad_norm": 0.06133484095335007, "learning_rate": 0.01, "loss": 2.0078, "step": 42756 }, { "epoch": 4.386438243742306, "grad_norm": 0.0671703889966011, "learning_rate": 0.01, "loss": 1.9997, "step": 42759 }, { "epoch": 4.386745999179319, "grad_norm": 0.06793912500143051, "learning_rate": 0.01, "loss": 2.0044, "step": 42762 }, { "epoch": 4.387053754616332, "grad_norm": 0.042816221714019775, "learning_rate": 0.01, "loss": 1.9831, "step": 42765 }, { "epoch": 4.387361510053344, "grad_norm": 0.035412371158599854, "learning_rate": 0.01, "loss": 1.9925, "step": 42768 }, { "epoch": 4.387669265490357, "grad_norm": 0.05142438784241676, "learning_rate": 0.01, "loss": 2.026, "step": 42771 }, { "epoch": 4.387977020927369, "grad_norm": 0.04674524813890457, "learning_rate": 0.01, "loss": 1.9963, "step": 42774 }, { "epoch": 4.388284776364382, "grad_norm": 0.2127211093902588, "learning_rate": 0.01, "loss": 1.9995, "step": 42777 }, { "epoch": 4.388592531801395, "grad_norm": 0.09352391213178635, "learning_rate": 0.01, "loss": 1.9798, "step": 42780 }, { "epoch": 4.388900287238408, "grad_norm": 0.06070404127240181, "learning_rate": 0.01, "loss": 1.9913, "step": 42783 }, { "epoch": 4.3892080426754205, "grad_norm": 0.0433930978178978, "learning_rate": 0.01, "loss": 1.9898, "step": 42786 }, { "epoch": 4.389515798112433, "grad_norm": 0.03960421308875084, "learning_rate": 0.01, "loss": 2.0209, "step": 42789 }, { "epoch": 4.389823553549446, "grad_norm": 0.043349895626306534, "learning_rate": 0.01, "loss": 2.0067, "step": 42792 }, { "epoch": 4.390131308986459, "grad_norm": 0.04139290004968643, "learning_rate": 0.01, "loss": 1.9976, "step": 42795 }, { "epoch": 4.390439064423472, "grad_norm": 0.08798687905073166, "learning_rate": 0.01, "loss": 1.9892, "step": 42798 }, { "epoch": 4.390746819860484, "grad_norm": 0.061553046107292175, "learning_rate": 0.01, "loss": 1.9815, "step": 42801 }, { "epoch": 4.391054575297497, "grad_norm": 0.04880871623754501, "learning_rate": 0.01, "loss": 1.9976, "step": 42804 }, { "epoch": 4.39136233073451, "grad_norm": 0.06029786914587021, "learning_rate": 0.01, "loss": 1.9921, "step": 42807 }, { "epoch": 4.391670086171523, "grad_norm": 0.047968216240406036, "learning_rate": 0.01, "loss": 2.0255, "step": 42810 }, { "epoch": 4.391977841608535, "grad_norm": 0.11133129149675369, "learning_rate": 0.01, "loss": 1.9947, "step": 42813 }, { "epoch": 4.392285597045547, "grad_norm": 0.09831759333610535, "learning_rate": 0.01, "loss": 1.9738, "step": 42816 }, { "epoch": 4.39259335248256, "grad_norm": 0.0829746425151825, "learning_rate": 0.01, "loss": 1.9932, "step": 42819 }, { "epoch": 4.392901107919573, "grad_norm": 0.04381052777171135, "learning_rate": 0.01, "loss": 1.9988, "step": 42822 }, { "epoch": 4.393208863356586, "grad_norm": 0.06539449840784073, "learning_rate": 0.01, "loss": 1.9888, "step": 42825 }, { "epoch": 4.393516618793599, "grad_norm": 0.03784729540348053, "learning_rate": 0.01, "loss": 1.9718, "step": 42828 }, { "epoch": 4.393824374230611, "grad_norm": 0.10152629017829895, "learning_rate": 0.01, "loss": 2.0002, "step": 42831 }, { "epoch": 4.394132129667624, "grad_norm": 0.043561842292547226, "learning_rate": 0.01, "loss": 2.0051, "step": 42834 }, { "epoch": 4.394439885104637, "grad_norm": 0.06225054711103439, "learning_rate": 0.01, "loss": 1.9822, "step": 42837 }, { "epoch": 4.39474764054165, "grad_norm": 0.08047157526016235, "learning_rate": 0.01, "loss": 1.9877, "step": 42840 }, { "epoch": 4.3950553959786625, "grad_norm": 0.09267039597034454, "learning_rate": 0.01, "loss": 2.0022, "step": 42843 }, { "epoch": 4.395363151415675, "grad_norm": 0.10524188727140427, "learning_rate": 0.01, "loss": 1.9832, "step": 42846 }, { "epoch": 4.395670906852688, "grad_norm": 0.0726306214928627, "learning_rate": 0.01, "loss": 2.0324, "step": 42849 }, { "epoch": 4.395978662289701, "grad_norm": 0.036908458918333054, "learning_rate": 0.01, "loss": 1.9911, "step": 42852 }, { "epoch": 4.396286417726714, "grad_norm": 0.052610307931900024, "learning_rate": 0.01, "loss": 1.9566, "step": 42855 }, { "epoch": 4.3965941731637255, "grad_norm": 0.04229322075843811, "learning_rate": 0.01, "loss": 2.0177, "step": 42858 }, { "epoch": 4.396901928600738, "grad_norm": 0.1478923261165619, "learning_rate": 0.01, "loss": 1.9907, "step": 42861 }, { "epoch": 4.397209684037751, "grad_norm": 0.08942049741744995, "learning_rate": 0.01, "loss": 2.001, "step": 42864 }, { "epoch": 4.397517439474764, "grad_norm": 0.051714204251766205, "learning_rate": 0.01, "loss": 2.0194, "step": 42867 }, { "epoch": 4.397825194911777, "grad_norm": 0.03621753305196762, "learning_rate": 0.01, "loss": 1.9926, "step": 42870 }, { "epoch": 4.398132950348789, "grad_norm": 0.03929129242897034, "learning_rate": 0.01, "loss": 1.9912, "step": 42873 }, { "epoch": 4.398440705785802, "grad_norm": 0.04855189844965935, "learning_rate": 0.01, "loss": 1.9873, "step": 42876 }, { "epoch": 4.398748461222815, "grad_norm": 0.060284826904535294, "learning_rate": 0.01, "loss": 1.975, "step": 42879 }, { "epoch": 4.399056216659828, "grad_norm": 0.07179313898086548, "learning_rate": 0.01, "loss": 1.9957, "step": 42882 }, { "epoch": 4.3993639720968405, "grad_norm": 0.10332684218883514, "learning_rate": 0.01, "loss": 2.0003, "step": 42885 }, { "epoch": 4.399671727533853, "grad_norm": 0.13461638987064362, "learning_rate": 0.01, "loss": 1.9792, "step": 42888 }, { "epoch": 4.399979482970866, "grad_norm": 0.08879181742668152, "learning_rate": 0.01, "loss": 2.0071, "step": 42891 }, { "epoch": 4.400287238407879, "grad_norm": 0.064743272960186, "learning_rate": 0.01, "loss": 1.992, "step": 42894 }, { "epoch": 4.400594993844892, "grad_norm": 0.04650283232331276, "learning_rate": 0.01, "loss": 1.9802, "step": 42897 }, { "epoch": 4.400902749281904, "grad_norm": 0.05072256550192833, "learning_rate": 0.01, "loss": 1.9972, "step": 42900 }, { "epoch": 4.401210504718916, "grad_norm": 0.05984727293252945, "learning_rate": 0.01, "loss": 2.0123, "step": 42903 }, { "epoch": 4.401518260155929, "grad_norm": 0.0773845687508583, "learning_rate": 0.01, "loss": 1.9812, "step": 42906 }, { "epoch": 4.401826015592942, "grad_norm": 0.06231288984417915, "learning_rate": 0.01, "loss": 1.9875, "step": 42909 }, { "epoch": 4.402133771029955, "grad_norm": 0.07276583462953568, "learning_rate": 0.01, "loss": 2.0029, "step": 42912 }, { "epoch": 4.402441526466967, "grad_norm": 0.043372802436351776, "learning_rate": 0.01, "loss": 1.9978, "step": 42915 }, { "epoch": 4.40274928190398, "grad_norm": 0.05217559635639191, "learning_rate": 0.01, "loss": 2.021, "step": 42918 }, { "epoch": 4.403057037340993, "grad_norm": 0.058887772262096405, "learning_rate": 0.01, "loss": 2.0104, "step": 42921 }, { "epoch": 4.403364792778006, "grad_norm": 0.04377232864499092, "learning_rate": 0.01, "loss": 1.9981, "step": 42924 }, { "epoch": 4.4036725482150185, "grad_norm": 0.032925624400377274, "learning_rate": 0.01, "loss": 1.9881, "step": 42927 }, { "epoch": 4.403980303652031, "grad_norm": 0.1263015866279602, "learning_rate": 0.01, "loss": 1.9824, "step": 42930 }, { "epoch": 4.404288059089044, "grad_norm": 0.046885982155799866, "learning_rate": 0.01, "loss": 2.002, "step": 42933 }, { "epoch": 4.404595814526057, "grad_norm": 0.10321955382823944, "learning_rate": 0.01, "loss": 2.0221, "step": 42936 }, { "epoch": 4.40490356996307, "grad_norm": 0.06488531827926636, "learning_rate": 0.01, "loss": 2.0114, "step": 42939 }, { "epoch": 4.405211325400082, "grad_norm": 0.09423347562551498, "learning_rate": 0.01, "loss": 1.9653, "step": 42942 }, { "epoch": 4.405519080837095, "grad_norm": 0.06422553211450577, "learning_rate": 0.01, "loss": 2.014, "step": 42945 }, { "epoch": 4.405826836274107, "grad_norm": 0.05872100591659546, "learning_rate": 0.01, "loss": 1.9904, "step": 42948 }, { "epoch": 4.40613459171112, "grad_norm": 0.043210845440626144, "learning_rate": 0.01, "loss": 1.9934, "step": 42951 }, { "epoch": 4.406442347148133, "grad_norm": 0.041857898235321045, "learning_rate": 0.01, "loss": 1.9919, "step": 42954 }, { "epoch": 4.406750102585145, "grad_norm": 0.03347545117139816, "learning_rate": 0.01, "loss": 1.9947, "step": 42957 }, { "epoch": 4.407057858022158, "grad_norm": 0.0377437062561512, "learning_rate": 0.01, "loss": 1.9979, "step": 42960 }, { "epoch": 4.407365613459171, "grad_norm": 0.07432477176189423, "learning_rate": 0.01, "loss": 1.9823, "step": 42963 }, { "epoch": 4.407673368896184, "grad_norm": 0.038358092308044434, "learning_rate": 0.01, "loss": 1.9913, "step": 42966 }, { "epoch": 4.4079811243331966, "grad_norm": 0.09125792980194092, "learning_rate": 0.01, "loss": 1.9757, "step": 42969 }, { "epoch": 4.408288879770209, "grad_norm": 0.11572171747684479, "learning_rate": 0.01, "loss": 1.9858, "step": 42972 }, { "epoch": 4.408596635207222, "grad_norm": 0.05868324264883995, "learning_rate": 0.01, "loss": 1.9868, "step": 42975 }, { "epoch": 4.408904390644235, "grad_norm": 0.0679447203874588, "learning_rate": 0.01, "loss": 2.0116, "step": 42978 }, { "epoch": 4.409212146081248, "grad_norm": 0.050446171313524246, "learning_rate": 0.01, "loss": 1.9923, "step": 42981 }, { "epoch": 4.4095199015182605, "grad_norm": 0.05003255605697632, "learning_rate": 0.01, "loss": 2.0134, "step": 42984 }, { "epoch": 4.409827656955273, "grad_norm": 0.05460989847779274, "learning_rate": 0.01, "loss": 1.9911, "step": 42987 }, { "epoch": 4.410135412392286, "grad_norm": 0.04039241746068001, "learning_rate": 0.01, "loss": 1.981, "step": 42990 }, { "epoch": 4.410443167829298, "grad_norm": 0.04114096984267235, "learning_rate": 0.01, "loss": 1.9696, "step": 42993 }, { "epoch": 4.410750923266311, "grad_norm": 0.1269209384918213, "learning_rate": 0.01, "loss": 1.995, "step": 42996 }, { "epoch": 4.4110586787033235, "grad_norm": 0.059638604521751404, "learning_rate": 0.01, "loss": 2.0176, "step": 42999 }, { "epoch": 4.411366434140336, "grad_norm": 0.09658347070217133, "learning_rate": 0.01, "loss": 1.984, "step": 43002 }, { "epoch": 4.411674189577349, "grad_norm": 0.05749305710196495, "learning_rate": 0.01, "loss": 1.982, "step": 43005 }, { "epoch": 4.411981945014362, "grad_norm": 0.04522128403186798, "learning_rate": 0.01, "loss": 1.987, "step": 43008 }, { "epoch": 4.412289700451375, "grad_norm": 0.034170012921094894, "learning_rate": 0.01, "loss": 1.9874, "step": 43011 }, { "epoch": 4.412597455888387, "grad_norm": 0.04310225695371628, "learning_rate": 0.01, "loss": 1.9923, "step": 43014 }, { "epoch": 4.4129052113254, "grad_norm": 0.15212641656398773, "learning_rate": 0.01, "loss": 1.9796, "step": 43017 }, { "epoch": 4.413212966762413, "grad_norm": 0.12978756427764893, "learning_rate": 0.01, "loss": 1.99, "step": 43020 }, { "epoch": 4.413520722199426, "grad_norm": 0.07762716710567474, "learning_rate": 0.01, "loss": 1.9977, "step": 43023 }, { "epoch": 4.4138284776364385, "grad_norm": 0.049494557082653046, "learning_rate": 0.01, "loss": 1.9936, "step": 43026 }, { "epoch": 4.414136233073451, "grad_norm": 0.060781434178352356, "learning_rate": 0.01, "loss": 1.9916, "step": 43029 }, { "epoch": 4.414443988510464, "grad_norm": 0.03154407814145088, "learning_rate": 0.01, "loss": 1.9888, "step": 43032 }, { "epoch": 4.414751743947477, "grad_norm": 0.048733990639448166, "learning_rate": 0.01, "loss": 1.9783, "step": 43035 }, { "epoch": 4.415059499384489, "grad_norm": 0.04283260181546211, "learning_rate": 0.01, "loss": 1.9817, "step": 43038 }, { "epoch": 4.4153672548215015, "grad_norm": 0.06798703223466873, "learning_rate": 0.01, "loss": 1.9858, "step": 43041 }, { "epoch": 4.415675010258514, "grad_norm": 0.10504204034805298, "learning_rate": 0.01, "loss": 1.9933, "step": 43044 }, { "epoch": 4.415982765695527, "grad_norm": 0.1270654797554016, "learning_rate": 0.01, "loss": 2.0083, "step": 43047 }, { "epoch": 4.41629052113254, "grad_norm": 0.06907479465007782, "learning_rate": 0.01, "loss": 1.9641, "step": 43050 }, { "epoch": 4.416598276569553, "grad_norm": 0.034706976264715195, "learning_rate": 0.01, "loss": 2.0081, "step": 43053 }, { "epoch": 4.416906032006565, "grad_norm": 0.033550575375556946, "learning_rate": 0.01, "loss": 2.0029, "step": 43056 }, { "epoch": 4.417213787443578, "grad_norm": 0.03790535405278206, "learning_rate": 0.01, "loss": 2.009, "step": 43059 }, { "epoch": 4.417521542880591, "grad_norm": 0.04516111686825752, "learning_rate": 0.01, "loss": 2.0134, "step": 43062 }, { "epoch": 4.417829298317604, "grad_norm": 0.07906030863523483, "learning_rate": 0.01, "loss": 1.9944, "step": 43065 }, { "epoch": 4.4181370537546165, "grad_norm": 0.11393613368272781, "learning_rate": 0.01, "loss": 2.0164, "step": 43068 }, { "epoch": 4.418444809191629, "grad_norm": 0.04494154825806618, "learning_rate": 0.01, "loss": 1.9909, "step": 43071 }, { "epoch": 4.418752564628642, "grad_norm": 0.04806877672672272, "learning_rate": 0.01, "loss": 2.0064, "step": 43074 }, { "epoch": 4.419060320065655, "grad_norm": 0.09511521458625793, "learning_rate": 0.01, "loss": 1.9999, "step": 43077 }, { "epoch": 4.419368075502668, "grad_norm": 0.036587249487638474, "learning_rate": 0.01, "loss": 2.0116, "step": 43080 }, { "epoch": 4.4196758309396795, "grad_norm": 0.03894515708088875, "learning_rate": 0.01, "loss": 1.9858, "step": 43083 }, { "epoch": 4.419983586376692, "grad_norm": 0.06960659474134445, "learning_rate": 0.01, "loss": 2.0064, "step": 43086 }, { "epoch": 4.420291341813705, "grad_norm": 0.04635101556777954, "learning_rate": 0.01, "loss": 1.9889, "step": 43089 }, { "epoch": 4.420599097250718, "grad_norm": 0.09964662045240402, "learning_rate": 0.01, "loss": 2.0055, "step": 43092 }, { "epoch": 4.420906852687731, "grad_norm": 0.10087699443101883, "learning_rate": 0.01, "loss": 2.0002, "step": 43095 }, { "epoch": 4.421214608124743, "grad_norm": 0.09748293459415436, "learning_rate": 0.01, "loss": 1.9937, "step": 43098 }, { "epoch": 4.421522363561756, "grad_norm": 0.04654933884739876, "learning_rate": 0.01, "loss": 1.9927, "step": 43101 }, { "epoch": 4.421830118998769, "grad_norm": 0.05607482045888901, "learning_rate": 0.01, "loss": 2.0018, "step": 43104 }, { "epoch": 4.422137874435782, "grad_norm": 0.0513363815844059, "learning_rate": 0.01, "loss": 1.9793, "step": 43107 }, { "epoch": 4.4224456298727945, "grad_norm": 0.048932064324617386, "learning_rate": 0.01, "loss": 1.9764, "step": 43110 }, { "epoch": 4.422753385309807, "grad_norm": 0.05465288087725639, "learning_rate": 0.01, "loss": 1.9965, "step": 43113 }, { "epoch": 4.42306114074682, "grad_norm": 0.04400217905640602, "learning_rate": 0.01, "loss": 2.0257, "step": 43116 }, { "epoch": 4.423368896183833, "grad_norm": 0.03483066335320473, "learning_rate": 0.01, "loss": 1.9716, "step": 43119 }, { "epoch": 4.423676651620846, "grad_norm": 0.04909312352538109, "learning_rate": 0.01, "loss": 2.0137, "step": 43122 }, { "epoch": 4.4239844070578584, "grad_norm": 0.09361930191516876, "learning_rate": 0.01, "loss": 2.0031, "step": 43125 }, { "epoch": 4.42429216249487, "grad_norm": 0.06980263441801071, "learning_rate": 0.01, "loss": 1.9741, "step": 43128 }, { "epoch": 4.424599917931883, "grad_norm": 0.06628268212080002, "learning_rate": 0.01, "loss": 2.0002, "step": 43131 }, { "epoch": 4.424907673368896, "grad_norm": 0.10180728137493134, "learning_rate": 0.01, "loss": 2.0153, "step": 43134 }, { "epoch": 4.425215428805909, "grad_norm": 0.10624035447835922, "learning_rate": 0.01, "loss": 2.0225, "step": 43137 }, { "epoch": 4.4255231842429215, "grad_norm": 0.08775760233402252, "learning_rate": 0.01, "loss": 2.0051, "step": 43140 }, { "epoch": 4.425830939679934, "grad_norm": 0.06270244717597961, "learning_rate": 0.01, "loss": 1.9939, "step": 43143 }, { "epoch": 4.426138695116947, "grad_norm": 0.06912422925233841, "learning_rate": 0.01, "loss": 1.9912, "step": 43146 }, { "epoch": 4.42644645055396, "grad_norm": 0.05379674583673477, "learning_rate": 0.01, "loss": 1.9942, "step": 43149 }, { "epoch": 4.426754205990973, "grad_norm": 0.041072819381952286, "learning_rate": 0.01, "loss": 1.9982, "step": 43152 }, { "epoch": 4.427061961427985, "grad_norm": 0.05191728100180626, "learning_rate": 0.01, "loss": 1.9799, "step": 43155 }, { "epoch": 4.427369716864998, "grad_norm": 0.03773737698793411, "learning_rate": 0.01, "loss": 2.001, "step": 43158 }, { "epoch": 4.427677472302011, "grad_norm": 0.10148506611585617, "learning_rate": 0.01, "loss": 1.9877, "step": 43161 }, { "epoch": 4.427985227739024, "grad_norm": 0.071526899933815, "learning_rate": 0.01, "loss": 1.9992, "step": 43164 }, { "epoch": 4.4282929831760365, "grad_norm": 0.07906235009431839, "learning_rate": 0.01, "loss": 1.9976, "step": 43167 }, { "epoch": 4.428600738613049, "grad_norm": 0.18193936347961426, "learning_rate": 0.01, "loss": 2.0095, "step": 43170 }, { "epoch": 4.428908494050061, "grad_norm": 0.1734820157289505, "learning_rate": 0.01, "loss": 1.9997, "step": 43173 }, { "epoch": 4.429216249487074, "grad_norm": 0.09781987965106964, "learning_rate": 0.01, "loss": 2.0113, "step": 43176 }, { "epoch": 4.429524004924087, "grad_norm": 0.06472992151975632, "learning_rate": 0.01, "loss": 1.9971, "step": 43179 }, { "epoch": 4.4298317603610995, "grad_norm": 0.04425714537501335, "learning_rate": 0.01, "loss": 1.9961, "step": 43182 }, { "epoch": 4.430139515798112, "grad_norm": 0.04678405821323395, "learning_rate": 0.01, "loss": 1.9983, "step": 43185 }, { "epoch": 4.430447271235125, "grad_norm": 0.04311143234372139, "learning_rate": 0.01, "loss": 2.0079, "step": 43188 }, { "epoch": 4.430755026672138, "grad_norm": 0.060729045420885086, "learning_rate": 0.01, "loss": 2.0093, "step": 43191 }, { "epoch": 4.431062782109151, "grad_norm": 0.07931876182556152, "learning_rate": 0.01, "loss": 1.9979, "step": 43194 }, { "epoch": 4.431370537546163, "grad_norm": 0.048263415694236755, "learning_rate": 0.01, "loss": 2.012, "step": 43197 }, { "epoch": 4.431678292983176, "grad_norm": 0.05372344329953194, "learning_rate": 0.01, "loss": 1.9636, "step": 43200 }, { "epoch": 4.431986048420189, "grad_norm": 0.06506495922803879, "learning_rate": 0.01, "loss": 1.9934, "step": 43203 }, { "epoch": 4.432293803857202, "grad_norm": 0.03484785556793213, "learning_rate": 0.01, "loss": 2.0067, "step": 43206 }, { "epoch": 4.4326015592942145, "grad_norm": 0.18487481772899628, "learning_rate": 0.01, "loss": 1.9852, "step": 43209 }, { "epoch": 4.432909314731227, "grad_norm": 0.11683176457881927, "learning_rate": 0.01, "loss": 2.0134, "step": 43212 }, { "epoch": 4.43321707016824, "grad_norm": 0.08598039299249649, "learning_rate": 0.01, "loss": 2.0051, "step": 43215 }, { "epoch": 4.433524825605252, "grad_norm": 0.0889374166727066, "learning_rate": 0.01, "loss": 1.982, "step": 43218 }, { "epoch": 4.433832581042265, "grad_norm": 0.061477046459913254, "learning_rate": 0.01, "loss": 1.9836, "step": 43221 }, { "epoch": 4.4341403364792775, "grad_norm": 0.046285588294267654, "learning_rate": 0.01, "loss": 1.9982, "step": 43224 }, { "epoch": 4.43444809191629, "grad_norm": 0.06310747563838959, "learning_rate": 0.01, "loss": 2.0178, "step": 43227 }, { "epoch": 4.434755847353303, "grad_norm": 0.04792320355772972, "learning_rate": 0.01, "loss": 2.0126, "step": 43230 }, { "epoch": 4.435063602790316, "grad_norm": 0.03359830752015114, "learning_rate": 0.01, "loss": 1.9831, "step": 43233 }, { "epoch": 4.435371358227329, "grad_norm": 0.04137792810797691, "learning_rate": 0.01, "loss": 2.0146, "step": 43236 }, { "epoch": 4.435679113664341, "grad_norm": 0.04756288602948189, "learning_rate": 0.01, "loss": 2.0042, "step": 43239 }, { "epoch": 4.435986869101354, "grad_norm": 0.054949600249528885, "learning_rate": 0.01, "loss": 2.0041, "step": 43242 }, { "epoch": 4.436294624538367, "grad_norm": 0.14858773350715637, "learning_rate": 0.01, "loss": 2.0114, "step": 43245 }, { "epoch": 4.43660237997538, "grad_norm": 0.08089875429868698, "learning_rate": 0.01, "loss": 2.0079, "step": 43248 }, { "epoch": 4.4369101354123925, "grad_norm": 0.0340421199798584, "learning_rate": 0.01, "loss": 1.9996, "step": 43251 }, { "epoch": 4.437217890849405, "grad_norm": 0.04666365310549736, "learning_rate": 0.01, "loss": 1.9918, "step": 43254 }, { "epoch": 4.437525646286418, "grad_norm": 0.04860776662826538, "learning_rate": 0.01, "loss": 2.0152, "step": 43257 }, { "epoch": 4.437833401723431, "grad_norm": 0.04814871773123741, "learning_rate": 0.01, "loss": 2.0135, "step": 43260 }, { "epoch": 4.438141157160443, "grad_norm": 0.07159804552793503, "learning_rate": 0.01, "loss": 1.9873, "step": 43263 }, { "epoch": 4.4384489125974556, "grad_norm": 0.08109670132398605, "learning_rate": 0.01, "loss": 1.9799, "step": 43266 }, { "epoch": 4.438756668034468, "grad_norm": 0.10254354774951935, "learning_rate": 0.01, "loss": 1.991, "step": 43269 }, { "epoch": 4.439064423471481, "grad_norm": 0.10418622195720673, "learning_rate": 0.01, "loss": 1.9699, "step": 43272 }, { "epoch": 4.439372178908494, "grad_norm": 0.04432861879467964, "learning_rate": 0.01, "loss": 2.0032, "step": 43275 }, { "epoch": 4.439679934345507, "grad_norm": 0.05000505968928337, "learning_rate": 0.01, "loss": 2.016, "step": 43278 }, { "epoch": 4.4399876897825195, "grad_norm": 0.0718022808432579, "learning_rate": 0.01, "loss": 2.0209, "step": 43281 }, { "epoch": 4.440295445219532, "grad_norm": 0.038088515400886536, "learning_rate": 0.01, "loss": 1.9808, "step": 43284 }, { "epoch": 4.440603200656545, "grad_norm": 0.04395698755979538, "learning_rate": 0.01, "loss": 1.9987, "step": 43287 }, { "epoch": 4.440910956093558, "grad_norm": 0.03977638855576515, "learning_rate": 0.01, "loss": 1.9909, "step": 43290 }, { "epoch": 4.441218711530571, "grad_norm": 0.11403058469295502, "learning_rate": 0.01, "loss": 1.9967, "step": 43293 }, { "epoch": 4.441526466967583, "grad_norm": 0.03809293359518051, "learning_rate": 0.01, "loss": 2.0028, "step": 43296 }, { "epoch": 4.441834222404596, "grad_norm": 0.04494985193014145, "learning_rate": 0.01, "loss": 1.9854, "step": 43299 }, { "epoch": 4.442141977841609, "grad_norm": 0.11755307763814926, "learning_rate": 0.01, "loss": 2.0114, "step": 43302 }, { "epoch": 4.442449733278622, "grad_norm": 0.046333327889442444, "learning_rate": 0.01, "loss": 1.9655, "step": 43305 }, { "epoch": 4.442757488715634, "grad_norm": 0.05719512701034546, "learning_rate": 0.01, "loss": 1.9976, "step": 43308 }, { "epoch": 4.443065244152646, "grad_norm": 0.0707264393568039, "learning_rate": 0.01, "loss": 1.9761, "step": 43311 }, { "epoch": 4.443372999589659, "grad_norm": 0.043551988899707794, "learning_rate": 0.01, "loss": 2.0078, "step": 43314 }, { "epoch": 4.443680755026672, "grad_norm": 0.11364515870809555, "learning_rate": 0.01, "loss": 1.9897, "step": 43317 }, { "epoch": 4.443988510463685, "grad_norm": 0.15514688193798065, "learning_rate": 0.01, "loss": 1.9957, "step": 43320 }, { "epoch": 4.4442962659006975, "grad_norm": 0.10261968523263931, "learning_rate": 0.01, "loss": 1.9876, "step": 43323 }, { "epoch": 4.44460402133771, "grad_norm": 0.09683386236429214, "learning_rate": 0.01, "loss": 1.9961, "step": 43326 }, { "epoch": 4.444911776774723, "grad_norm": 0.055726949125528336, "learning_rate": 0.01, "loss": 1.9981, "step": 43329 }, { "epoch": 4.445219532211736, "grad_norm": 0.053106311708688736, "learning_rate": 0.01, "loss": 2.0087, "step": 43332 }, { "epoch": 4.445527287648749, "grad_norm": 0.043625880032777786, "learning_rate": 0.01, "loss": 2.023, "step": 43335 }, { "epoch": 4.445835043085761, "grad_norm": 0.05958040803670883, "learning_rate": 0.01, "loss": 1.987, "step": 43338 }, { "epoch": 4.446142798522774, "grad_norm": 0.043522778898477554, "learning_rate": 0.01, "loss": 2.0004, "step": 43341 }, { "epoch": 4.446450553959787, "grad_norm": 0.038740675896406174, "learning_rate": 0.01, "loss": 1.991, "step": 43344 }, { "epoch": 4.4467583093968, "grad_norm": 0.043132517486810684, "learning_rate": 0.01, "loss": 2.006, "step": 43347 }, { "epoch": 4.4470660648338125, "grad_norm": 0.07173844426870346, "learning_rate": 0.01, "loss": 2.0257, "step": 43350 }, { "epoch": 4.447373820270824, "grad_norm": 0.06787187606096268, "learning_rate": 0.01, "loss": 1.9997, "step": 43353 }, { "epoch": 4.447681575707837, "grad_norm": 0.1125207170844078, "learning_rate": 0.01, "loss": 1.9932, "step": 43356 }, { "epoch": 4.44798933114485, "grad_norm": 0.037585411220788956, "learning_rate": 0.01, "loss": 2.0055, "step": 43359 }, { "epoch": 4.448297086581863, "grad_norm": 0.09652412682771683, "learning_rate": 0.01, "loss": 1.9937, "step": 43362 }, { "epoch": 4.4486048420188755, "grad_norm": 0.0548916831612587, "learning_rate": 0.01, "loss": 2.0251, "step": 43365 }, { "epoch": 4.448912597455888, "grad_norm": 0.07648541778326035, "learning_rate": 0.01, "loss": 2.0098, "step": 43368 }, { "epoch": 4.449220352892901, "grad_norm": 0.07968877255916595, "learning_rate": 0.01, "loss": 1.9836, "step": 43371 }, { "epoch": 4.449528108329914, "grad_norm": 0.046214036643505096, "learning_rate": 0.01, "loss": 1.9994, "step": 43374 }, { "epoch": 4.449835863766927, "grad_norm": 0.03589135408401489, "learning_rate": 0.01, "loss": 2.0122, "step": 43377 }, { "epoch": 4.450143619203939, "grad_norm": 0.1217019185423851, "learning_rate": 0.01, "loss": 1.9895, "step": 43380 }, { "epoch": 4.450451374640952, "grad_norm": 0.14711309969425201, "learning_rate": 0.01, "loss": 1.9824, "step": 43383 }, { "epoch": 4.450759130077965, "grad_norm": 0.15004196763038635, "learning_rate": 0.01, "loss": 1.9774, "step": 43386 }, { "epoch": 4.451066885514978, "grad_norm": 0.143090158700943, "learning_rate": 0.01, "loss": 2.0206, "step": 43389 }, { "epoch": 4.4513746409519905, "grad_norm": 0.06957674771547318, "learning_rate": 0.01, "loss": 1.9934, "step": 43392 }, { "epoch": 4.451682396389003, "grad_norm": 0.10823699831962585, "learning_rate": 0.01, "loss": 2.0141, "step": 43395 }, { "epoch": 4.451990151826015, "grad_norm": 0.0608796700835228, "learning_rate": 0.01, "loss": 1.9998, "step": 43398 }, { "epoch": 4.452297907263028, "grad_norm": 0.038806330412626266, "learning_rate": 0.01, "loss": 1.9815, "step": 43401 }, { "epoch": 4.452605662700041, "grad_norm": 0.053961463272571564, "learning_rate": 0.01, "loss": 2.0196, "step": 43404 }, { "epoch": 4.4529134181370535, "grad_norm": 0.03717347979545593, "learning_rate": 0.01, "loss": 1.9702, "step": 43407 }, { "epoch": 4.453221173574066, "grad_norm": 0.04289384186267853, "learning_rate": 0.01, "loss": 1.9965, "step": 43410 }, { "epoch": 4.453528929011079, "grad_norm": 0.08649279922246933, "learning_rate": 0.01, "loss": 2.0135, "step": 43413 }, { "epoch": 4.453836684448092, "grad_norm": 0.1384621560573578, "learning_rate": 0.01, "loss": 1.9779, "step": 43416 }, { "epoch": 4.454144439885105, "grad_norm": 0.09169614315032959, "learning_rate": 0.01, "loss": 1.968, "step": 43419 }, { "epoch": 4.4544521953221174, "grad_norm": 0.07004016637802124, "learning_rate": 0.01, "loss": 1.9986, "step": 43422 }, { "epoch": 4.45475995075913, "grad_norm": 0.0574488528072834, "learning_rate": 0.01, "loss": 1.9666, "step": 43425 }, { "epoch": 4.455067706196143, "grad_norm": 0.048521172255277634, "learning_rate": 0.01, "loss": 1.9769, "step": 43428 }, { "epoch": 4.455375461633156, "grad_norm": 0.11481918394565582, "learning_rate": 0.01, "loss": 1.9951, "step": 43431 }, { "epoch": 4.455683217070169, "grad_norm": 0.09158429503440857, "learning_rate": 0.01, "loss": 1.9884, "step": 43434 }, { "epoch": 4.455990972507181, "grad_norm": 0.039962366223335266, "learning_rate": 0.01, "loss": 1.9947, "step": 43437 }, { "epoch": 4.456298727944194, "grad_norm": 0.04112045839428902, "learning_rate": 0.01, "loss": 1.9806, "step": 43440 }, { "epoch": 4.456606483381206, "grad_norm": 0.057623397558927536, "learning_rate": 0.01, "loss": 2.0006, "step": 43443 }, { "epoch": 4.456914238818219, "grad_norm": 0.0683390349149704, "learning_rate": 0.01, "loss": 2.0118, "step": 43446 }, { "epoch": 4.457221994255232, "grad_norm": 0.09766959398984909, "learning_rate": 0.01, "loss": 2.0217, "step": 43449 }, { "epoch": 4.457529749692244, "grad_norm": 0.041423216462135315, "learning_rate": 0.01, "loss": 1.9732, "step": 43452 }, { "epoch": 4.457837505129257, "grad_norm": 0.08813819289207458, "learning_rate": 0.01, "loss": 1.9777, "step": 43455 }, { "epoch": 4.45814526056627, "grad_norm": 0.06834255903959274, "learning_rate": 0.01, "loss": 2.0154, "step": 43458 }, { "epoch": 4.458453016003283, "grad_norm": 0.08782140165567398, "learning_rate": 0.01, "loss": 2.0052, "step": 43461 }, { "epoch": 4.4587607714402955, "grad_norm": 0.08695351332426071, "learning_rate": 0.01, "loss": 1.9848, "step": 43464 }, { "epoch": 4.459068526877308, "grad_norm": 0.09302899986505508, "learning_rate": 0.01, "loss": 1.9906, "step": 43467 }, { "epoch": 4.459376282314321, "grad_norm": 0.06920462846755981, "learning_rate": 0.01, "loss": 1.9963, "step": 43470 }, { "epoch": 4.459684037751334, "grad_norm": 0.04809924215078354, "learning_rate": 0.01, "loss": 2.0079, "step": 43473 }, { "epoch": 4.459991793188347, "grad_norm": 0.06642390787601471, "learning_rate": 0.01, "loss": 1.9854, "step": 43476 }, { "epoch": 4.460299548625359, "grad_norm": 0.04986566677689552, "learning_rate": 0.01, "loss": 1.9835, "step": 43479 }, { "epoch": 4.460607304062372, "grad_norm": 0.061649248003959656, "learning_rate": 0.01, "loss": 2.0061, "step": 43482 }, { "epoch": 4.460915059499385, "grad_norm": 0.049117859452962875, "learning_rate": 0.01, "loss": 2.0257, "step": 43485 }, { "epoch": 4.461222814936397, "grad_norm": 0.049163367599248886, "learning_rate": 0.01, "loss": 1.9871, "step": 43488 }, { "epoch": 4.46153057037341, "grad_norm": 0.036282382905483246, "learning_rate": 0.01, "loss": 2.0089, "step": 43491 }, { "epoch": 4.461838325810422, "grad_norm": 0.10942814499139786, "learning_rate": 0.01, "loss": 2.0058, "step": 43494 }, { "epoch": 4.462146081247435, "grad_norm": 0.06589425355195999, "learning_rate": 0.01, "loss": 1.9831, "step": 43497 }, { "epoch": 4.462453836684448, "grad_norm": 0.0967121571302414, "learning_rate": 0.01, "loss": 2.0149, "step": 43500 }, { "epoch": 4.462761592121461, "grad_norm": 0.0673070177435875, "learning_rate": 0.01, "loss": 1.9889, "step": 43503 }, { "epoch": 4.4630693475584735, "grad_norm": 0.09439177066087723, "learning_rate": 0.01, "loss": 1.9928, "step": 43506 }, { "epoch": 4.463377102995486, "grad_norm": 0.06875397264957428, "learning_rate": 0.01, "loss": 1.9826, "step": 43509 }, { "epoch": 4.463684858432499, "grad_norm": 0.0774875059723854, "learning_rate": 0.01, "loss": 1.9729, "step": 43512 }, { "epoch": 4.463992613869512, "grad_norm": 0.07138268649578094, "learning_rate": 0.01, "loss": 2.0024, "step": 43515 }, { "epoch": 4.464300369306525, "grad_norm": 0.03174857050180435, "learning_rate": 0.01, "loss": 1.9842, "step": 43518 }, { "epoch": 4.464608124743537, "grad_norm": 0.11445671319961548, "learning_rate": 0.01, "loss": 1.967, "step": 43521 }, { "epoch": 4.46491588018055, "grad_norm": 0.041139326989650726, "learning_rate": 0.01, "loss": 1.9953, "step": 43524 }, { "epoch": 4.465223635617563, "grad_norm": 0.043187763541936874, "learning_rate": 0.01, "loss": 2.024, "step": 43527 }, { "epoch": 4.465531391054576, "grad_norm": 0.09367438405752182, "learning_rate": 0.01, "loss": 2.0089, "step": 43530 }, { "epoch": 4.465839146491588, "grad_norm": 0.07866624742746353, "learning_rate": 0.01, "loss": 1.9997, "step": 43533 }, { "epoch": 4.4661469019286, "grad_norm": 0.08221796900033951, "learning_rate": 0.01, "loss": 1.9908, "step": 43536 }, { "epoch": 4.466454657365613, "grad_norm": 0.07687985897064209, "learning_rate": 0.01, "loss": 1.9927, "step": 43539 }, { "epoch": 4.466762412802626, "grad_norm": 0.042440030723810196, "learning_rate": 0.01, "loss": 2.0002, "step": 43542 }, { "epoch": 4.467070168239639, "grad_norm": 0.03422633558511734, "learning_rate": 0.01, "loss": 1.9868, "step": 43545 }, { "epoch": 4.4673779236766515, "grad_norm": 0.05047105997800827, "learning_rate": 0.01, "loss": 1.9935, "step": 43548 }, { "epoch": 4.467685679113664, "grad_norm": 0.0817263275384903, "learning_rate": 0.01, "loss": 2.0029, "step": 43551 }, { "epoch": 4.467993434550677, "grad_norm": 0.07055595517158508, "learning_rate": 0.01, "loss": 2.008, "step": 43554 }, { "epoch": 4.46830118998769, "grad_norm": 0.12304790318012238, "learning_rate": 0.01, "loss": 2.0027, "step": 43557 }, { "epoch": 4.468608945424703, "grad_norm": 0.057413533329963684, "learning_rate": 0.01, "loss": 2.0168, "step": 43560 }, { "epoch": 4.468916700861715, "grad_norm": 0.1333860456943512, "learning_rate": 0.01, "loss": 2.0154, "step": 43563 }, { "epoch": 4.469224456298728, "grad_norm": 0.04758629575371742, "learning_rate": 0.01, "loss": 1.9882, "step": 43566 }, { "epoch": 4.469532211735741, "grad_norm": 0.0403926707804203, "learning_rate": 0.01, "loss": 2.0074, "step": 43569 }, { "epoch": 4.469839967172754, "grad_norm": 0.05051876977086067, "learning_rate": 0.01, "loss": 1.9798, "step": 43572 }, { "epoch": 4.470147722609767, "grad_norm": 0.12117879837751389, "learning_rate": 0.01, "loss": 1.9886, "step": 43575 }, { "epoch": 4.4704554780467785, "grad_norm": 0.0969235822558403, "learning_rate": 0.01, "loss": 1.9754, "step": 43578 }, { "epoch": 4.470763233483791, "grad_norm": 0.10259240120649338, "learning_rate": 0.01, "loss": 2.0028, "step": 43581 }, { "epoch": 4.471070988920804, "grad_norm": 0.056327935308218, "learning_rate": 0.01, "loss": 2.0223, "step": 43584 }, { "epoch": 4.471378744357817, "grad_norm": 0.09567862004041672, "learning_rate": 0.01, "loss": 2.026, "step": 43587 }, { "epoch": 4.47168649979483, "grad_norm": 0.0687853991985321, "learning_rate": 0.01, "loss": 2.011, "step": 43590 }, { "epoch": 4.471994255231842, "grad_norm": 0.03400299698114395, "learning_rate": 0.01, "loss": 2.0126, "step": 43593 }, { "epoch": 4.472302010668855, "grad_norm": 0.05291087552905083, "learning_rate": 0.01, "loss": 2.0251, "step": 43596 }, { "epoch": 4.472609766105868, "grad_norm": 0.09208089858293533, "learning_rate": 0.01, "loss": 1.9933, "step": 43599 }, { "epoch": 4.472917521542881, "grad_norm": 0.08073403686285019, "learning_rate": 0.01, "loss": 1.9934, "step": 43602 }, { "epoch": 4.4732252769798935, "grad_norm": 0.04130372405052185, "learning_rate": 0.01, "loss": 1.9883, "step": 43605 }, { "epoch": 4.473533032416906, "grad_norm": 0.06413860619068146, "learning_rate": 0.01, "loss": 1.9817, "step": 43608 }, { "epoch": 4.473840787853919, "grad_norm": 0.10064514726400375, "learning_rate": 0.01, "loss": 1.9879, "step": 43611 }, { "epoch": 4.474148543290932, "grad_norm": 0.04522353783249855, "learning_rate": 0.01, "loss": 1.9923, "step": 43614 }, { "epoch": 4.474456298727945, "grad_norm": 0.03851405903697014, "learning_rate": 0.01, "loss": 1.9828, "step": 43617 }, { "epoch": 4.474764054164957, "grad_norm": 0.03857644274830818, "learning_rate": 0.01, "loss": 1.9754, "step": 43620 }, { "epoch": 4.475071809601969, "grad_norm": 0.07934223115444183, "learning_rate": 0.01, "loss": 1.9901, "step": 43623 }, { "epoch": 4.475379565038982, "grad_norm": 0.034822918474674225, "learning_rate": 0.01, "loss": 1.9949, "step": 43626 }, { "epoch": 4.475687320475995, "grad_norm": 0.07185374945402145, "learning_rate": 0.01, "loss": 1.9969, "step": 43629 }, { "epoch": 4.475995075913008, "grad_norm": 0.03542499616742134, "learning_rate": 0.01, "loss": 2.0171, "step": 43632 }, { "epoch": 4.47630283135002, "grad_norm": 0.034001853317022324, "learning_rate": 0.01, "loss": 2.0091, "step": 43635 }, { "epoch": 4.476610586787033, "grad_norm": 0.04940172657370567, "learning_rate": 0.01, "loss": 2.0006, "step": 43638 }, { "epoch": 4.476918342224046, "grad_norm": 0.04434162750840187, "learning_rate": 0.01, "loss": 1.983, "step": 43641 }, { "epoch": 4.477226097661059, "grad_norm": 0.08915964514017105, "learning_rate": 0.01, "loss": 1.9974, "step": 43644 }, { "epoch": 4.4775338530980715, "grad_norm": 0.05530424416065216, "learning_rate": 0.01, "loss": 1.98, "step": 43647 }, { "epoch": 4.477841608535084, "grad_norm": 0.06746593117713928, "learning_rate": 0.01, "loss": 1.9997, "step": 43650 }, { "epoch": 4.478149363972097, "grad_norm": 0.05016857758164406, "learning_rate": 0.01, "loss": 1.9764, "step": 43653 }, { "epoch": 4.47845711940911, "grad_norm": 0.04478998854756355, "learning_rate": 0.01, "loss": 1.9982, "step": 43656 }, { "epoch": 4.478764874846123, "grad_norm": 0.04121081531047821, "learning_rate": 0.01, "loss": 1.9814, "step": 43659 }, { "epoch": 4.479072630283135, "grad_norm": 0.0476783849298954, "learning_rate": 0.01, "loss": 2.0231, "step": 43662 }, { "epoch": 4.479380385720148, "grad_norm": 0.15095461905002594, "learning_rate": 0.01, "loss": 1.9695, "step": 43665 }, { "epoch": 4.47968814115716, "grad_norm": 0.06961385160684586, "learning_rate": 0.01, "loss": 2.0014, "step": 43668 }, { "epoch": 4.479995896594173, "grad_norm": 0.040564242750406265, "learning_rate": 0.01, "loss": 1.9983, "step": 43671 }, { "epoch": 4.480303652031186, "grad_norm": 0.07302725315093994, "learning_rate": 0.01, "loss": 2.0106, "step": 43674 }, { "epoch": 4.480611407468198, "grad_norm": 0.12494261562824249, "learning_rate": 0.01, "loss": 2.0155, "step": 43677 }, { "epoch": 4.480919162905211, "grad_norm": 0.06394487619400024, "learning_rate": 0.01, "loss": 2.0087, "step": 43680 }, { "epoch": 4.481226918342224, "grad_norm": 0.0740828663110733, "learning_rate": 0.01, "loss": 1.9907, "step": 43683 }, { "epoch": 4.481534673779237, "grad_norm": 0.06996148079633713, "learning_rate": 0.01, "loss": 2.0093, "step": 43686 }, { "epoch": 4.4818424292162495, "grad_norm": 0.041247058659791946, "learning_rate": 0.01, "loss": 1.9849, "step": 43689 }, { "epoch": 4.482150184653262, "grad_norm": 0.04092543199658394, "learning_rate": 0.01, "loss": 1.9997, "step": 43692 }, { "epoch": 4.482457940090275, "grad_norm": 0.05542224645614624, "learning_rate": 0.01, "loss": 2.0067, "step": 43695 }, { "epoch": 4.482765695527288, "grad_norm": 0.0780789703130722, "learning_rate": 0.01, "loss": 1.9686, "step": 43698 }, { "epoch": 4.483073450964301, "grad_norm": 0.06363578885793686, "learning_rate": 0.01, "loss": 1.9807, "step": 43701 }, { "epoch": 4.483381206401313, "grad_norm": 0.12707744538784027, "learning_rate": 0.01, "loss": 1.9427, "step": 43704 }, { "epoch": 4.483688961838326, "grad_norm": 0.14165590703487396, "learning_rate": 0.01, "loss": 1.9958, "step": 43707 }, { "epoch": 4.483996717275339, "grad_norm": 0.08245092630386353, "learning_rate": 0.01, "loss": 2.0143, "step": 43710 }, { "epoch": 4.484304472712351, "grad_norm": 0.048774946480989456, "learning_rate": 0.01, "loss": 1.9703, "step": 43713 }, { "epoch": 4.484612228149364, "grad_norm": 0.05654727295041084, "learning_rate": 0.01, "loss": 1.999, "step": 43716 }, { "epoch": 4.4849199835863764, "grad_norm": 0.046862781047821045, "learning_rate": 0.01, "loss": 1.9948, "step": 43719 }, { "epoch": 4.485227739023389, "grad_norm": 0.06660104542970657, "learning_rate": 0.01, "loss": 1.9756, "step": 43722 }, { "epoch": 4.485535494460402, "grad_norm": 0.05910905450582504, "learning_rate": 0.01, "loss": 1.992, "step": 43725 }, { "epoch": 4.485843249897415, "grad_norm": 0.05047675967216492, "learning_rate": 0.01, "loss": 2.0072, "step": 43728 }, { "epoch": 4.486151005334428, "grad_norm": 0.03190279006958008, "learning_rate": 0.01, "loss": 2.0058, "step": 43731 }, { "epoch": 4.48645876077144, "grad_norm": 0.04960602894425392, "learning_rate": 0.01, "loss": 1.9991, "step": 43734 }, { "epoch": 4.486766516208453, "grad_norm": 0.10073108971118927, "learning_rate": 0.01, "loss": 1.9863, "step": 43737 }, { "epoch": 4.487074271645466, "grad_norm": 0.051219914108514786, "learning_rate": 0.01, "loss": 1.9923, "step": 43740 }, { "epoch": 4.487382027082479, "grad_norm": 0.051645878702402115, "learning_rate": 0.01, "loss": 1.9838, "step": 43743 }, { "epoch": 4.4876897825194915, "grad_norm": 0.08252865076065063, "learning_rate": 0.01, "loss": 2.0034, "step": 43746 }, { "epoch": 4.487997537956504, "grad_norm": 0.06515108048915863, "learning_rate": 0.01, "loss": 1.9843, "step": 43749 }, { "epoch": 4.488305293393517, "grad_norm": 0.038027096539735794, "learning_rate": 0.01, "loss": 1.9856, "step": 43752 }, { "epoch": 4.48861304883053, "grad_norm": 0.06444455683231354, "learning_rate": 0.01, "loss": 1.9978, "step": 43755 }, { "epoch": 4.488920804267542, "grad_norm": 0.03724157065153122, "learning_rate": 0.01, "loss": 1.9947, "step": 43758 }, { "epoch": 4.4892285597045545, "grad_norm": 0.04831653833389282, "learning_rate": 0.01, "loss": 2.02, "step": 43761 }, { "epoch": 4.489536315141567, "grad_norm": 0.062199629843235016, "learning_rate": 0.01, "loss": 1.9885, "step": 43764 }, { "epoch": 4.48984407057858, "grad_norm": 0.047005314379930496, "learning_rate": 0.01, "loss": 2.0018, "step": 43767 }, { "epoch": 4.490151826015593, "grad_norm": 0.1316227912902832, "learning_rate": 0.01, "loss": 1.9741, "step": 43770 }, { "epoch": 4.490459581452606, "grad_norm": 0.05741405114531517, "learning_rate": 0.01, "loss": 2.0053, "step": 43773 }, { "epoch": 4.490767336889618, "grad_norm": 0.10407622158527374, "learning_rate": 0.01, "loss": 1.9765, "step": 43776 }, { "epoch": 4.491075092326631, "grad_norm": 0.05979277566075325, "learning_rate": 0.01, "loss": 1.9882, "step": 43779 }, { "epoch": 4.491382847763644, "grad_norm": 0.033753860741853714, "learning_rate": 0.01, "loss": 1.9822, "step": 43782 }, { "epoch": 4.491690603200657, "grad_norm": 0.03481614217162132, "learning_rate": 0.01, "loss": 2.0178, "step": 43785 }, { "epoch": 4.4919983586376695, "grad_norm": 0.05884877219796181, "learning_rate": 0.01, "loss": 1.98, "step": 43788 }, { "epoch": 4.492306114074682, "grad_norm": 0.10501875728368759, "learning_rate": 0.01, "loss": 2.0148, "step": 43791 }, { "epoch": 4.492613869511695, "grad_norm": 0.07319078594446182, "learning_rate": 0.01, "loss": 1.9872, "step": 43794 }, { "epoch": 4.492921624948708, "grad_norm": 0.04055263102054596, "learning_rate": 0.01, "loss": 2.0002, "step": 43797 }, { "epoch": 4.493229380385721, "grad_norm": 0.1268715113401413, "learning_rate": 0.01, "loss": 2.016, "step": 43800 }, { "epoch": 4.4935371358227325, "grad_norm": 0.0518607497215271, "learning_rate": 0.01, "loss": 1.9931, "step": 43803 }, { "epoch": 4.493844891259745, "grad_norm": 0.08158674836158752, "learning_rate": 0.01, "loss": 1.9974, "step": 43806 }, { "epoch": 4.494152646696758, "grad_norm": 0.07762011885643005, "learning_rate": 0.01, "loss": 2.0026, "step": 43809 }, { "epoch": 4.494460402133771, "grad_norm": 0.09235764294862747, "learning_rate": 0.01, "loss": 1.9937, "step": 43812 }, { "epoch": 4.494768157570784, "grad_norm": 0.050333961844444275, "learning_rate": 0.01, "loss": 2.0071, "step": 43815 }, { "epoch": 4.495075913007796, "grad_norm": 0.0546598955988884, "learning_rate": 0.01, "loss": 2.011, "step": 43818 }, { "epoch": 4.495383668444809, "grad_norm": 0.07909304648637772, "learning_rate": 0.01, "loss": 2.002, "step": 43821 }, { "epoch": 4.495691423881822, "grad_norm": 0.10644279420375824, "learning_rate": 0.01, "loss": 2.0013, "step": 43824 }, { "epoch": 4.495999179318835, "grad_norm": 0.06348330527544022, "learning_rate": 0.01, "loss": 1.9957, "step": 43827 }, { "epoch": 4.4963069347558475, "grad_norm": 0.060767900198698044, "learning_rate": 0.01, "loss": 1.9772, "step": 43830 }, { "epoch": 4.49661469019286, "grad_norm": 0.050787363201379776, "learning_rate": 0.01, "loss": 1.9858, "step": 43833 }, { "epoch": 4.496922445629873, "grad_norm": 0.0362878292798996, "learning_rate": 0.01, "loss": 2.0033, "step": 43836 }, { "epoch": 4.497230201066886, "grad_norm": 0.07702784240245819, "learning_rate": 0.01, "loss": 2.0046, "step": 43839 }, { "epoch": 4.497537956503899, "grad_norm": 0.05007346719503403, "learning_rate": 0.01, "loss": 2.0105, "step": 43842 }, { "epoch": 4.4978457119409105, "grad_norm": 0.04794207960367203, "learning_rate": 0.01, "loss": 2.0027, "step": 43845 }, { "epoch": 4.498153467377923, "grad_norm": 0.04176037013530731, "learning_rate": 0.01, "loss": 1.9922, "step": 43848 }, { "epoch": 4.498461222814936, "grad_norm": 0.06070488691329956, "learning_rate": 0.01, "loss": 1.9889, "step": 43851 }, { "epoch": 4.498768978251949, "grad_norm": 0.0750289261341095, "learning_rate": 0.01, "loss": 1.9803, "step": 43854 }, { "epoch": 4.499076733688962, "grad_norm": 0.1204972118139267, "learning_rate": 0.01, "loss": 1.9801, "step": 43857 }, { "epoch": 4.499384489125974, "grad_norm": 0.03909352049231529, "learning_rate": 0.01, "loss": 1.9919, "step": 43860 }, { "epoch": 4.499692244562987, "grad_norm": 0.060046713799238205, "learning_rate": 0.01, "loss": 2.0008, "step": 43863 }, { "epoch": 4.5, "grad_norm": 0.03170366585254669, "learning_rate": 0.01, "loss": 2.0012, "step": 43866 }, { "epoch": 4.500307755437013, "grad_norm": 0.07817841321229935, "learning_rate": 0.01, "loss": 1.9851, "step": 43869 }, { "epoch": 4.500615510874026, "grad_norm": 0.06809035688638687, "learning_rate": 0.01, "loss": 2.0199, "step": 43872 }, { "epoch": 4.500923266311038, "grad_norm": 0.0353827029466629, "learning_rate": 0.01, "loss": 2.0217, "step": 43875 }, { "epoch": 4.501231021748051, "grad_norm": 0.05174241214990616, "learning_rate": 0.01, "loss": 1.9882, "step": 43878 }, { "epoch": 4.501538777185064, "grad_norm": 0.05049923434853554, "learning_rate": 0.01, "loss": 2.0102, "step": 43881 }, { "epoch": 4.501846532622077, "grad_norm": 0.04014773294329643, "learning_rate": 0.01, "loss": 2.0177, "step": 43884 }, { "epoch": 4.502154288059089, "grad_norm": 0.08103401213884354, "learning_rate": 0.01, "loss": 1.9977, "step": 43887 }, { "epoch": 4.502462043496102, "grad_norm": 0.04270040616393089, "learning_rate": 0.01, "loss": 2.0035, "step": 43890 }, { "epoch": 4.502769798933114, "grad_norm": 0.05039724335074425, "learning_rate": 0.01, "loss": 1.9908, "step": 43893 }, { "epoch": 4.503077554370127, "grad_norm": 0.10098660737276077, "learning_rate": 0.01, "loss": 1.9876, "step": 43896 }, { "epoch": 4.50338530980714, "grad_norm": 0.08645161241292953, "learning_rate": 0.01, "loss": 1.9762, "step": 43899 }, { "epoch": 4.5036930652441525, "grad_norm": 0.06836593151092529, "learning_rate": 0.01, "loss": 2.0081, "step": 43902 }, { "epoch": 4.504000820681165, "grad_norm": 0.09137406945228577, "learning_rate": 0.01, "loss": 1.9875, "step": 43905 }, { "epoch": 4.504308576118178, "grad_norm": 0.04432569816708565, "learning_rate": 0.01, "loss": 1.985, "step": 43908 }, { "epoch": 4.504616331555191, "grad_norm": 0.03743622079491615, "learning_rate": 0.01, "loss": 2.0019, "step": 43911 }, { "epoch": 4.504924086992204, "grad_norm": 0.09280388057231903, "learning_rate": 0.01, "loss": 1.9984, "step": 43914 }, { "epoch": 4.505231842429216, "grad_norm": 0.06827981770038605, "learning_rate": 0.01, "loss": 2.017, "step": 43917 }, { "epoch": 4.505539597866229, "grad_norm": 0.13798165321350098, "learning_rate": 0.01, "loss": 1.9852, "step": 43920 }, { "epoch": 4.505847353303242, "grad_norm": 0.046148303896188736, "learning_rate": 0.01, "loss": 1.9979, "step": 43923 }, { "epoch": 4.506155108740255, "grad_norm": 0.06119319424033165, "learning_rate": 0.01, "loss": 1.9993, "step": 43926 }, { "epoch": 4.5064628641772675, "grad_norm": 0.04381044954061508, "learning_rate": 0.01, "loss": 1.9884, "step": 43929 }, { "epoch": 4.506770619614279, "grad_norm": 0.043389271944761276, "learning_rate": 0.01, "loss": 2.0023, "step": 43932 }, { "epoch": 4.507078375051293, "grad_norm": 0.0379410944879055, "learning_rate": 0.01, "loss": 2.002, "step": 43935 }, { "epoch": 4.507386130488305, "grad_norm": 0.043701719492673874, "learning_rate": 0.01, "loss": 1.9863, "step": 43938 }, { "epoch": 4.507693885925318, "grad_norm": 0.11455623805522919, "learning_rate": 0.01, "loss": 1.9864, "step": 43941 }, { "epoch": 4.5080016413623305, "grad_norm": 0.06367077678442001, "learning_rate": 0.01, "loss": 2.0068, "step": 43944 }, { "epoch": 4.508309396799343, "grad_norm": 0.12104804813861847, "learning_rate": 0.01, "loss": 2.0108, "step": 43947 }, { "epoch": 4.508617152236356, "grad_norm": 0.08374746888875961, "learning_rate": 0.01, "loss": 1.9676, "step": 43950 }, { "epoch": 4.508924907673369, "grad_norm": 0.12107446789741516, "learning_rate": 0.01, "loss": 2.0093, "step": 43953 }, { "epoch": 4.509232663110382, "grad_norm": 0.043210193514823914, "learning_rate": 0.01, "loss": 1.972, "step": 43956 }, { "epoch": 4.509540418547394, "grad_norm": 0.05984468385577202, "learning_rate": 0.01, "loss": 2.0054, "step": 43959 }, { "epoch": 4.509848173984407, "grad_norm": 0.04827868938446045, "learning_rate": 0.01, "loss": 2.0216, "step": 43962 }, { "epoch": 4.51015592942142, "grad_norm": 0.04763716831803322, "learning_rate": 0.01, "loss": 1.9968, "step": 43965 }, { "epoch": 4.510463684858433, "grad_norm": 0.045888885855674744, "learning_rate": 0.01, "loss": 1.9974, "step": 43968 }, { "epoch": 4.5107714402954455, "grad_norm": 0.06434401869773865, "learning_rate": 0.01, "loss": 2.0009, "step": 43971 }, { "epoch": 4.511079195732458, "grad_norm": 0.09480862319469452, "learning_rate": 0.01, "loss": 1.9885, "step": 43974 }, { "epoch": 4.51138695116947, "grad_norm": 0.10739507526159286, "learning_rate": 0.01, "loss": 1.9996, "step": 43977 }, { "epoch": 4.511694706606484, "grad_norm": 0.06446696072816849, "learning_rate": 0.01, "loss": 1.9798, "step": 43980 }, { "epoch": 4.512002462043496, "grad_norm": 0.0567287839949131, "learning_rate": 0.01, "loss": 1.9813, "step": 43983 }, { "epoch": 4.5123102174805085, "grad_norm": 0.0359378419816494, "learning_rate": 0.01, "loss": 1.9888, "step": 43986 }, { "epoch": 4.512617972917521, "grad_norm": 0.052227430045604706, "learning_rate": 0.01, "loss": 1.9993, "step": 43989 }, { "epoch": 4.512925728354534, "grad_norm": 0.058354392647743225, "learning_rate": 0.01, "loss": 2.0029, "step": 43992 }, { "epoch": 4.513233483791547, "grad_norm": 0.06100015714764595, "learning_rate": 0.01, "loss": 1.996, "step": 43995 }, { "epoch": 4.51354123922856, "grad_norm": 0.0857095718383789, "learning_rate": 0.01, "loss": 1.9899, "step": 43998 }, { "epoch": 4.513848994665572, "grad_norm": 0.11230607330799103, "learning_rate": 0.01, "loss": 1.9907, "step": 44001 }, { "epoch": 4.514156750102585, "grad_norm": 0.082217276096344, "learning_rate": 0.01, "loss": 1.9945, "step": 44004 }, { "epoch": 4.514464505539598, "grad_norm": 0.07966562360525131, "learning_rate": 0.01, "loss": 2.0134, "step": 44007 }, { "epoch": 4.514772260976611, "grad_norm": 0.0608987994492054, "learning_rate": 0.01, "loss": 1.9969, "step": 44010 }, { "epoch": 4.5150800164136236, "grad_norm": 0.029933054000139236, "learning_rate": 0.01, "loss": 1.9937, "step": 44013 }, { "epoch": 4.515387771850636, "grad_norm": 0.04696916788816452, "learning_rate": 0.01, "loss": 1.9903, "step": 44016 }, { "epoch": 4.515695527287649, "grad_norm": 0.0494072362780571, "learning_rate": 0.01, "loss": 2.003, "step": 44019 }, { "epoch": 4.516003282724661, "grad_norm": 0.06069015711545944, "learning_rate": 0.01, "loss": 1.9764, "step": 44022 }, { "epoch": 4.516311038161675, "grad_norm": 0.05014580115675926, "learning_rate": 0.01, "loss": 1.9922, "step": 44025 }, { "epoch": 4.516618793598687, "grad_norm": 0.04753846675157547, "learning_rate": 0.01, "loss": 1.9968, "step": 44028 }, { "epoch": 4.516926549035699, "grad_norm": 0.053366005420684814, "learning_rate": 0.01, "loss": 2.0075, "step": 44031 }, { "epoch": 4.517234304472712, "grad_norm": 0.09559368342161179, "learning_rate": 0.01, "loss": 1.9848, "step": 44034 }, { "epoch": 4.517542059909725, "grad_norm": 0.12470517307519913, "learning_rate": 0.01, "loss": 2.0018, "step": 44037 }, { "epoch": 4.517849815346738, "grad_norm": 0.0809641033411026, "learning_rate": 0.01, "loss": 2.0019, "step": 44040 }, { "epoch": 4.5181575707837505, "grad_norm": 0.04899699613451958, "learning_rate": 0.01, "loss": 1.9809, "step": 44043 }, { "epoch": 4.518465326220763, "grad_norm": 0.06368394196033478, "learning_rate": 0.01, "loss": 1.9935, "step": 44046 }, { "epoch": 4.518773081657776, "grad_norm": 0.06713195890188217, "learning_rate": 0.01, "loss": 1.9813, "step": 44049 }, { "epoch": 4.519080837094789, "grad_norm": 0.12240014225244522, "learning_rate": 0.01, "loss": 1.983, "step": 44052 }, { "epoch": 4.519388592531802, "grad_norm": 0.159604012966156, "learning_rate": 0.01, "loss": 2.0038, "step": 44055 }, { "epoch": 4.519696347968814, "grad_norm": 0.09968844056129456, "learning_rate": 0.01, "loss": 1.9851, "step": 44058 }, { "epoch": 4.520004103405827, "grad_norm": 0.1151421070098877, "learning_rate": 0.01, "loss": 1.999, "step": 44061 }, { "epoch": 4.52031185884284, "grad_norm": 0.117409847676754, "learning_rate": 0.01, "loss": 1.977, "step": 44064 }, { "epoch": 4.520619614279852, "grad_norm": 0.05031849071383476, "learning_rate": 0.01, "loss": 1.9929, "step": 44067 }, { "epoch": 4.5209273697168655, "grad_norm": 0.04156704619526863, "learning_rate": 0.01, "loss": 2.0103, "step": 44070 }, { "epoch": 4.521235125153877, "grad_norm": 0.04768043011426926, "learning_rate": 0.01, "loss": 2.0129, "step": 44073 }, { "epoch": 4.52154288059089, "grad_norm": 0.04492770880460739, "learning_rate": 0.01, "loss": 1.9951, "step": 44076 }, { "epoch": 4.521850636027903, "grad_norm": 0.0356624610722065, "learning_rate": 0.01, "loss": 1.9866, "step": 44079 }, { "epoch": 4.522158391464916, "grad_norm": 0.06383637338876724, "learning_rate": 0.01, "loss": 2.0088, "step": 44082 }, { "epoch": 4.5224661469019285, "grad_norm": 0.09323472529649734, "learning_rate": 0.01, "loss": 1.989, "step": 44085 }, { "epoch": 4.522773902338941, "grad_norm": 0.08748291432857513, "learning_rate": 0.01, "loss": 1.9946, "step": 44088 }, { "epoch": 4.523081657775954, "grad_norm": 0.12068930268287659, "learning_rate": 0.01, "loss": 1.9963, "step": 44091 }, { "epoch": 4.523389413212967, "grad_norm": 0.12926380336284637, "learning_rate": 0.01, "loss": 2.0079, "step": 44094 }, { "epoch": 4.52369716864998, "grad_norm": 0.053785938769578934, "learning_rate": 0.01, "loss": 1.9889, "step": 44097 }, { "epoch": 4.524004924086992, "grad_norm": 0.056706249713897705, "learning_rate": 0.01, "loss": 1.9529, "step": 44100 }, { "epoch": 4.524312679524005, "grad_norm": 0.040739428251981735, "learning_rate": 0.01, "loss": 1.9819, "step": 44103 }, { "epoch": 4.524620434961018, "grad_norm": 0.07002007216215134, "learning_rate": 0.01, "loss": 1.988, "step": 44106 }, { "epoch": 4.524928190398031, "grad_norm": 0.08624965697526932, "learning_rate": 0.01, "loss": 1.9939, "step": 44109 }, { "epoch": 4.525235945835043, "grad_norm": 0.05654805898666382, "learning_rate": 0.01, "loss": 1.9924, "step": 44112 }, { "epoch": 4.525543701272056, "grad_norm": 0.0629354789853096, "learning_rate": 0.01, "loss": 1.9798, "step": 44115 }, { "epoch": 4.525851456709068, "grad_norm": 0.10447383671998978, "learning_rate": 0.01, "loss": 1.9912, "step": 44118 }, { "epoch": 4.526159212146081, "grad_norm": 0.08281921595335007, "learning_rate": 0.01, "loss": 1.9917, "step": 44121 }, { "epoch": 4.526466967583094, "grad_norm": 0.10570526868104935, "learning_rate": 0.01, "loss": 2.0012, "step": 44124 }, { "epoch": 4.5267747230201065, "grad_norm": 0.040542326867580414, "learning_rate": 0.01, "loss": 1.9644, "step": 44127 }, { "epoch": 4.527082478457119, "grad_norm": 0.060009557753801346, "learning_rate": 0.01, "loss": 1.9859, "step": 44130 }, { "epoch": 4.527390233894132, "grad_norm": 0.09265922009944916, "learning_rate": 0.01, "loss": 1.9737, "step": 44133 }, { "epoch": 4.527697989331145, "grad_norm": 0.0457131452858448, "learning_rate": 0.01, "loss": 1.9718, "step": 44136 }, { "epoch": 4.528005744768158, "grad_norm": 0.038852475583553314, "learning_rate": 0.01, "loss": 1.9957, "step": 44139 }, { "epoch": 4.52831350020517, "grad_norm": 0.03757132962346077, "learning_rate": 0.01, "loss": 1.9932, "step": 44142 }, { "epoch": 4.528621255642183, "grad_norm": 0.04073426127433777, "learning_rate": 0.01, "loss": 1.988, "step": 44145 }, { "epoch": 4.528929011079196, "grad_norm": 0.05840800702571869, "learning_rate": 0.01, "loss": 1.9882, "step": 44148 }, { "epoch": 4.529236766516209, "grad_norm": 0.06951642036437988, "learning_rate": 0.01, "loss": 1.9842, "step": 44151 }, { "epoch": 4.5295445219532215, "grad_norm": 0.062115710228681564, "learning_rate": 0.01, "loss": 2.0055, "step": 44154 }, { "epoch": 4.529852277390233, "grad_norm": 0.0439622737467289, "learning_rate": 0.01, "loss": 1.969, "step": 44157 }, { "epoch": 4.530160032827247, "grad_norm": 0.06429772078990936, "learning_rate": 0.01, "loss": 2.0265, "step": 44160 }, { "epoch": 4.530467788264259, "grad_norm": 0.1072159856557846, "learning_rate": 0.01, "loss": 1.9889, "step": 44163 }, { "epoch": 4.530775543701272, "grad_norm": 0.08709017932415009, "learning_rate": 0.01, "loss": 1.9846, "step": 44166 }, { "epoch": 4.531083299138285, "grad_norm": 0.07494400441646576, "learning_rate": 0.01, "loss": 1.9814, "step": 44169 }, { "epoch": 4.531391054575297, "grad_norm": 0.09518608450889587, "learning_rate": 0.01, "loss": 1.9748, "step": 44172 }, { "epoch": 4.53169881001231, "grad_norm": 0.04578256234526634, "learning_rate": 0.01, "loss": 1.9758, "step": 44175 }, { "epoch": 4.532006565449323, "grad_norm": 0.03707175329327583, "learning_rate": 0.01, "loss": 1.9988, "step": 44178 }, { "epoch": 4.532314320886336, "grad_norm": 0.042071472853422165, "learning_rate": 0.01, "loss": 1.9992, "step": 44181 }, { "epoch": 4.5326220763233485, "grad_norm": 0.05591168627142906, "learning_rate": 0.01, "loss": 2.0053, "step": 44184 }, { "epoch": 4.532929831760361, "grad_norm": 0.04541923105716705, "learning_rate": 0.01, "loss": 1.9837, "step": 44187 }, { "epoch": 4.533237587197374, "grad_norm": 0.07019765675067902, "learning_rate": 0.01, "loss": 2.009, "step": 44190 }, { "epoch": 4.533545342634387, "grad_norm": 0.04877987504005432, "learning_rate": 0.01, "loss": 2.0081, "step": 44193 }, { "epoch": 4.5338530980714, "grad_norm": 0.04220633581280708, "learning_rate": 0.01, "loss": 2.0013, "step": 44196 }, { "epoch": 4.534160853508412, "grad_norm": 0.17436882853507996, "learning_rate": 0.01, "loss": 1.9924, "step": 44199 }, { "epoch": 4.534468608945424, "grad_norm": 0.03670203685760498, "learning_rate": 0.01, "loss": 1.9786, "step": 44202 }, { "epoch": 4.534776364382438, "grad_norm": 0.04544707387685776, "learning_rate": 0.01, "loss": 2.0046, "step": 44205 }, { "epoch": 4.53508411981945, "grad_norm": 0.03833407908678055, "learning_rate": 0.01, "loss": 2.0196, "step": 44208 }, { "epoch": 4.535391875256463, "grad_norm": 0.03395809605717659, "learning_rate": 0.01, "loss": 1.9846, "step": 44211 }, { "epoch": 4.535699630693475, "grad_norm": 0.04812290519475937, "learning_rate": 0.01, "loss": 2.0027, "step": 44214 }, { "epoch": 4.536007386130488, "grad_norm": 0.04580771178007126, "learning_rate": 0.01, "loss": 2.0093, "step": 44217 }, { "epoch": 4.536315141567501, "grad_norm": 0.04926560819149017, "learning_rate": 0.01, "loss": 2.0011, "step": 44220 }, { "epoch": 4.536622897004514, "grad_norm": 0.08950170129537582, "learning_rate": 0.01, "loss": 2.0266, "step": 44223 }, { "epoch": 4.5369306524415265, "grad_norm": 0.04416525736451149, "learning_rate": 0.01, "loss": 2.0037, "step": 44226 }, { "epoch": 4.537238407878539, "grad_norm": 0.04912101849913597, "learning_rate": 0.01, "loss": 2.0068, "step": 44229 }, { "epoch": 4.537546163315552, "grad_norm": 0.039088208228349686, "learning_rate": 0.01, "loss": 1.9946, "step": 44232 }, { "epoch": 4.537853918752565, "grad_norm": 0.03769220411777496, "learning_rate": 0.01, "loss": 1.9635, "step": 44235 }, { "epoch": 4.538161674189578, "grad_norm": 0.14437700808048248, "learning_rate": 0.01, "loss": 1.9917, "step": 44238 }, { "epoch": 4.53846942962659, "grad_norm": 0.09406454116106033, "learning_rate": 0.01, "loss": 1.997, "step": 44241 }, { "epoch": 4.538777185063603, "grad_norm": 0.0553220696747303, "learning_rate": 0.01, "loss": 1.999, "step": 44244 }, { "epoch": 4.539084940500615, "grad_norm": 0.04257743060588837, "learning_rate": 0.01, "loss": 2.0079, "step": 44247 }, { "epoch": 4.539392695937629, "grad_norm": 0.05212993547320366, "learning_rate": 0.01, "loss": 1.9864, "step": 44250 }, { "epoch": 4.539700451374641, "grad_norm": 0.050963662564754486, "learning_rate": 0.01, "loss": 1.9904, "step": 44253 }, { "epoch": 4.540008206811653, "grad_norm": 0.04554356262087822, "learning_rate": 0.01, "loss": 2.0002, "step": 44256 }, { "epoch": 4.540315962248666, "grad_norm": 0.11272426694631577, "learning_rate": 0.01, "loss": 1.9897, "step": 44259 }, { "epoch": 4.540623717685679, "grad_norm": 0.06912017613649368, "learning_rate": 0.01, "loss": 1.9837, "step": 44262 }, { "epoch": 4.540931473122692, "grad_norm": 0.057526472955942154, "learning_rate": 0.01, "loss": 1.9848, "step": 44265 }, { "epoch": 4.5412392285597045, "grad_norm": 0.035935260355472565, "learning_rate": 0.01, "loss": 1.9816, "step": 44268 }, { "epoch": 4.541546983996717, "grad_norm": 0.06651175767183304, "learning_rate": 0.01, "loss": 1.9753, "step": 44271 }, { "epoch": 4.54185473943373, "grad_norm": 0.11073578149080276, "learning_rate": 0.01, "loss": 1.9857, "step": 44274 }, { "epoch": 4.542162494870743, "grad_norm": 0.08459115028381348, "learning_rate": 0.01, "loss": 1.9724, "step": 44277 }, { "epoch": 4.542470250307756, "grad_norm": 0.04882695525884628, "learning_rate": 0.01, "loss": 2.0182, "step": 44280 }, { "epoch": 4.542778005744768, "grad_norm": 0.0500728003680706, "learning_rate": 0.01, "loss": 1.9624, "step": 44283 }, { "epoch": 4.543085761181781, "grad_norm": 0.03797118738293648, "learning_rate": 0.01, "loss": 1.9984, "step": 44286 }, { "epoch": 4.543393516618794, "grad_norm": 0.046217989176511765, "learning_rate": 0.01, "loss": 2.0108, "step": 44289 }, { "epoch": 4.543701272055806, "grad_norm": 0.057793501764535904, "learning_rate": 0.01, "loss": 1.9717, "step": 44292 }, { "epoch": 4.544009027492819, "grad_norm": 0.03469870239496231, "learning_rate": 0.01, "loss": 1.9874, "step": 44295 }, { "epoch": 4.544316782929831, "grad_norm": 0.11007340252399445, "learning_rate": 0.01, "loss": 2.0101, "step": 44298 }, { "epoch": 4.544624538366844, "grad_norm": 0.07559779286384583, "learning_rate": 0.01, "loss": 1.9773, "step": 44301 }, { "epoch": 4.544932293803857, "grad_norm": 0.10915376991033554, "learning_rate": 0.01, "loss": 2.0, "step": 44304 }, { "epoch": 4.54524004924087, "grad_norm": 0.07973102480173111, "learning_rate": 0.01, "loss": 1.9778, "step": 44307 }, { "epoch": 4.5455478046778826, "grad_norm": 0.06999395787715912, "learning_rate": 0.01, "loss": 2.0185, "step": 44310 }, { "epoch": 4.545855560114895, "grad_norm": 0.05275473743677139, "learning_rate": 0.01, "loss": 2.0153, "step": 44313 }, { "epoch": 4.546163315551908, "grad_norm": 0.054669059813022614, "learning_rate": 0.01, "loss": 1.9909, "step": 44316 }, { "epoch": 4.546471070988921, "grad_norm": 0.046913836151361465, "learning_rate": 0.01, "loss": 2.041, "step": 44319 }, { "epoch": 4.546778826425934, "grad_norm": 0.043503716588020325, "learning_rate": 0.01, "loss": 1.9777, "step": 44322 }, { "epoch": 4.5470865818629465, "grad_norm": 0.058872975409030914, "learning_rate": 0.01, "loss": 1.9936, "step": 44325 }, { "epoch": 4.547394337299959, "grad_norm": 0.08830869197845459, "learning_rate": 0.01, "loss": 2.022, "step": 44328 }, { "epoch": 4.547702092736972, "grad_norm": 0.0850677490234375, "learning_rate": 0.01, "loss": 1.9808, "step": 44331 }, { "epoch": 4.548009848173985, "grad_norm": 0.058975327759981155, "learning_rate": 0.01, "loss": 1.9956, "step": 44334 }, { "epoch": 4.548317603610997, "grad_norm": 0.04586830735206604, "learning_rate": 0.01, "loss": 1.9823, "step": 44337 }, { "epoch": 4.5486253590480095, "grad_norm": 0.046169981360435486, "learning_rate": 0.01, "loss": 2.0032, "step": 44340 }, { "epoch": 4.548933114485022, "grad_norm": 0.06343907862901688, "learning_rate": 0.01, "loss": 1.9647, "step": 44343 }, { "epoch": 4.549240869922035, "grad_norm": 0.03225269168615341, "learning_rate": 0.01, "loss": 1.9993, "step": 44346 }, { "epoch": 4.549548625359048, "grad_norm": 0.0852704718708992, "learning_rate": 0.01, "loss": 1.9803, "step": 44349 }, { "epoch": 4.549856380796061, "grad_norm": 0.058122653514146805, "learning_rate": 0.01, "loss": 1.9746, "step": 44352 }, { "epoch": 4.550164136233073, "grad_norm": 0.08065648376941681, "learning_rate": 0.01, "loss": 2.0035, "step": 44355 }, { "epoch": 4.550471891670086, "grad_norm": 0.04599373787641525, "learning_rate": 0.01, "loss": 2.0209, "step": 44358 }, { "epoch": 4.550779647107099, "grad_norm": 0.03361526504158974, "learning_rate": 0.01, "loss": 2.0019, "step": 44361 }, { "epoch": 4.551087402544112, "grad_norm": 0.04159310460090637, "learning_rate": 0.01, "loss": 2.006, "step": 44364 }, { "epoch": 4.5513951579811245, "grad_norm": 0.06349869817495346, "learning_rate": 0.01, "loss": 2.0035, "step": 44367 }, { "epoch": 4.551702913418137, "grad_norm": 0.08402734994888306, "learning_rate": 0.01, "loss": 1.9991, "step": 44370 }, { "epoch": 4.55201066885515, "grad_norm": 0.0695672482252121, "learning_rate": 0.01, "loss": 2.0002, "step": 44373 }, { "epoch": 4.552318424292163, "grad_norm": 0.038909196853637695, "learning_rate": 0.01, "loss": 1.9777, "step": 44376 }, { "epoch": 4.552626179729176, "grad_norm": 0.032846856862306595, "learning_rate": 0.01, "loss": 1.9896, "step": 44379 }, { "epoch": 4.5529339351661875, "grad_norm": 0.03906947746872902, "learning_rate": 0.01, "loss": 1.9829, "step": 44382 }, { "epoch": 4.5532416906032, "grad_norm": 0.06816279888153076, "learning_rate": 0.01, "loss": 1.9935, "step": 44385 }, { "epoch": 4.553549446040213, "grad_norm": 0.13728150725364685, "learning_rate": 0.01, "loss": 1.9883, "step": 44388 }, { "epoch": 4.553857201477226, "grad_norm": 0.11365284025669098, "learning_rate": 0.01, "loss": 2.0001, "step": 44391 }, { "epoch": 4.554164956914239, "grad_norm": 0.058936670422554016, "learning_rate": 0.01, "loss": 1.978, "step": 44394 }, { "epoch": 4.554472712351251, "grad_norm": 0.044918566942214966, "learning_rate": 0.01, "loss": 1.9966, "step": 44397 }, { "epoch": 4.554780467788264, "grad_norm": 0.04904935136437416, "learning_rate": 0.01, "loss": 2.0085, "step": 44400 }, { "epoch": 4.555088223225277, "grad_norm": 0.04912208393216133, "learning_rate": 0.01, "loss": 2.0077, "step": 44403 }, { "epoch": 4.55539597866229, "grad_norm": 0.0992012619972229, "learning_rate": 0.01, "loss": 1.9942, "step": 44406 }, { "epoch": 4.5557037340993025, "grad_norm": 0.05080341920256615, "learning_rate": 0.01, "loss": 2.004, "step": 44409 }, { "epoch": 4.556011489536315, "grad_norm": 0.10849186778068542, "learning_rate": 0.01, "loss": 2.0349, "step": 44412 }, { "epoch": 4.556319244973328, "grad_norm": 0.04914950951933861, "learning_rate": 0.01, "loss": 2.0075, "step": 44415 }, { "epoch": 4.556627000410341, "grad_norm": 0.040149789303541183, "learning_rate": 0.01, "loss": 2.0072, "step": 44418 }, { "epoch": 4.556934755847354, "grad_norm": 0.04807689040899277, "learning_rate": 0.01, "loss": 2.0102, "step": 44421 }, { "epoch": 4.557242511284366, "grad_norm": 0.03420715779066086, "learning_rate": 0.01, "loss": 2.0242, "step": 44424 }, { "epoch": 4.557550266721378, "grad_norm": 0.033837419003248215, "learning_rate": 0.01, "loss": 1.9849, "step": 44427 }, { "epoch": 4.557858022158391, "grad_norm": 0.05432615429162979, "learning_rate": 0.01, "loss": 1.9907, "step": 44430 }, { "epoch": 4.558165777595404, "grad_norm": 0.09718727320432663, "learning_rate": 0.01, "loss": 2.0001, "step": 44433 }, { "epoch": 4.558473533032417, "grad_norm": 0.09546732157468796, "learning_rate": 0.01, "loss": 2.0192, "step": 44436 }, { "epoch": 4.558781288469429, "grad_norm": 0.04132693633437157, "learning_rate": 0.01, "loss": 1.9634, "step": 44439 }, { "epoch": 4.559089043906442, "grad_norm": 0.07769662886857986, "learning_rate": 0.01, "loss": 1.9836, "step": 44442 }, { "epoch": 4.559396799343455, "grad_norm": 0.08077646046876907, "learning_rate": 0.01, "loss": 2.0083, "step": 44445 }, { "epoch": 4.559704554780468, "grad_norm": 0.0781002938747406, "learning_rate": 0.01, "loss": 1.976, "step": 44448 }, { "epoch": 4.5600123102174805, "grad_norm": 0.057897116988897324, "learning_rate": 0.01, "loss": 1.9946, "step": 44451 }, { "epoch": 4.560320065654493, "grad_norm": 0.06867967545986176, "learning_rate": 0.01, "loss": 1.9638, "step": 44454 }, { "epoch": 4.560627821091506, "grad_norm": 0.06109149381518364, "learning_rate": 0.01, "loss": 1.9623, "step": 44457 }, { "epoch": 4.560935576528519, "grad_norm": 0.038964226841926575, "learning_rate": 0.01, "loss": 2.0091, "step": 44460 }, { "epoch": 4.561243331965532, "grad_norm": 0.0609222911298275, "learning_rate": 0.01, "loss": 1.9984, "step": 44463 }, { "epoch": 4.5615510874025444, "grad_norm": 0.09428851306438446, "learning_rate": 0.01, "loss": 2.0005, "step": 44466 }, { "epoch": 4.561858842839557, "grad_norm": 0.09745607525110245, "learning_rate": 0.01, "loss": 1.9877, "step": 44469 }, { "epoch": 4.562166598276569, "grad_norm": 0.09413721412420273, "learning_rate": 0.01, "loss": 2.0004, "step": 44472 }, { "epoch": 4.562474353713582, "grad_norm": 0.0537935309112072, "learning_rate": 0.01, "loss": 2.0012, "step": 44475 }, { "epoch": 4.562782109150595, "grad_norm": 0.03104214183986187, "learning_rate": 0.01, "loss": 1.995, "step": 44478 }, { "epoch": 4.5630898645876075, "grad_norm": 0.028320681303739548, "learning_rate": 0.01, "loss": 1.9681, "step": 44481 }, { "epoch": 4.56339762002462, "grad_norm": 0.09736626595258713, "learning_rate": 0.01, "loss": 2.0077, "step": 44484 }, { "epoch": 4.563705375461633, "grad_norm": 0.03548412024974823, "learning_rate": 0.01, "loss": 2.008, "step": 44487 }, { "epoch": 4.564013130898646, "grad_norm": 0.11299438029527664, "learning_rate": 0.01, "loss": 1.9814, "step": 44490 }, { "epoch": 4.564320886335659, "grad_norm": 0.07769119739532471, "learning_rate": 0.01, "loss": 2.0133, "step": 44493 }, { "epoch": 4.564628641772671, "grad_norm": 0.04660061001777649, "learning_rate": 0.01, "loss": 1.9923, "step": 44496 }, { "epoch": 4.564936397209684, "grad_norm": 0.045153431594371796, "learning_rate": 0.01, "loss": 2.012, "step": 44499 }, { "epoch": 4.565244152646697, "grad_norm": 0.040483929216861725, "learning_rate": 0.01, "loss": 2.0136, "step": 44502 }, { "epoch": 4.56555190808371, "grad_norm": 0.04947364702820778, "learning_rate": 0.01, "loss": 1.9962, "step": 44505 }, { "epoch": 4.5658596635207225, "grad_norm": 0.0688960924744606, "learning_rate": 0.01, "loss": 1.9869, "step": 44508 }, { "epoch": 4.566167418957735, "grad_norm": 0.11255665123462677, "learning_rate": 0.01, "loss": 1.9831, "step": 44511 }, { "epoch": 4.566475174394748, "grad_norm": 0.07215286791324615, "learning_rate": 0.01, "loss": 2.0106, "step": 44514 }, { "epoch": 4.56678292983176, "grad_norm": 0.07593537867069244, "learning_rate": 0.01, "loss": 1.9867, "step": 44517 }, { "epoch": 4.567090685268773, "grad_norm": 0.053777799010276794, "learning_rate": 0.01, "loss": 1.9999, "step": 44520 }, { "epoch": 4.5673984407057855, "grad_norm": 0.04630263149738312, "learning_rate": 0.01, "loss": 1.9829, "step": 44523 }, { "epoch": 4.567706196142798, "grad_norm": 0.1870114654302597, "learning_rate": 0.01, "loss": 2.0189, "step": 44526 }, { "epoch": 4.568013951579811, "grad_norm": 0.04411546140909195, "learning_rate": 0.01, "loss": 1.9801, "step": 44529 }, { "epoch": 4.568321707016824, "grad_norm": 0.04254088178277016, "learning_rate": 0.01, "loss": 1.9802, "step": 44532 }, { "epoch": 4.568629462453837, "grad_norm": 0.048210930079221725, "learning_rate": 0.01, "loss": 1.9613, "step": 44535 }, { "epoch": 4.568937217890849, "grad_norm": 0.046062175184488297, "learning_rate": 0.01, "loss": 2.009, "step": 44538 }, { "epoch": 4.569244973327862, "grad_norm": 0.04656999930739403, "learning_rate": 0.01, "loss": 2.0012, "step": 44541 }, { "epoch": 4.569552728764875, "grad_norm": 0.04990769177675247, "learning_rate": 0.01, "loss": 1.9798, "step": 44544 }, { "epoch": 4.569860484201888, "grad_norm": 0.06637271493673325, "learning_rate": 0.01, "loss": 1.9974, "step": 44547 }, { "epoch": 4.5701682396389005, "grad_norm": 0.0446067713201046, "learning_rate": 0.01, "loss": 2.0068, "step": 44550 }, { "epoch": 4.570475995075913, "grad_norm": 0.044182851910591125, "learning_rate": 0.01, "loss": 1.9897, "step": 44553 }, { "epoch": 4.570783750512926, "grad_norm": 0.03427280858159065, "learning_rate": 0.01, "loss": 2.0013, "step": 44556 }, { "epoch": 4.571091505949939, "grad_norm": 0.0435144379734993, "learning_rate": 0.01, "loss": 1.9985, "step": 44559 }, { "epoch": 4.571399261386951, "grad_norm": 0.046480659395456314, "learning_rate": 0.01, "loss": 1.9871, "step": 44562 }, { "epoch": 4.5717070168239635, "grad_norm": 0.047410015016794205, "learning_rate": 0.01, "loss": 1.9871, "step": 44565 }, { "epoch": 4.572014772260976, "grad_norm": 0.0613541379570961, "learning_rate": 0.01, "loss": 1.967, "step": 44568 }, { "epoch": 4.572322527697989, "grad_norm": 0.04605888947844505, "learning_rate": 0.01, "loss": 1.9939, "step": 44571 }, { "epoch": 4.572630283135002, "grad_norm": 0.08377258479595184, "learning_rate": 0.01, "loss": 2.0123, "step": 44574 }, { "epoch": 4.572938038572015, "grad_norm": 0.09191520512104034, "learning_rate": 0.01, "loss": 1.984, "step": 44577 }, { "epoch": 4.573245794009027, "grad_norm": 0.11382446438074112, "learning_rate": 0.01, "loss": 1.9947, "step": 44580 }, { "epoch": 4.57355354944604, "grad_norm": 0.05597241222858429, "learning_rate": 0.01, "loss": 1.9919, "step": 44583 }, { "epoch": 4.573861304883053, "grad_norm": 0.044153302907943726, "learning_rate": 0.01, "loss": 1.9974, "step": 44586 }, { "epoch": 4.574169060320066, "grad_norm": 0.0449075847864151, "learning_rate": 0.01, "loss": 1.9689, "step": 44589 }, { "epoch": 4.5744768157570785, "grad_norm": 0.06252507120370865, "learning_rate": 0.01, "loss": 2.0106, "step": 44592 }, { "epoch": 4.574784571194091, "grad_norm": 0.047335173934698105, "learning_rate": 0.01, "loss": 1.9913, "step": 44595 }, { "epoch": 4.575092326631104, "grad_norm": 0.046538159251213074, "learning_rate": 0.01, "loss": 1.9753, "step": 44598 }, { "epoch": 4.575400082068117, "grad_norm": 0.043892424553632736, "learning_rate": 0.01, "loss": 1.9841, "step": 44601 }, { "epoch": 4.57570783750513, "grad_norm": 0.14133551716804504, "learning_rate": 0.01, "loss": 1.992, "step": 44604 }, { "epoch": 4.5760155929421416, "grad_norm": 0.06392577290534973, "learning_rate": 0.01, "loss": 1.9972, "step": 44607 }, { "epoch": 4.576323348379154, "grad_norm": 0.07599660754203796, "learning_rate": 0.01, "loss": 2.0064, "step": 44610 }, { "epoch": 4.576631103816167, "grad_norm": 0.0688132718205452, "learning_rate": 0.01, "loss": 1.9937, "step": 44613 }, { "epoch": 4.57693885925318, "grad_norm": 0.05262705683708191, "learning_rate": 0.01, "loss": 2.0085, "step": 44616 }, { "epoch": 4.577246614690193, "grad_norm": 0.04527880623936653, "learning_rate": 0.01, "loss": 1.9785, "step": 44619 }, { "epoch": 4.5775543701272055, "grad_norm": 0.05383728817105293, "learning_rate": 0.01, "loss": 2.0036, "step": 44622 }, { "epoch": 4.577862125564218, "grad_norm": 0.07166964560747147, "learning_rate": 0.01, "loss": 1.9983, "step": 44625 }, { "epoch": 4.578169881001231, "grad_norm": 0.04192749038338661, "learning_rate": 0.01, "loss": 1.9627, "step": 44628 }, { "epoch": 4.578477636438244, "grad_norm": 0.06395778805017471, "learning_rate": 0.01, "loss": 2.0003, "step": 44631 }, { "epoch": 4.578785391875257, "grad_norm": 0.05522435903549194, "learning_rate": 0.01, "loss": 2.0137, "step": 44634 }, { "epoch": 4.579093147312269, "grad_norm": 0.0383891724050045, "learning_rate": 0.01, "loss": 1.9754, "step": 44637 }, { "epoch": 4.579400902749282, "grad_norm": 0.11526113003492355, "learning_rate": 0.01, "loss": 2.0159, "step": 44640 }, { "epoch": 4.579708658186295, "grad_norm": 0.04987471178174019, "learning_rate": 0.01, "loss": 1.9878, "step": 44643 }, { "epoch": 4.580016413623308, "grad_norm": 0.055467016994953156, "learning_rate": 0.01, "loss": 1.991, "step": 44646 }, { "epoch": 4.5803241690603205, "grad_norm": 0.08823923766613007, "learning_rate": 0.01, "loss": 1.9787, "step": 44649 }, { "epoch": 4.580631924497332, "grad_norm": 0.06108856573700905, "learning_rate": 0.01, "loss": 1.9844, "step": 44652 }, { "epoch": 4.580939679934345, "grad_norm": 0.046790335327386856, "learning_rate": 0.01, "loss": 2.021, "step": 44655 }, { "epoch": 4.581247435371358, "grad_norm": 0.04152047634124756, "learning_rate": 0.01, "loss": 1.9674, "step": 44658 }, { "epoch": 4.581555190808371, "grad_norm": 0.06719448417425156, "learning_rate": 0.01, "loss": 1.9794, "step": 44661 }, { "epoch": 4.5818629462453835, "grad_norm": 0.14338631927967072, "learning_rate": 0.01, "loss": 2.0153, "step": 44664 }, { "epoch": 4.582170701682396, "grad_norm": 0.06660101562738419, "learning_rate": 0.01, "loss": 2.0095, "step": 44667 }, { "epoch": 4.582478457119409, "grad_norm": 0.0552043579518795, "learning_rate": 0.01, "loss": 2.0049, "step": 44670 }, { "epoch": 4.582786212556422, "grad_norm": 0.08016394078731537, "learning_rate": 0.01, "loss": 1.9783, "step": 44673 }, { "epoch": 4.583093967993435, "grad_norm": 0.05147711932659149, "learning_rate": 0.01, "loss": 1.9912, "step": 44676 }, { "epoch": 4.583401723430447, "grad_norm": 0.044893477112054825, "learning_rate": 0.01, "loss": 2.0132, "step": 44679 }, { "epoch": 4.58370947886746, "grad_norm": 0.1312035769224167, "learning_rate": 0.01, "loss": 1.994, "step": 44682 }, { "epoch": 4.584017234304473, "grad_norm": 0.0376642569899559, "learning_rate": 0.01, "loss": 1.951, "step": 44685 }, { "epoch": 4.584324989741486, "grad_norm": 0.042604606598615646, "learning_rate": 0.01, "loss": 1.9933, "step": 44688 }, { "epoch": 4.5846327451784985, "grad_norm": 0.04277000203728676, "learning_rate": 0.01, "loss": 2.0092, "step": 44691 }, { "epoch": 4.584940500615511, "grad_norm": 0.07913113385438919, "learning_rate": 0.01, "loss": 1.9953, "step": 44694 }, { "epoch": 4.585248256052523, "grad_norm": 0.08000600337982178, "learning_rate": 0.01, "loss": 1.9983, "step": 44697 }, { "epoch": 4.585556011489536, "grad_norm": 0.05239587649703026, "learning_rate": 0.01, "loss": 1.9665, "step": 44700 }, { "epoch": 4.585863766926549, "grad_norm": 0.04813678562641144, "learning_rate": 0.01, "loss": 1.9891, "step": 44703 }, { "epoch": 4.5861715223635615, "grad_norm": 0.11590283364057541, "learning_rate": 0.01, "loss": 2.0043, "step": 44706 }, { "epoch": 4.586479277800574, "grad_norm": 0.19828790426254272, "learning_rate": 0.01, "loss": 1.9754, "step": 44709 }, { "epoch": 4.586787033237587, "grad_norm": 0.14478129148483276, "learning_rate": 0.01, "loss": 1.9801, "step": 44712 }, { "epoch": 4.5870947886746, "grad_norm": 0.042516618967056274, "learning_rate": 0.01, "loss": 1.9996, "step": 44715 }, { "epoch": 4.587402544111613, "grad_norm": 0.04690965265035629, "learning_rate": 0.01, "loss": 2.0056, "step": 44718 }, { "epoch": 4.587710299548625, "grad_norm": 0.035743579268455505, "learning_rate": 0.01, "loss": 1.9923, "step": 44721 }, { "epoch": 4.588018054985638, "grad_norm": 0.041939638555049896, "learning_rate": 0.01, "loss": 2.0091, "step": 44724 }, { "epoch": 4.588325810422651, "grad_norm": 0.039647046476602554, "learning_rate": 0.01, "loss": 1.9768, "step": 44727 }, { "epoch": 4.588633565859664, "grad_norm": 0.04442642256617546, "learning_rate": 0.01, "loss": 1.9663, "step": 44730 }, { "epoch": 4.5889413212966765, "grad_norm": 0.08688386529684067, "learning_rate": 0.01, "loss": 1.9929, "step": 44733 }, { "epoch": 4.589249076733689, "grad_norm": 0.03309828042984009, "learning_rate": 0.01, "loss": 1.9997, "step": 44736 }, { "epoch": 4.589556832170702, "grad_norm": 0.1094844713807106, "learning_rate": 0.01, "loss": 1.9716, "step": 44739 }, { "epoch": 4.589864587607714, "grad_norm": 0.04613770171999931, "learning_rate": 0.01, "loss": 1.9908, "step": 44742 }, { "epoch": 4.590172343044727, "grad_norm": 0.10529785603284836, "learning_rate": 0.01, "loss": 2.0468, "step": 44745 }, { "epoch": 4.5904800984817395, "grad_norm": 0.09901240468025208, "learning_rate": 0.01, "loss": 1.9821, "step": 44748 }, { "epoch": 4.590787853918752, "grad_norm": 0.08486896753311157, "learning_rate": 0.01, "loss": 1.9962, "step": 44751 }, { "epoch": 4.591095609355765, "grad_norm": 0.05477188900113106, "learning_rate": 0.01, "loss": 1.9898, "step": 44754 }, { "epoch": 4.591403364792778, "grad_norm": 0.03555532172322273, "learning_rate": 0.01, "loss": 1.9894, "step": 44757 }, { "epoch": 4.591711120229791, "grad_norm": 0.0637480840086937, "learning_rate": 0.01, "loss": 2.0144, "step": 44760 }, { "epoch": 4.5920188756668034, "grad_norm": 0.09844323992729187, "learning_rate": 0.01, "loss": 1.9993, "step": 44763 }, { "epoch": 4.592326631103816, "grad_norm": 0.06749750673770905, "learning_rate": 0.01, "loss": 1.9775, "step": 44766 }, { "epoch": 4.592634386540829, "grad_norm": 0.0942719504237175, "learning_rate": 0.01, "loss": 1.9971, "step": 44769 }, { "epoch": 4.592942141977842, "grad_norm": 0.043663688004016876, "learning_rate": 0.01, "loss": 1.9633, "step": 44772 }, { "epoch": 4.593249897414855, "grad_norm": 0.1031889095902443, "learning_rate": 0.01, "loss": 1.9833, "step": 44775 }, { "epoch": 4.593557652851867, "grad_norm": 0.060684684664011, "learning_rate": 0.01, "loss": 2.0057, "step": 44778 }, { "epoch": 4.59386540828888, "grad_norm": 0.08744547516107559, "learning_rate": 0.01, "loss": 2.0255, "step": 44781 }, { "epoch": 4.594173163725893, "grad_norm": 0.05547311529517174, "learning_rate": 0.01, "loss": 1.9884, "step": 44784 }, { "epoch": 4.594480919162905, "grad_norm": 0.05069347843527794, "learning_rate": 0.01, "loss": 2.0019, "step": 44787 }, { "epoch": 4.594788674599918, "grad_norm": 0.11832018941640854, "learning_rate": 0.01, "loss": 2.0006, "step": 44790 }, { "epoch": 4.59509643003693, "grad_norm": 0.09115731716156006, "learning_rate": 0.01, "loss": 2.0098, "step": 44793 }, { "epoch": 4.595404185473943, "grad_norm": 0.03262989968061447, "learning_rate": 0.01, "loss": 2.0148, "step": 44796 }, { "epoch": 4.595711940910956, "grad_norm": 0.08880288898944855, "learning_rate": 0.01, "loss": 2.0165, "step": 44799 }, { "epoch": 4.596019696347969, "grad_norm": 0.059965986758470535, "learning_rate": 0.01, "loss": 2.0037, "step": 44802 }, { "epoch": 4.5963274517849815, "grad_norm": 0.040046051144599915, "learning_rate": 0.01, "loss": 2.0233, "step": 44805 }, { "epoch": 4.596635207221994, "grad_norm": 0.05327828601002693, "learning_rate": 0.01, "loss": 1.9821, "step": 44808 }, { "epoch": 4.596942962659007, "grad_norm": 0.0532514713704586, "learning_rate": 0.01, "loss": 2.0157, "step": 44811 }, { "epoch": 4.59725071809602, "grad_norm": 0.0502878837287426, "learning_rate": 0.01, "loss": 1.9821, "step": 44814 }, { "epoch": 4.597558473533033, "grad_norm": 0.05360707640647888, "learning_rate": 0.01, "loss": 2.0084, "step": 44817 }, { "epoch": 4.597866228970045, "grad_norm": 0.08648170530796051, "learning_rate": 0.01, "loss": 2.0006, "step": 44820 }, { "epoch": 4.598173984407058, "grad_norm": 0.056780677288770676, "learning_rate": 0.01, "loss": 2.0159, "step": 44823 }, { "epoch": 4.598481739844071, "grad_norm": 0.032956380397081375, "learning_rate": 0.01, "loss": 2.0065, "step": 44826 }, { "epoch": 4.598789495281084, "grad_norm": 0.04322326183319092, "learning_rate": 0.01, "loss": 1.9687, "step": 44829 }, { "epoch": 4.599097250718096, "grad_norm": 0.10878471285104752, "learning_rate": 0.01, "loss": 2.0089, "step": 44832 }, { "epoch": 4.599405006155108, "grad_norm": 0.041027724742889404, "learning_rate": 0.01, "loss": 1.9751, "step": 44835 }, { "epoch": 4.599712761592121, "grad_norm": 0.09541984647512436, "learning_rate": 0.01, "loss": 1.9968, "step": 44838 }, { "epoch": 4.600020517029134, "grad_norm": 0.0648130476474762, "learning_rate": 0.01, "loss": 2.0191, "step": 44841 }, { "epoch": 4.600328272466147, "grad_norm": 0.10848550498485565, "learning_rate": 0.01, "loss": 2.0216, "step": 44844 }, { "epoch": 4.6006360279031595, "grad_norm": 0.062398817390203476, "learning_rate": 0.01, "loss": 1.9728, "step": 44847 }, { "epoch": 4.600943783340172, "grad_norm": 0.05789678171277046, "learning_rate": 0.01, "loss": 1.999, "step": 44850 }, { "epoch": 4.601251538777185, "grad_norm": 0.03198247775435448, "learning_rate": 0.01, "loss": 1.9765, "step": 44853 }, { "epoch": 4.601559294214198, "grad_norm": 0.05636049434542656, "learning_rate": 0.01, "loss": 1.9965, "step": 44856 }, { "epoch": 4.601867049651211, "grad_norm": 0.07471849024295807, "learning_rate": 0.01, "loss": 1.997, "step": 44859 }, { "epoch": 4.602174805088223, "grad_norm": 0.09839174151420593, "learning_rate": 0.01, "loss": 1.9864, "step": 44862 }, { "epoch": 4.602482560525236, "grad_norm": 0.08253321796655655, "learning_rate": 0.01, "loss": 2.003, "step": 44865 }, { "epoch": 4.602790315962249, "grad_norm": 0.06502492725849152, "learning_rate": 0.01, "loss": 1.9946, "step": 44868 }, { "epoch": 4.603098071399262, "grad_norm": 0.04798365756869316, "learning_rate": 0.01, "loss": 1.9914, "step": 44871 }, { "epoch": 4.6034058268362745, "grad_norm": 0.04243507608771324, "learning_rate": 0.01, "loss": 1.9924, "step": 44874 }, { "epoch": 4.603713582273286, "grad_norm": 0.03410327062010765, "learning_rate": 0.01, "loss": 1.9984, "step": 44877 }, { "epoch": 4.604021337710299, "grad_norm": 0.09597765654325485, "learning_rate": 0.01, "loss": 1.9802, "step": 44880 }, { "epoch": 4.604329093147312, "grad_norm": 0.06745591014623642, "learning_rate": 0.01, "loss": 2.0062, "step": 44883 }, { "epoch": 4.604636848584325, "grad_norm": 0.10422816127538681, "learning_rate": 0.01, "loss": 2.0042, "step": 44886 }, { "epoch": 4.6049446040213375, "grad_norm": 0.05743027105927467, "learning_rate": 0.01, "loss": 2.0116, "step": 44889 }, { "epoch": 4.60525235945835, "grad_norm": 0.04410535469651222, "learning_rate": 0.01, "loss": 1.9713, "step": 44892 }, { "epoch": 4.605560114895363, "grad_norm": 0.07853147387504578, "learning_rate": 0.01, "loss": 1.9984, "step": 44895 }, { "epoch": 4.605867870332376, "grad_norm": 0.056344617158174515, "learning_rate": 0.01, "loss": 2.0026, "step": 44898 }, { "epoch": 4.606175625769389, "grad_norm": 0.045788802206516266, "learning_rate": 0.01, "loss": 2.0154, "step": 44901 }, { "epoch": 4.606483381206401, "grad_norm": 0.07213722914457321, "learning_rate": 0.01, "loss": 1.9746, "step": 44904 }, { "epoch": 4.606791136643414, "grad_norm": 0.08182168006896973, "learning_rate": 0.01, "loss": 1.9972, "step": 44907 }, { "epoch": 4.607098892080427, "grad_norm": 0.06299345195293427, "learning_rate": 0.01, "loss": 2.0124, "step": 44910 }, { "epoch": 4.60740664751744, "grad_norm": 0.046955596655607224, "learning_rate": 0.01, "loss": 1.9883, "step": 44913 }, { "epoch": 4.607714402954453, "grad_norm": 0.05844559147953987, "learning_rate": 0.01, "loss": 1.988, "step": 44916 }, { "epoch": 4.608022158391465, "grad_norm": 0.04892998933792114, "learning_rate": 0.01, "loss": 2.0062, "step": 44919 }, { "epoch": 4.608329913828477, "grad_norm": 0.040492888540029526, "learning_rate": 0.01, "loss": 1.9933, "step": 44922 }, { "epoch": 4.60863766926549, "grad_norm": 0.043649546802043915, "learning_rate": 0.01, "loss": 2.0103, "step": 44925 }, { "epoch": 4.608945424702503, "grad_norm": 0.19346511363983154, "learning_rate": 0.01, "loss": 2.0006, "step": 44928 }, { "epoch": 4.609253180139516, "grad_norm": 0.0724545568227768, "learning_rate": 0.01, "loss": 1.9877, "step": 44931 }, { "epoch": 4.609560935576528, "grad_norm": 0.048261623829603195, "learning_rate": 0.01, "loss": 1.9988, "step": 44934 }, { "epoch": 4.609868691013541, "grad_norm": 0.06115753576159477, "learning_rate": 0.01, "loss": 1.9829, "step": 44937 }, { "epoch": 4.610176446450554, "grad_norm": 0.05957823619246483, "learning_rate": 0.01, "loss": 2.0031, "step": 44940 }, { "epoch": 4.610484201887567, "grad_norm": 0.08261608332395554, "learning_rate": 0.01, "loss": 1.9724, "step": 44943 }, { "epoch": 4.6107919573245795, "grad_norm": 0.04762962833046913, "learning_rate": 0.01, "loss": 1.9538, "step": 44946 }, { "epoch": 4.611099712761592, "grad_norm": 0.06040159985423088, "learning_rate": 0.01, "loss": 1.9893, "step": 44949 }, { "epoch": 4.611407468198605, "grad_norm": 0.06975025683641434, "learning_rate": 0.01, "loss": 2.0012, "step": 44952 }, { "epoch": 4.611715223635618, "grad_norm": 0.06275621056556702, "learning_rate": 0.01, "loss": 1.9817, "step": 44955 }, { "epoch": 4.612022979072631, "grad_norm": 0.04177596792578697, "learning_rate": 0.01, "loss": 1.9904, "step": 44958 }, { "epoch": 4.612330734509643, "grad_norm": 0.05913246050477028, "learning_rate": 0.01, "loss": 1.9898, "step": 44961 }, { "epoch": 4.612638489946656, "grad_norm": 0.05337538942694664, "learning_rate": 0.01, "loss": 2.0009, "step": 44964 }, { "epoch": 4.612946245383668, "grad_norm": 0.04543198645114899, "learning_rate": 0.01, "loss": 1.9813, "step": 44967 }, { "epoch": 4.613254000820681, "grad_norm": 0.05056144669651985, "learning_rate": 0.01, "loss": 1.9628, "step": 44970 }, { "epoch": 4.613561756257694, "grad_norm": 0.2109542340040207, "learning_rate": 0.01, "loss": 1.9783, "step": 44973 }, { "epoch": 4.613869511694706, "grad_norm": 0.11741621792316437, "learning_rate": 0.01, "loss": 1.9883, "step": 44976 }, { "epoch": 4.614177267131719, "grad_norm": 0.06218573451042175, "learning_rate": 0.01, "loss": 1.9846, "step": 44979 }, { "epoch": 4.614485022568732, "grad_norm": 0.04318585991859436, "learning_rate": 0.01, "loss": 1.9903, "step": 44982 }, { "epoch": 4.614792778005745, "grad_norm": 0.05122276023030281, "learning_rate": 0.01, "loss": 2.0001, "step": 44985 }, { "epoch": 4.6151005334427575, "grad_norm": 0.06790675967931747, "learning_rate": 0.01, "loss": 1.9929, "step": 44988 }, { "epoch": 4.61540828887977, "grad_norm": 0.06424128264188766, "learning_rate": 0.01, "loss": 1.9874, "step": 44991 }, { "epoch": 4.615716044316783, "grad_norm": 0.055609673261642456, "learning_rate": 0.01, "loss": 1.9943, "step": 44994 }, { "epoch": 4.616023799753796, "grad_norm": 0.04479989409446716, "learning_rate": 0.01, "loss": 1.9931, "step": 44997 }, { "epoch": 4.616331555190809, "grad_norm": 0.046773526817560196, "learning_rate": 0.01, "loss": 1.9935, "step": 45000 }, { "epoch": 4.616639310627821, "grad_norm": 0.044277340173721313, "learning_rate": 0.01, "loss": 2.0009, "step": 45003 }, { "epoch": 4.616947066064834, "grad_norm": 0.06604879349470139, "learning_rate": 0.01, "loss": 2.0009, "step": 45006 }, { "epoch": 4.617254821501847, "grad_norm": 0.07454725354909897, "learning_rate": 0.01, "loss": 2.0105, "step": 45009 }, { "epoch": 4.617562576938859, "grad_norm": 0.09837469458580017, "learning_rate": 0.01, "loss": 1.998, "step": 45012 }, { "epoch": 4.617870332375872, "grad_norm": 0.12640568614006042, "learning_rate": 0.01, "loss": 1.9884, "step": 45015 }, { "epoch": 4.618178087812884, "grad_norm": 0.06357913464307785, "learning_rate": 0.01, "loss": 1.9416, "step": 45018 }, { "epoch": 4.618485843249897, "grad_norm": 0.05319363251328468, "learning_rate": 0.01, "loss": 2.001, "step": 45021 }, { "epoch": 4.61879359868691, "grad_norm": 0.05755499005317688, "learning_rate": 0.01, "loss": 1.9872, "step": 45024 }, { "epoch": 4.619101354123923, "grad_norm": 0.03921099379658699, "learning_rate": 0.01, "loss": 2.007, "step": 45027 }, { "epoch": 4.6194091095609355, "grad_norm": 0.062433432787656784, "learning_rate": 0.01, "loss": 1.9991, "step": 45030 }, { "epoch": 4.619716864997948, "grad_norm": 0.048542365431785583, "learning_rate": 0.01, "loss": 1.934, "step": 45033 }, { "epoch": 4.620024620434961, "grad_norm": 0.12152877449989319, "learning_rate": 0.01, "loss": 2.0085, "step": 45036 }, { "epoch": 4.620332375871974, "grad_norm": 0.052054520696401596, "learning_rate": 0.01, "loss": 1.9778, "step": 45039 }, { "epoch": 4.620640131308987, "grad_norm": 0.11336644738912582, "learning_rate": 0.01, "loss": 2.0224, "step": 45042 }, { "epoch": 4.620947886745999, "grad_norm": 0.05044134706258774, "learning_rate": 0.01, "loss": 1.9957, "step": 45045 }, { "epoch": 4.621255642183012, "grad_norm": 0.06728419661521912, "learning_rate": 0.01, "loss": 1.9987, "step": 45048 }, { "epoch": 4.621563397620025, "grad_norm": 0.0826968103647232, "learning_rate": 0.01, "loss": 1.989, "step": 45051 }, { "epoch": 4.621871153057038, "grad_norm": 0.07135532796382904, "learning_rate": 0.01, "loss": 1.9807, "step": 45054 }, { "epoch": 4.62217890849405, "grad_norm": 0.05112279951572418, "learning_rate": 0.01, "loss": 1.9866, "step": 45057 }, { "epoch": 4.6224866639310624, "grad_norm": 0.11058257520198822, "learning_rate": 0.01, "loss": 2.0042, "step": 45060 }, { "epoch": 4.622794419368075, "grad_norm": 0.12000041455030441, "learning_rate": 0.01, "loss": 1.9503, "step": 45063 }, { "epoch": 4.623102174805088, "grad_norm": 0.06073766201734543, "learning_rate": 0.01, "loss": 1.9722, "step": 45066 }, { "epoch": 4.623409930242101, "grad_norm": 0.055321842432022095, "learning_rate": 0.01, "loss": 1.9971, "step": 45069 }, { "epoch": 4.623717685679114, "grad_norm": 0.04220004379749298, "learning_rate": 0.01, "loss": 1.9731, "step": 45072 }, { "epoch": 4.624025441116126, "grad_norm": 0.03700163960456848, "learning_rate": 0.01, "loss": 1.9781, "step": 45075 }, { "epoch": 4.624333196553139, "grad_norm": 0.108041912317276, "learning_rate": 0.01, "loss": 1.9753, "step": 45078 }, { "epoch": 4.624640951990152, "grad_norm": 0.04682699963450432, "learning_rate": 0.01, "loss": 1.9974, "step": 45081 }, { "epoch": 4.624948707427165, "grad_norm": 0.07901785522699356, "learning_rate": 0.01, "loss": 1.9806, "step": 45084 }, { "epoch": 4.6252564628641775, "grad_norm": 0.07119090855121613, "learning_rate": 0.01, "loss": 1.9928, "step": 45087 }, { "epoch": 4.62556421830119, "grad_norm": 0.08904542028903961, "learning_rate": 0.01, "loss": 2.0217, "step": 45090 }, { "epoch": 4.625871973738203, "grad_norm": 0.06656675785779953, "learning_rate": 0.01, "loss": 1.9986, "step": 45093 }, { "epoch": 4.626179729175216, "grad_norm": 0.06560304015874863, "learning_rate": 0.01, "loss": 1.9716, "step": 45096 }, { "epoch": 4.626487484612229, "grad_norm": 0.05664901062846184, "learning_rate": 0.01, "loss": 1.9716, "step": 45099 }, { "epoch": 4.6267952400492405, "grad_norm": 0.0491751953959465, "learning_rate": 0.01, "loss": 1.986, "step": 45102 }, { "epoch": 4.627102995486253, "grad_norm": 0.11787789314985275, "learning_rate": 0.01, "loss": 1.9853, "step": 45105 }, { "epoch": 4.627410750923266, "grad_norm": 0.04068392515182495, "learning_rate": 0.01, "loss": 1.9926, "step": 45108 }, { "epoch": 4.627718506360279, "grad_norm": 0.03958519548177719, "learning_rate": 0.01, "loss": 1.9939, "step": 45111 }, { "epoch": 4.628026261797292, "grad_norm": 0.05667821317911148, "learning_rate": 0.01, "loss": 2.0201, "step": 45114 }, { "epoch": 4.628334017234304, "grad_norm": 0.04660872370004654, "learning_rate": 0.01, "loss": 1.9892, "step": 45117 }, { "epoch": 4.628641772671317, "grad_norm": 0.05565042048692703, "learning_rate": 0.01, "loss": 1.9992, "step": 45120 }, { "epoch": 4.62894952810833, "grad_norm": 0.05158378556370735, "learning_rate": 0.01, "loss": 1.9672, "step": 45123 }, { "epoch": 4.629257283545343, "grad_norm": 0.04500264301896095, "learning_rate": 0.01, "loss": 1.9597, "step": 45126 }, { "epoch": 4.6295650389823555, "grad_norm": 0.03781789168715477, "learning_rate": 0.01, "loss": 1.9804, "step": 45129 }, { "epoch": 4.629872794419368, "grad_norm": 0.09032827615737915, "learning_rate": 0.01, "loss": 1.9739, "step": 45132 }, { "epoch": 4.630180549856381, "grad_norm": 0.09161376953125, "learning_rate": 0.01, "loss": 1.9864, "step": 45135 }, { "epoch": 4.630488305293394, "grad_norm": 0.07650554925203323, "learning_rate": 0.01, "loss": 1.9775, "step": 45138 }, { "epoch": 4.630796060730407, "grad_norm": 0.07098524272441864, "learning_rate": 0.01, "loss": 2.0106, "step": 45141 }, { "epoch": 4.631103816167419, "grad_norm": 0.053486138582229614, "learning_rate": 0.01, "loss": 1.9687, "step": 45144 }, { "epoch": 4.631411571604431, "grad_norm": 0.06696584820747375, "learning_rate": 0.01, "loss": 1.9744, "step": 45147 }, { "epoch": 4.631719327041444, "grad_norm": 0.08330658078193665, "learning_rate": 0.01, "loss": 2.0043, "step": 45150 }, { "epoch": 4.632027082478457, "grad_norm": 0.05537624657154083, "learning_rate": 0.01, "loss": 1.99, "step": 45153 }, { "epoch": 4.63233483791547, "grad_norm": 0.07297627627849579, "learning_rate": 0.01, "loss": 1.9885, "step": 45156 }, { "epoch": 4.632642593352482, "grad_norm": 0.07492797076702118, "learning_rate": 0.01, "loss": 1.976, "step": 45159 }, { "epoch": 4.632950348789495, "grad_norm": 0.06871726363897324, "learning_rate": 0.01, "loss": 1.9877, "step": 45162 }, { "epoch": 4.633258104226508, "grad_norm": 0.04638645797967911, "learning_rate": 0.01, "loss": 1.9722, "step": 45165 }, { "epoch": 4.633565859663521, "grad_norm": 0.10735375434160233, "learning_rate": 0.01, "loss": 2.0048, "step": 45168 }, { "epoch": 4.6338736151005335, "grad_norm": 0.1550443023443222, "learning_rate": 0.01, "loss": 1.9663, "step": 45171 }, { "epoch": 4.634181370537546, "grad_norm": 0.14652806520462036, "learning_rate": 0.01, "loss": 2.0133, "step": 45174 }, { "epoch": 4.634489125974559, "grad_norm": 0.06344394385814667, "learning_rate": 0.01, "loss": 2.0081, "step": 45177 }, { "epoch": 4.634796881411572, "grad_norm": 0.03594981133937836, "learning_rate": 0.01, "loss": 2.0124, "step": 45180 }, { "epoch": 4.635104636848585, "grad_norm": 0.034205902367830276, "learning_rate": 0.01, "loss": 1.9808, "step": 45183 }, { "epoch": 4.635412392285597, "grad_norm": 0.041966572403907776, "learning_rate": 0.01, "loss": 1.9958, "step": 45186 }, { "epoch": 4.63572014772261, "grad_norm": 0.05192418769001961, "learning_rate": 0.01, "loss": 2.0068, "step": 45189 }, { "epoch": 4.636027903159622, "grad_norm": 0.04771101847290993, "learning_rate": 0.01, "loss": 1.9812, "step": 45192 }, { "epoch": 4.636335658596635, "grad_norm": 0.05367758497595787, "learning_rate": 0.01, "loss": 1.9885, "step": 45195 }, { "epoch": 4.636643414033648, "grad_norm": 0.08973217755556107, "learning_rate": 0.01, "loss": 2.0129, "step": 45198 }, { "epoch": 4.63695116947066, "grad_norm": 0.06793297082185745, "learning_rate": 0.01, "loss": 1.9893, "step": 45201 }, { "epoch": 4.637258924907673, "grad_norm": 0.06299318373203278, "learning_rate": 0.01, "loss": 2.0075, "step": 45204 }, { "epoch": 4.637566680344686, "grad_norm": 0.106409452855587, "learning_rate": 0.01, "loss": 1.9757, "step": 45207 }, { "epoch": 4.637874435781699, "grad_norm": 0.044573575258255005, "learning_rate": 0.01, "loss": 1.9644, "step": 45210 }, { "epoch": 4.638182191218712, "grad_norm": 0.04855922982096672, "learning_rate": 0.01, "loss": 1.9863, "step": 45213 }, { "epoch": 4.638489946655724, "grad_norm": 0.05310840159654617, "learning_rate": 0.01, "loss": 2.0067, "step": 45216 }, { "epoch": 4.638797702092737, "grad_norm": 0.1408100426197052, "learning_rate": 0.01, "loss": 2.016, "step": 45219 }, { "epoch": 4.63910545752975, "grad_norm": 0.07140195369720459, "learning_rate": 0.01, "loss": 1.9978, "step": 45222 }, { "epoch": 4.639413212966763, "grad_norm": 0.0864189863204956, "learning_rate": 0.01, "loss": 2.0122, "step": 45225 }, { "epoch": 4.6397209684037755, "grad_norm": 0.0877571702003479, "learning_rate": 0.01, "loss": 1.9844, "step": 45228 }, { "epoch": 4.640028723840788, "grad_norm": 0.06890790909528732, "learning_rate": 0.01, "loss": 1.9915, "step": 45231 }, { "epoch": 4.640336479277801, "grad_norm": 0.04007607325911522, "learning_rate": 0.01, "loss": 1.9682, "step": 45234 }, { "epoch": 4.640644234714813, "grad_norm": 0.09311459213495255, "learning_rate": 0.01, "loss": 1.9888, "step": 45237 }, { "epoch": 4.640951990151826, "grad_norm": 0.0721447765827179, "learning_rate": 0.01, "loss": 1.954, "step": 45240 }, { "epoch": 4.6412597455888385, "grad_norm": 0.07306293398141861, "learning_rate": 0.01, "loss": 2.02, "step": 45243 }, { "epoch": 4.641567501025851, "grad_norm": 0.044194191694259644, "learning_rate": 0.01, "loss": 1.9821, "step": 45246 }, { "epoch": 4.641875256462864, "grad_norm": 0.07482686638832092, "learning_rate": 0.01, "loss": 1.9969, "step": 45249 }, { "epoch": 4.642183011899877, "grad_norm": 0.05982416495680809, "learning_rate": 0.01, "loss": 1.9808, "step": 45252 }, { "epoch": 4.64249076733689, "grad_norm": 0.06082857400178909, "learning_rate": 0.01, "loss": 1.9854, "step": 45255 }, { "epoch": 4.642798522773902, "grad_norm": 0.12249299138784409, "learning_rate": 0.01, "loss": 1.9866, "step": 45258 }, { "epoch": 4.643106278210915, "grad_norm": 0.04660319164395332, "learning_rate": 0.01, "loss": 1.9974, "step": 45261 }, { "epoch": 4.643414033647928, "grad_norm": 0.13198639452457428, "learning_rate": 0.01, "loss": 2.0052, "step": 45264 }, { "epoch": 4.643721789084941, "grad_norm": 0.11743400990962982, "learning_rate": 0.01, "loss": 1.9944, "step": 45267 }, { "epoch": 4.6440295445219535, "grad_norm": 0.08904967457056046, "learning_rate": 0.01, "loss": 1.9585, "step": 45270 }, { "epoch": 4.644337299958966, "grad_norm": 0.051379457116127014, "learning_rate": 0.01, "loss": 1.9859, "step": 45273 }, { "epoch": 4.644645055395979, "grad_norm": 0.053696874529123306, "learning_rate": 0.01, "loss": 1.9972, "step": 45276 }, { "epoch": 4.644952810832992, "grad_norm": 0.03924264386296272, "learning_rate": 0.01, "loss": 1.9757, "step": 45279 }, { "epoch": 4.645260566270004, "grad_norm": 0.04208714887499809, "learning_rate": 0.01, "loss": 1.9968, "step": 45282 }, { "epoch": 4.6455683217070165, "grad_norm": 0.11654502153396606, "learning_rate": 0.01, "loss": 2.0116, "step": 45285 }, { "epoch": 4.645876077144029, "grad_norm": 0.06177401542663574, "learning_rate": 0.01, "loss": 1.9877, "step": 45288 }, { "epoch": 4.646183832581042, "grad_norm": 0.04343295469880104, "learning_rate": 0.01, "loss": 2.0025, "step": 45291 }, { "epoch": 4.646491588018055, "grad_norm": 0.036727067083120346, "learning_rate": 0.01, "loss": 1.9948, "step": 45294 }, { "epoch": 4.646799343455068, "grad_norm": 0.06670525670051575, "learning_rate": 0.01, "loss": 2.0094, "step": 45297 }, { "epoch": 4.64710709889208, "grad_norm": 0.08610829710960388, "learning_rate": 0.01, "loss": 1.9921, "step": 45300 }, { "epoch": 4.647414854329093, "grad_norm": 0.04402744024991989, "learning_rate": 0.01, "loss": 2.0245, "step": 45303 }, { "epoch": 4.647722609766106, "grad_norm": 0.09110675007104874, "learning_rate": 0.01, "loss": 1.9827, "step": 45306 }, { "epoch": 4.648030365203119, "grad_norm": 0.12931224703788757, "learning_rate": 0.01, "loss": 2.0143, "step": 45309 }, { "epoch": 4.6483381206401315, "grad_norm": 0.03825344517827034, "learning_rate": 0.01, "loss": 2.0175, "step": 45312 }, { "epoch": 4.648645876077144, "grad_norm": 0.04855644330382347, "learning_rate": 0.01, "loss": 1.992, "step": 45315 }, { "epoch": 4.648953631514157, "grad_norm": 0.08731474727392197, "learning_rate": 0.01, "loss": 2.0059, "step": 45318 }, { "epoch": 4.649261386951169, "grad_norm": 0.06591195613145828, "learning_rate": 0.01, "loss": 2.0107, "step": 45321 }, { "epoch": 4.649569142388183, "grad_norm": 0.051368821412324905, "learning_rate": 0.01, "loss": 2.0055, "step": 45324 }, { "epoch": 4.6498768978251945, "grad_norm": 0.09959837794303894, "learning_rate": 0.01, "loss": 2.0111, "step": 45327 }, { "epoch": 4.650184653262207, "grad_norm": 0.08669820427894592, "learning_rate": 0.01, "loss": 2.0138, "step": 45330 }, { "epoch": 4.65049240869922, "grad_norm": 0.03989667445421219, "learning_rate": 0.01, "loss": 1.996, "step": 45333 }, { "epoch": 4.650800164136233, "grad_norm": 0.033241745084524155, "learning_rate": 0.01, "loss": 2.0052, "step": 45336 }, { "epoch": 4.651107919573246, "grad_norm": 0.03395982086658478, "learning_rate": 0.01, "loss": 1.9674, "step": 45339 }, { "epoch": 4.651415675010258, "grad_norm": 0.038975682109594345, "learning_rate": 0.01, "loss": 1.9894, "step": 45342 }, { "epoch": 4.651723430447271, "grad_norm": 0.04904354363679886, "learning_rate": 0.01, "loss": 1.9875, "step": 45345 }, { "epoch": 4.652031185884284, "grad_norm": 0.11245663464069366, "learning_rate": 0.01, "loss": 1.9929, "step": 45348 }, { "epoch": 4.652338941321297, "grad_norm": 0.11784044653177261, "learning_rate": 0.01, "loss": 1.9836, "step": 45351 }, { "epoch": 4.6526466967583096, "grad_norm": 0.054707154631614685, "learning_rate": 0.01, "loss": 2.0161, "step": 45354 }, { "epoch": 4.652954452195322, "grad_norm": 0.07143942266702652, "learning_rate": 0.01, "loss": 1.9921, "step": 45357 }, { "epoch": 4.653262207632335, "grad_norm": 0.05629519745707512, "learning_rate": 0.01, "loss": 1.9809, "step": 45360 }, { "epoch": 4.653569963069348, "grad_norm": 0.03609561547636986, "learning_rate": 0.01, "loss": 1.9773, "step": 45363 }, { "epoch": 4.65387771850636, "grad_norm": 0.049254827201366425, "learning_rate": 0.01, "loss": 1.9866, "step": 45366 }, { "epoch": 4.6541854739433735, "grad_norm": 0.049836140125989914, "learning_rate": 0.01, "loss": 2.0302, "step": 45369 }, { "epoch": 4.654493229380385, "grad_norm": 0.043429117649793625, "learning_rate": 0.01, "loss": 2.0064, "step": 45372 }, { "epoch": 4.654800984817398, "grad_norm": 0.09842117130756378, "learning_rate": 0.01, "loss": 1.9911, "step": 45375 }, { "epoch": 4.655108740254411, "grad_norm": 0.06700047105550766, "learning_rate": 0.01, "loss": 1.9877, "step": 45378 }, { "epoch": 4.655416495691424, "grad_norm": 0.0717921331524849, "learning_rate": 0.01, "loss": 1.9809, "step": 45381 }, { "epoch": 4.6557242511284365, "grad_norm": 0.11065293103456497, "learning_rate": 0.01, "loss": 1.9845, "step": 45384 }, { "epoch": 4.656032006565449, "grad_norm": 0.07875839620828629, "learning_rate": 0.01, "loss": 1.9864, "step": 45387 }, { "epoch": 4.656339762002462, "grad_norm": 0.07618141919374466, "learning_rate": 0.01, "loss": 1.9818, "step": 45390 }, { "epoch": 4.656647517439475, "grad_norm": 0.05607747659087181, "learning_rate": 0.01, "loss": 2.007, "step": 45393 }, { "epoch": 4.656955272876488, "grad_norm": 0.08270974457263947, "learning_rate": 0.01, "loss": 2.0041, "step": 45396 }, { "epoch": 4.6572630283135, "grad_norm": 0.10751419514417648, "learning_rate": 0.01, "loss": 1.9982, "step": 45399 }, { "epoch": 4.657570783750513, "grad_norm": 0.07406135648488998, "learning_rate": 0.01, "loss": 1.9857, "step": 45402 }, { "epoch": 4.657878539187526, "grad_norm": 0.06641979515552521, "learning_rate": 0.01, "loss": 2.0221, "step": 45405 }, { "epoch": 4.658186294624539, "grad_norm": 0.04510427638888359, "learning_rate": 0.01, "loss": 1.9863, "step": 45408 }, { "epoch": 4.658494050061551, "grad_norm": 0.0381624698638916, "learning_rate": 0.01, "loss": 2.013, "step": 45411 }, { "epoch": 4.658801805498564, "grad_norm": 0.08912669867277145, "learning_rate": 0.01, "loss": 1.9966, "step": 45414 }, { "epoch": 4.659109560935576, "grad_norm": 0.04501614347100258, "learning_rate": 0.01, "loss": 1.9865, "step": 45417 }, { "epoch": 4.659417316372589, "grad_norm": 0.10741306841373444, "learning_rate": 0.01, "loss": 1.9793, "step": 45420 }, { "epoch": 4.659725071809602, "grad_norm": 0.05976792797446251, "learning_rate": 0.01, "loss": 2.0269, "step": 45423 }, { "epoch": 4.6600328272466145, "grad_norm": 0.09859906136989594, "learning_rate": 0.01, "loss": 2.0081, "step": 45426 }, { "epoch": 4.660340582683627, "grad_norm": 0.050599753856658936, "learning_rate": 0.01, "loss": 1.9862, "step": 45429 }, { "epoch": 4.66064833812064, "grad_norm": 0.04848974943161011, "learning_rate": 0.01, "loss": 2.0066, "step": 45432 }, { "epoch": 4.660956093557653, "grad_norm": 0.07970554381608963, "learning_rate": 0.01, "loss": 2.0102, "step": 45435 }, { "epoch": 4.661263848994666, "grad_norm": 0.06040119752287865, "learning_rate": 0.01, "loss": 2.001, "step": 45438 }, { "epoch": 4.661571604431678, "grad_norm": 0.05686302110552788, "learning_rate": 0.01, "loss": 1.9845, "step": 45441 }, { "epoch": 4.661879359868691, "grad_norm": 0.0757538229227066, "learning_rate": 0.01, "loss": 2.0019, "step": 45444 }, { "epoch": 4.662187115305704, "grad_norm": 0.07825610786676407, "learning_rate": 0.01, "loss": 2.0211, "step": 45447 }, { "epoch": 4.662494870742717, "grad_norm": 0.048520684242248535, "learning_rate": 0.01, "loss": 1.9991, "step": 45450 }, { "epoch": 4.6628026261797295, "grad_norm": 0.046711694449186325, "learning_rate": 0.01, "loss": 2.0003, "step": 45453 }, { "epoch": 4.663110381616741, "grad_norm": 0.058854326605796814, "learning_rate": 0.01, "loss": 1.9781, "step": 45456 }, { "epoch": 4.663418137053755, "grad_norm": 0.04749085754156113, "learning_rate": 0.01, "loss": 1.9882, "step": 45459 }, { "epoch": 4.663725892490767, "grad_norm": 0.05710771679878235, "learning_rate": 0.01, "loss": 1.9909, "step": 45462 }, { "epoch": 4.66403364792778, "grad_norm": 0.10407797247171402, "learning_rate": 0.01, "loss": 1.9993, "step": 45465 }, { "epoch": 4.6643414033647925, "grad_norm": 0.07726982980966568, "learning_rate": 0.01, "loss": 1.9727, "step": 45468 }, { "epoch": 4.664649158801805, "grad_norm": 0.11478970944881439, "learning_rate": 0.01, "loss": 2.0055, "step": 45471 }, { "epoch": 4.664956914238818, "grad_norm": 0.1078820750117302, "learning_rate": 0.01, "loss": 1.9855, "step": 45474 }, { "epoch": 4.665264669675831, "grad_norm": 0.06113171949982643, "learning_rate": 0.01, "loss": 1.9661, "step": 45477 }, { "epoch": 4.665572425112844, "grad_norm": 0.04639894515275955, "learning_rate": 0.01, "loss": 1.9616, "step": 45480 }, { "epoch": 4.665880180549856, "grad_norm": 0.04308944195508957, "learning_rate": 0.01, "loss": 1.9689, "step": 45483 }, { "epoch": 4.666187935986869, "grad_norm": 0.036519356071949005, "learning_rate": 0.01, "loss": 2.0034, "step": 45486 }, { "epoch": 4.666495691423882, "grad_norm": 0.04407155141234398, "learning_rate": 0.01, "loss": 1.9773, "step": 45489 }, { "epoch": 4.666803446860895, "grad_norm": 0.03955180197954178, "learning_rate": 0.01, "loss": 1.9971, "step": 45492 }, { "epoch": 4.6671112022979075, "grad_norm": 0.06779368221759796, "learning_rate": 0.01, "loss": 2.0009, "step": 45495 }, { "epoch": 4.66741895773492, "grad_norm": 0.11751384288072586, "learning_rate": 0.01, "loss": 2.0014, "step": 45498 }, { "epoch": 4.667726713171932, "grad_norm": 0.05809053033590317, "learning_rate": 0.01, "loss": 1.9988, "step": 45501 }, { "epoch": 4.668034468608946, "grad_norm": 0.07685140520334244, "learning_rate": 0.01, "loss": 1.9825, "step": 45504 }, { "epoch": 4.668342224045958, "grad_norm": 0.055087387561798096, "learning_rate": 0.01, "loss": 2.0133, "step": 45507 }, { "epoch": 4.668649979482971, "grad_norm": 0.05234852805733681, "learning_rate": 0.01, "loss": 1.997, "step": 45510 }, { "epoch": 4.668957734919983, "grad_norm": 0.048037078231573105, "learning_rate": 0.01, "loss": 1.9717, "step": 45513 }, { "epoch": 4.669265490356996, "grad_norm": 0.03744732588529587, "learning_rate": 0.01, "loss": 2.0247, "step": 45516 }, { "epoch": 4.669573245794009, "grad_norm": 0.053839731961488724, "learning_rate": 0.01, "loss": 1.9802, "step": 45519 }, { "epoch": 4.669881001231022, "grad_norm": 0.06782069802284241, "learning_rate": 0.01, "loss": 2.003, "step": 45522 }, { "epoch": 4.6701887566680345, "grad_norm": 0.05053957551717758, "learning_rate": 0.01, "loss": 2.004, "step": 45525 }, { "epoch": 4.670496512105047, "grad_norm": 0.07486964762210846, "learning_rate": 0.01, "loss": 2.0026, "step": 45528 }, { "epoch": 4.67080426754206, "grad_norm": 0.07367604970932007, "learning_rate": 0.01, "loss": 1.9788, "step": 45531 }, { "epoch": 4.671112022979073, "grad_norm": 0.06390465050935745, "learning_rate": 0.01, "loss": 1.981, "step": 45534 }, { "epoch": 4.671419778416086, "grad_norm": 0.1271253079175949, "learning_rate": 0.01, "loss": 1.9969, "step": 45537 }, { "epoch": 4.671727533853098, "grad_norm": 0.08515705168247223, "learning_rate": 0.01, "loss": 2.0074, "step": 45540 }, { "epoch": 4.672035289290111, "grad_norm": 0.03478894755244255, "learning_rate": 0.01, "loss": 1.9899, "step": 45543 }, { "epoch": 4.672343044727123, "grad_norm": 0.051871661096811295, "learning_rate": 0.01, "loss": 2.0114, "step": 45546 }, { "epoch": 4.672650800164137, "grad_norm": 0.0924333781003952, "learning_rate": 0.01, "loss": 1.9917, "step": 45549 }, { "epoch": 4.672958555601149, "grad_norm": 0.13955476880073547, "learning_rate": 0.01, "loss": 1.9923, "step": 45552 }, { "epoch": 4.673266311038161, "grad_norm": 0.07893053442239761, "learning_rate": 0.01, "loss": 1.9826, "step": 45555 }, { "epoch": 4.673574066475174, "grad_norm": 0.048040106892585754, "learning_rate": 0.01, "loss": 2.0011, "step": 45558 }, { "epoch": 4.673881821912187, "grad_norm": 0.05462590605020523, "learning_rate": 0.01, "loss": 1.9969, "step": 45561 }, { "epoch": 4.6741895773492, "grad_norm": 0.07224661111831665, "learning_rate": 0.01, "loss": 1.9815, "step": 45564 }, { "epoch": 4.6744973327862125, "grad_norm": 0.09311232715845108, "learning_rate": 0.01, "loss": 1.9797, "step": 45567 }, { "epoch": 4.674805088223225, "grad_norm": 0.051444876939058304, "learning_rate": 0.01, "loss": 1.9804, "step": 45570 }, { "epoch": 4.675112843660238, "grad_norm": 0.04749058559536934, "learning_rate": 0.01, "loss": 2.0038, "step": 45573 }, { "epoch": 4.675420599097251, "grad_norm": 0.04723438620567322, "learning_rate": 0.01, "loss": 2.0038, "step": 45576 }, { "epoch": 4.675728354534264, "grad_norm": 0.10198043286800385, "learning_rate": 0.01, "loss": 1.9845, "step": 45579 }, { "epoch": 4.676036109971276, "grad_norm": 0.07877392321825027, "learning_rate": 0.01, "loss": 1.9752, "step": 45582 }, { "epoch": 4.676343865408289, "grad_norm": 0.10339850187301636, "learning_rate": 0.01, "loss": 1.9716, "step": 45585 }, { "epoch": 4.676651620845302, "grad_norm": 0.07544226199388504, "learning_rate": 0.01, "loss": 2.0023, "step": 45588 }, { "epoch": 4.676959376282314, "grad_norm": 0.06043079495429993, "learning_rate": 0.01, "loss": 2.0105, "step": 45591 }, { "epoch": 4.6772671317193275, "grad_norm": 0.06057659909129143, "learning_rate": 0.01, "loss": 1.9932, "step": 45594 }, { "epoch": 4.677574887156339, "grad_norm": 0.08405612409114838, "learning_rate": 0.01, "loss": 1.9765, "step": 45597 }, { "epoch": 4.677882642593352, "grad_norm": 0.048926811665296555, "learning_rate": 0.01, "loss": 1.9958, "step": 45600 }, { "epoch": 4.678190398030365, "grad_norm": 0.05887090042233467, "learning_rate": 0.01, "loss": 1.9904, "step": 45603 }, { "epoch": 4.678498153467378, "grad_norm": 0.04116439446806908, "learning_rate": 0.01, "loss": 2.0309, "step": 45606 }, { "epoch": 4.6788059089043905, "grad_norm": 0.04675902798771858, "learning_rate": 0.01, "loss": 1.9901, "step": 45609 }, { "epoch": 4.679113664341403, "grad_norm": 0.036510296165943146, "learning_rate": 0.01, "loss": 1.986, "step": 45612 }, { "epoch": 4.679421419778416, "grad_norm": 0.06395171582698822, "learning_rate": 0.01, "loss": 1.9632, "step": 45615 }, { "epoch": 4.679729175215429, "grad_norm": 0.09183214604854584, "learning_rate": 0.01, "loss": 1.9844, "step": 45618 }, { "epoch": 4.680036930652442, "grad_norm": 0.03406717628240585, "learning_rate": 0.01, "loss": 1.9838, "step": 45621 }, { "epoch": 4.680344686089454, "grad_norm": 0.11709492653608322, "learning_rate": 0.01, "loss": 1.9687, "step": 45624 }, { "epoch": 4.680652441526467, "grad_norm": 0.054468922317028046, "learning_rate": 0.01, "loss": 1.9965, "step": 45627 }, { "epoch": 4.68096019696348, "grad_norm": 0.07991788536310196, "learning_rate": 0.01, "loss": 2.0017, "step": 45630 }, { "epoch": 4.681267952400493, "grad_norm": 0.046350929886102676, "learning_rate": 0.01, "loss": 1.9873, "step": 45633 }, { "epoch": 4.681575707837505, "grad_norm": 0.09740296006202698, "learning_rate": 0.01, "loss": 1.9912, "step": 45636 }, { "epoch": 4.681883463274518, "grad_norm": 0.0619819350540638, "learning_rate": 0.01, "loss": 2.0134, "step": 45639 }, { "epoch": 4.68219121871153, "grad_norm": 0.04457273334264755, "learning_rate": 0.01, "loss": 1.9918, "step": 45642 }, { "epoch": 4.682498974148543, "grad_norm": 0.07806507498025894, "learning_rate": 0.01, "loss": 1.9996, "step": 45645 }, { "epoch": 4.682806729585556, "grad_norm": 0.07045695930719376, "learning_rate": 0.01, "loss": 1.9979, "step": 45648 }, { "epoch": 4.6831144850225686, "grad_norm": 0.0896361917257309, "learning_rate": 0.01, "loss": 1.988, "step": 45651 }, { "epoch": 4.683422240459581, "grad_norm": 0.04496191442012787, "learning_rate": 0.01, "loss": 2.0188, "step": 45654 }, { "epoch": 4.683729995896594, "grad_norm": 0.041538260877132416, "learning_rate": 0.01, "loss": 2.0357, "step": 45657 }, { "epoch": 4.684037751333607, "grad_norm": 0.07717595249414444, "learning_rate": 0.01, "loss": 1.977, "step": 45660 }, { "epoch": 4.68434550677062, "grad_norm": 0.046603985130786896, "learning_rate": 0.01, "loss": 1.9853, "step": 45663 }, { "epoch": 4.6846532622076325, "grad_norm": 0.0826171487569809, "learning_rate": 0.01, "loss": 1.9716, "step": 45666 }, { "epoch": 4.684961017644645, "grad_norm": 0.09356506168842316, "learning_rate": 0.01, "loss": 1.9932, "step": 45669 }, { "epoch": 4.685268773081658, "grad_norm": 0.04961970075964928, "learning_rate": 0.01, "loss": 1.9813, "step": 45672 }, { "epoch": 4.685576528518671, "grad_norm": 0.08724430203437805, "learning_rate": 0.01, "loss": 2.0075, "step": 45675 }, { "epoch": 4.685884283955684, "grad_norm": 0.093470498919487, "learning_rate": 0.01, "loss": 1.995, "step": 45678 }, { "epoch": 4.6861920393926955, "grad_norm": 0.04148003086447716, "learning_rate": 0.01, "loss": 2.0063, "step": 45681 }, { "epoch": 4.686499794829709, "grad_norm": 0.04073407128453255, "learning_rate": 0.01, "loss": 1.9729, "step": 45684 }, { "epoch": 4.686807550266721, "grad_norm": 0.06185337156057358, "learning_rate": 0.01, "loss": 2.0155, "step": 45687 }, { "epoch": 4.687115305703734, "grad_norm": 0.06323441118001938, "learning_rate": 0.01, "loss": 2.0087, "step": 45690 }, { "epoch": 4.687423061140747, "grad_norm": 0.06550868600606918, "learning_rate": 0.01, "loss": 1.9642, "step": 45693 }, { "epoch": 4.687730816577759, "grad_norm": 0.10702592879533768, "learning_rate": 0.01, "loss": 1.9947, "step": 45696 }, { "epoch": 4.688038572014772, "grad_norm": 0.07939399778842926, "learning_rate": 0.01, "loss": 2.0218, "step": 45699 }, { "epoch": 4.688346327451785, "grad_norm": 0.09626048058271408, "learning_rate": 0.01, "loss": 1.9873, "step": 45702 }, { "epoch": 4.688654082888798, "grad_norm": 0.10478439927101135, "learning_rate": 0.01, "loss": 1.9968, "step": 45705 }, { "epoch": 4.6889618383258105, "grad_norm": 0.06496943533420563, "learning_rate": 0.01, "loss": 2.0107, "step": 45708 }, { "epoch": 4.689269593762823, "grad_norm": 0.060445625334978104, "learning_rate": 0.01, "loss": 1.9732, "step": 45711 }, { "epoch": 4.689577349199836, "grad_norm": 0.050236016511917114, "learning_rate": 0.01, "loss": 2.0052, "step": 45714 }, { "epoch": 4.689885104636849, "grad_norm": 0.07051600515842438, "learning_rate": 0.01, "loss": 1.9856, "step": 45717 }, { "epoch": 4.690192860073862, "grad_norm": 0.14171123504638672, "learning_rate": 0.01, "loss": 1.9885, "step": 45720 }, { "epoch": 4.690500615510874, "grad_norm": 0.04662410169839859, "learning_rate": 0.01, "loss": 1.9872, "step": 45723 }, { "epoch": 4.690808370947886, "grad_norm": 0.04155685007572174, "learning_rate": 0.01, "loss": 1.986, "step": 45726 }, { "epoch": 4.6911161263849, "grad_norm": 0.03519884869456291, "learning_rate": 0.01, "loss": 2.0192, "step": 45729 }, { "epoch": 4.691423881821912, "grad_norm": 0.04860999807715416, "learning_rate": 0.01, "loss": 1.9879, "step": 45732 }, { "epoch": 4.691731637258925, "grad_norm": 0.05452680215239525, "learning_rate": 0.01, "loss": 1.987, "step": 45735 }, { "epoch": 4.692039392695937, "grad_norm": 0.08102243393659592, "learning_rate": 0.01, "loss": 2.0163, "step": 45738 }, { "epoch": 4.69234714813295, "grad_norm": 0.04494756832718849, "learning_rate": 0.01, "loss": 1.9904, "step": 45741 }, { "epoch": 4.692654903569963, "grad_norm": 0.05493905395269394, "learning_rate": 0.01, "loss": 1.9823, "step": 45744 }, { "epoch": 4.692962659006976, "grad_norm": 0.037139080464839935, "learning_rate": 0.01, "loss": 1.9843, "step": 45747 }, { "epoch": 4.6932704144439885, "grad_norm": 0.04770129173994064, "learning_rate": 0.01, "loss": 1.9545, "step": 45750 }, { "epoch": 4.693578169881001, "grad_norm": 0.1386919915676117, "learning_rate": 0.01, "loss": 1.9886, "step": 45753 }, { "epoch": 4.693885925318014, "grad_norm": 0.038857076317071915, "learning_rate": 0.01, "loss": 1.9791, "step": 45756 }, { "epoch": 4.694193680755027, "grad_norm": 0.08089780062437057, "learning_rate": 0.01, "loss": 1.9789, "step": 45759 }, { "epoch": 4.69450143619204, "grad_norm": 0.05583566427230835, "learning_rate": 0.01, "loss": 1.9852, "step": 45762 }, { "epoch": 4.694809191629052, "grad_norm": 0.06422879546880722, "learning_rate": 0.01, "loss": 2.001, "step": 45765 }, { "epoch": 4.695116947066065, "grad_norm": 0.037838250398635864, "learning_rate": 0.01, "loss": 1.9796, "step": 45768 }, { "epoch": 4.695424702503077, "grad_norm": 0.04485329985618591, "learning_rate": 0.01, "loss": 2.0003, "step": 45771 }, { "epoch": 4.695732457940091, "grad_norm": 0.13175508379936218, "learning_rate": 0.01, "loss": 1.9796, "step": 45774 }, { "epoch": 4.696040213377103, "grad_norm": 0.04778193309903145, "learning_rate": 0.01, "loss": 1.9786, "step": 45777 }, { "epoch": 4.696347968814115, "grad_norm": 0.07214421778917313, "learning_rate": 0.01, "loss": 1.9976, "step": 45780 }, { "epoch": 4.696655724251128, "grad_norm": 0.04276994615793228, "learning_rate": 0.01, "loss": 2.0162, "step": 45783 }, { "epoch": 4.696963479688141, "grad_norm": 0.049977049231529236, "learning_rate": 0.01, "loss": 2.0031, "step": 45786 }, { "epoch": 4.697271235125154, "grad_norm": 0.031859464943408966, "learning_rate": 0.01, "loss": 1.9764, "step": 45789 }, { "epoch": 4.6975789905621665, "grad_norm": 0.05537641420960426, "learning_rate": 0.01, "loss": 1.9991, "step": 45792 }, { "epoch": 4.697886745999179, "grad_norm": 0.07403066009283066, "learning_rate": 0.01, "loss": 1.9886, "step": 45795 }, { "epoch": 4.698194501436192, "grad_norm": 0.037336938083171844, "learning_rate": 0.01, "loss": 1.9824, "step": 45798 }, { "epoch": 4.698502256873205, "grad_norm": 0.04439583793282509, "learning_rate": 0.01, "loss": 2.0029, "step": 45801 }, { "epoch": 4.698810012310218, "grad_norm": 0.05648301541805267, "learning_rate": 0.01, "loss": 2.0049, "step": 45804 }, { "epoch": 4.6991177677472304, "grad_norm": 0.07558471709489822, "learning_rate": 0.01, "loss": 1.9963, "step": 45807 }, { "epoch": 4.699425523184243, "grad_norm": 0.04831695184111595, "learning_rate": 0.01, "loss": 1.981, "step": 45810 }, { "epoch": 4.699733278621256, "grad_norm": 0.05198276415467262, "learning_rate": 0.01, "loss": 1.9696, "step": 45813 }, { "epoch": 4.700041034058268, "grad_norm": 0.05408303439617157, "learning_rate": 0.01, "loss": 1.9825, "step": 45816 }, { "epoch": 4.700348789495282, "grad_norm": 0.107550710439682, "learning_rate": 0.01, "loss": 1.9777, "step": 45819 }, { "epoch": 4.7006565449322935, "grad_norm": 0.05953781679272652, "learning_rate": 0.01, "loss": 1.9972, "step": 45822 }, { "epoch": 4.700964300369306, "grad_norm": 0.09626014530658722, "learning_rate": 0.01, "loss": 1.989, "step": 45825 }, { "epoch": 4.701272055806319, "grad_norm": 0.08343878388404846, "learning_rate": 0.01, "loss": 2.0162, "step": 45828 }, { "epoch": 4.701579811243332, "grad_norm": 0.037152498960494995, "learning_rate": 0.01, "loss": 1.9595, "step": 45831 }, { "epoch": 4.701887566680345, "grad_norm": 0.10978374630212784, "learning_rate": 0.01, "loss": 1.9975, "step": 45834 }, { "epoch": 4.702195322117357, "grad_norm": 0.04754413291811943, "learning_rate": 0.01, "loss": 1.997, "step": 45837 }, { "epoch": 4.70250307755437, "grad_norm": 0.04514869302511215, "learning_rate": 0.01, "loss": 1.9843, "step": 45840 }, { "epoch": 4.702810832991383, "grad_norm": 0.08105579018592834, "learning_rate": 0.01, "loss": 1.9678, "step": 45843 }, { "epoch": 4.703118588428396, "grad_norm": 0.032461777329444885, "learning_rate": 0.01, "loss": 1.9961, "step": 45846 }, { "epoch": 4.7034263438654085, "grad_norm": 0.13264963030815125, "learning_rate": 0.01, "loss": 1.9787, "step": 45849 }, { "epoch": 4.703734099302421, "grad_norm": 0.06816978752613068, "learning_rate": 0.01, "loss": 1.9947, "step": 45852 }, { "epoch": 4.704041854739434, "grad_norm": 0.09646057337522507, "learning_rate": 0.01, "loss": 1.9838, "step": 45855 }, { "epoch": 4.704349610176447, "grad_norm": 0.06418583542108536, "learning_rate": 0.01, "loss": 2.0025, "step": 45858 }, { "epoch": 4.704657365613459, "grad_norm": 0.03923073783516884, "learning_rate": 0.01, "loss": 2.008, "step": 45861 }, { "epoch": 4.704965121050472, "grad_norm": 0.03935366868972778, "learning_rate": 0.01, "loss": 1.9926, "step": 45864 }, { "epoch": 4.705272876487484, "grad_norm": 0.0409211590886116, "learning_rate": 0.01, "loss": 2.0004, "step": 45867 }, { "epoch": 4.705580631924497, "grad_norm": 0.02996712550520897, "learning_rate": 0.01, "loss": 1.9865, "step": 45870 }, { "epoch": 4.70588838736151, "grad_norm": 0.03149070590734482, "learning_rate": 0.01, "loss": 1.9687, "step": 45873 }, { "epoch": 4.706196142798523, "grad_norm": 0.11619815230369568, "learning_rate": 0.01, "loss": 2.0, "step": 45876 }, { "epoch": 4.706503898235535, "grad_norm": 0.08959172666072845, "learning_rate": 0.01, "loss": 1.9773, "step": 45879 }, { "epoch": 4.706811653672548, "grad_norm": 0.03898904472589493, "learning_rate": 0.01, "loss": 2.006, "step": 45882 }, { "epoch": 4.707119409109561, "grad_norm": 0.05690839886665344, "learning_rate": 0.01, "loss": 1.995, "step": 45885 }, { "epoch": 4.707427164546574, "grad_norm": 0.08014564961194992, "learning_rate": 0.01, "loss": 1.9871, "step": 45888 }, { "epoch": 4.7077349199835865, "grad_norm": 0.07317539304494858, "learning_rate": 0.01, "loss": 1.9881, "step": 45891 }, { "epoch": 4.708042675420599, "grad_norm": 0.040355369448661804, "learning_rate": 0.01, "loss": 1.9926, "step": 45894 }, { "epoch": 4.708350430857612, "grad_norm": 0.04205753654241562, "learning_rate": 0.01, "loss": 2.0031, "step": 45897 }, { "epoch": 4.708658186294625, "grad_norm": 0.07082203030586243, "learning_rate": 0.01, "loss": 1.9845, "step": 45900 }, { "epoch": 4.708965941731638, "grad_norm": 0.037901826202869415, "learning_rate": 0.01, "loss": 1.9742, "step": 45903 }, { "epoch": 4.7092736971686495, "grad_norm": 0.04728815332055092, "learning_rate": 0.01, "loss": 2.0117, "step": 45906 }, { "epoch": 4.709581452605663, "grad_norm": 0.048279549926519394, "learning_rate": 0.01, "loss": 1.982, "step": 45909 }, { "epoch": 4.709889208042675, "grad_norm": 0.06365431100130081, "learning_rate": 0.01, "loss": 2.0031, "step": 45912 }, { "epoch": 4.710196963479688, "grad_norm": 0.07750722020864487, "learning_rate": 0.01, "loss": 2.0182, "step": 45915 }, { "epoch": 4.710504718916701, "grad_norm": 0.07337852567434311, "learning_rate": 0.01, "loss": 1.9747, "step": 45918 }, { "epoch": 4.710812474353713, "grad_norm": 0.055369194597005844, "learning_rate": 0.01, "loss": 1.9822, "step": 45921 }, { "epoch": 4.711120229790726, "grad_norm": 0.07653316110372543, "learning_rate": 0.01, "loss": 1.9758, "step": 45924 }, { "epoch": 4.711427985227739, "grad_norm": 0.13082928955554962, "learning_rate": 0.01, "loss": 2.0137, "step": 45927 }, { "epoch": 4.711735740664752, "grad_norm": 0.07909176498651505, "learning_rate": 0.01, "loss": 1.9699, "step": 45930 }, { "epoch": 4.7120434961017645, "grad_norm": 0.055445730686187744, "learning_rate": 0.01, "loss": 2.0064, "step": 45933 }, { "epoch": 4.712351251538777, "grad_norm": 0.08689797669649124, "learning_rate": 0.01, "loss": 1.9905, "step": 45936 }, { "epoch": 4.71265900697579, "grad_norm": 0.051975540816783905, "learning_rate": 0.01, "loss": 2.0116, "step": 45939 }, { "epoch": 4.712966762412803, "grad_norm": 0.1104854941368103, "learning_rate": 0.01, "loss": 1.9894, "step": 45942 }, { "epoch": 4.713274517849816, "grad_norm": 0.05927729606628418, "learning_rate": 0.01, "loss": 1.9928, "step": 45945 }, { "epoch": 4.713582273286828, "grad_norm": 0.051379501819610596, "learning_rate": 0.01, "loss": 1.9805, "step": 45948 }, { "epoch": 4.71389002872384, "grad_norm": 0.03545750305056572, "learning_rate": 0.01, "loss": 1.9946, "step": 45951 }, { "epoch": 4.714197784160854, "grad_norm": 0.044772762805223465, "learning_rate": 0.01, "loss": 1.9931, "step": 45954 }, { "epoch": 4.714505539597866, "grad_norm": 0.04172508046030998, "learning_rate": 0.01, "loss": 1.9998, "step": 45957 }, { "epoch": 4.714813295034879, "grad_norm": 0.04818372428417206, "learning_rate": 0.01, "loss": 1.9686, "step": 45960 }, { "epoch": 4.7151210504718915, "grad_norm": 0.10572110861539841, "learning_rate": 0.01, "loss": 1.9645, "step": 45963 }, { "epoch": 4.715428805908904, "grad_norm": 0.04968751594424248, "learning_rate": 0.01, "loss": 1.9699, "step": 45966 }, { "epoch": 4.715736561345917, "grad_norm": 0.08244633674621582, "learning_rate": 0.01, "loss": 1.9836, "step": 45969 }, { "epoch": 4.71604431678293, "grad_norm": 0.0786314308643341, "learning_rate": 0.01, "loss": 2.0048, "step": 45972 }, { "epoch": 4.716352072219943, "grad_norm": 0.05768943205475807, "learning_rate": 0.01, "loss": 1.9754, "step": 45975 }, { "epoch": 4.716659827656955, "grad_norm": 0.04455199092626572, "learning_rate": 0.01, "loss": 1.993, "step": 45978 }, { "epoch": 4.716967583093968, "grad_norm": 0.05535956844687462, "learning_rate": 0.01, "loss": 1.9775, "step": 45981 }, { "epoch": 4.717275338530981, "grad_norm": 0.03555374965071678, "learning_rate": 0.01, "loss": 1.9867, "step": 45984 }, { "epoch": 4.717583093967994, "grad_norm": 0.06458409130573273, "learning_rate": 0.01, "loss": 2.0006, "step": 45987 }, { "epoch": 4.7178908494050065, "grad_norm": 0.05734160915017128, "learning_rate": 0.01, "loss": 1.9711, "step": 45990 }, { "epoch": 4.718198604842019, "grad_norm": 0.06240745261311531, "learning_rate": 0.01, "loss": 2.0156, "step": 45993 }, { "epoch": 4.718506360279031, "grad_norm": 0.08987503498792648, "learning_rate": 0.01, "loss": 2.0033, "step": 45996 }, { "epoch": 4.718814115716045, "grad_norm": 0.06078198924660683, "learning_rate": 0.01, "loss": 1.9953, "step": 45999 }, { "epoch": 4.719121871153057, "grad_norm": 0.12024971842765808, "learning_rate": 0.01, "loss": 2.006, "step": 46002 }, { "epoch": 4.7194296265900695, "grad_norm": 0.06620301306247711, "learning_rate": 0.01, "loss": 1.9997, "step": 46005 }, { "epoch": 4.719737382027082, "grad_norm": 0.0450078584253788, "learning_rate": 0.01, "loss": 1.9906, "step": 46008 }, { "epoch": 4.720045137464095, "grad_norm": 0.04509684816002846, "learning_rate": 0.01, "loss": 2.0145, "step": 46011 }, { "epoch": 4.720352892901108, "grad_norm": 0.06029663607478142, "learning_rate": 0.01, "loss": 1.9678, "step": 46014 }, { "epoch": 4.720660648338121, "grad_norm": 0.06620631366968155, "learning_rate": 0.01, "loss": 2.0016, "step": 46017 }, { "epoch": 4.720968403775133, "grad_norm": 0.09920935332775116, "learning_rate": 0.01, "loss": 1.9842, "step": 46020 }, { "epoch": 4.721276159212146, "grad_norm": 0.04000372067093849, "learning_rate": 0.01, "loss": 1.9904, "step": 46023 }, { "epoch": 4.721583914649159, "grad_norm": 0.10262927412986755, "learning_rate": 0.01, "loss": 1.9864, "step": 46026 }, { "epoch": 4.721891670086172, "grad_norm": 0.07029026746749878, "learning_rate": 0.01, "loss": 2.0492, "step": 46029 }, { "epoch": 4.7221994255231845, "grad_norm": 0.03733726218342781, "learning_rate": 0.01, "loss": 1.9715, "step": 46032 }, { "epoch": 4.722507180960197, "grad_norm": 0.16273632645606995, "learning_rate": 0.01, "loss": 2.0146, "step": 46035 }, { "epoch": 4.72281493639721, "grad_norm": 0.052774641662836075, "learning_rate": 0.01, "loss": 1.9727, "step": 46038 }, { "epoch": 4.723122691834222, "grad_norm": 0.04785846173763275, "learning_rate": 0.01, "loss": 1.9824, "step": 46041 }, { "epoch": 4.723430447271236, "grad_norm": 0.046762675046920776, "learning_rate": 0.01, "loss": 2.01, "step": 46044 }, { "epoch": 4.7237382027082475, "grad_norm": 0.055082205682992935, "learning_rate": 0.01, "loss": 1.9893, "step": 46047 }, { "epoch": 4.72404595814526, "grad_norm": 0.12330979853868484, "learning_rate": 0.01, "loss": 2.006, "step": 46050 }, { "epoch": 4.724353713582273, "grad_norm": 0.0426286906003952, "learning_rate": 0.01, "loss": 2.0191, "step": 46053 }, { "epoch": 4.724661469019286, "grad_norm": 0.09888054430484772, "learning_rate": 0.01, "loss": 2.0043, "step": 46056 }, { "epoch": 4.724969224456299, "grad_norm": 0.09335997700691223, "learning_rate": 0.01, "loss": 1.9991, "step": 46059 }, { "epoch": 4.725276979893311, "grad_norm": 0.06959028542041779, "learning_rate": 0.01, "loss": 1.9583, "step": 46062 }, { "epoch": 4.725584735330324, "grad_norm": 0.06538756936788559, "learning_rate": 0.01, "loss": 1.9642, "step": 46065 }, { "epoch": 4.725892490767337, "grad_norm": 0.0652519017457962, "learning_rate": 0.01, "loss": 1.9783, "step": 46068 }, { "epoch": 4.72620024620435, "grad_norm": 0.049920666962862015, "learning_rate": 0.01, "loss": 1.9916, "step": 46071 }, { "epoch": 4.7265080016413625, "grad_norm": 0.0796656534075737, "learning_rate": 0.01, "loss": 1.9637, "step": 46074 }, { "epoch": 4.726815757078375, "grad_norm": 0.08785120397806168, "learning_rate": 0.01, "loss": 1.9645, "step": 46077 }, { "epoch": 4.727123512515388, "grad_norm": 0.05050429701805115, "learning_rate": 0.01, "loss": 2.0051, "step": 46080 }, { "epoch": 4.727431267952401, "grad_norm": 0.039991263300180435, "learning_rate": 0.01, "loss": 2.0019, "step": 46083 }, { "epoch": 4.727739023389413, "grad_norm": 0.04895581305027008, "learning_rate": 0.01, "loss": 1.9998, "step": 46086 }, { "epoch": 4.728046778826426, "grad_norm": 0.13111530244350433, "learning_rate": 0.01, "loss": 2.0091, "step": 46089 }, { "epoch": 4.728354534263438, "grad_norm": 0.07493706792593002, "learning_rate": 0.01, "loss": 1.9753, "step": 46092 }, { "epoch": 4.728662289700451, "grad_norm": 0.06003058701753616, "learning_rate": 0.01, "loss": 1.9846, "step": 46095 }, { "epoch": 4.728970045137464, "grad_norm": 0.05581682547926903, "learning_rate": 0.01, "loss": 1.986, "step": 46098 }, { "epoch": 4.729277800574477, "grad_norm": 0.0741606056690216, "learning_rate": 0.01, "loss": 1.9859, "step": 46101 }, { "epoch": 4.7295855560114894, "grad_norm": 0.05753305181860924, "learning_rate": 0.01, "loss": 1.9926, "step": 46104 }, { "epoch": 4.729893311448502, "grad_norm": 0.06790949404239655, "learning_rate": 0.01, "loss": 1.9728, "step": 46107 }, { "epoch": 4.730201066885515, "grad_norm": 0.05599404126405716, "learning_rate": 0.01, "loss": 1.9877, "step": 46110 }, { "epoch": 4.730508822322528, "grad_norm": 0.06556524336338043, "learning_rate": 0.01, "loss": 1.9785, "step": 46113 }, { "epoch": 4.730816577759541, "grad_norm": 0.043739136308431625, "learning_rate": 0.01, "loss": 1.9924, "step": 46116 }, { "epoch": 4.731124333196553, "grad_norm": 0.05598102882504463, "learning_rate": 0.01, "loss": 1.9934, "step": 46119 }, { "epoch": 4.731432088633566, "grad_norm": 0.0398273728787899, "learning_rate": 0.01, "loss": 1.9827, "step": 46122 }, { "epoch": 4.731739844070579, "grad_norm": 0.06281284987926483, "learning_rate": 0.01, "loss": 1.9762, "step": 46125 }, { "epoch": 4.732047599507592, "grad_norm": 0.0575578436255455, "learning_rate": 0.01, "loss": 1.9939, "step": 46128 }, { "epoch": 4.732355354944604, "grad_norm": 0.044283825904130936, "learning_rate": 0.01, "loss": 2.0001, "step": 46131 }, { "epoch": 4.732663110381616, "grad_norm": 0.11150834709405899, "learning_rate": 0.01, "loss": 1.9773, "step": 46134 }, { "epoch": 4.732970865818629, "grad_norm": 0.11505670100450516, "learning_rate": 0.01, "loss": 1.9894, "step": 46137 }, { "epoch": 4.733278621255642, "grad_norm": 0.10406883805990219, "learning_rate": 0.01, "loss": 1.9752, "step": 46140 }, { "epoch": 4.733586376692655, "grad_norm": 0.07568677514791489, "learning_rate": 0.01, "loss": 1.9658, "step": 46143 }, { "epoch": 4.7338941321296675, "grad_norm": 0.05774052441120148, "learning_rate": 0.01, "loss": 1.9888, "step": 46146 }, { "epoch": 4.73420188756668, "grad_norm": 0.05138164013624191, "learning_rate": 0.01, "loss": 1.9697, "step": 46149 }, { "epoch": 4.734509643003693, "grad_norm": 0.09138276427984238, "learning_rate": 0.01, "loss": 1.9881, "step": 46152 }, { "epoch": 4.734817398440706, "grad_norm": 0.04819070175290108, "learning_rate": 0.01, "loss": 1.9934, "step": 46155 }, { "epoch": 4.735125153877719, "grad_norm": 0.06231243535876274, "learning_rate": 0.01, "loss": 2.0009, "step": 46158 }, { "epoch": 4.735432909314731, "grad_norm": 0.05212223157286644, "learning_rate": 0.01, "loss": 1.9759, "step": 46161 }, { "epoch": 4.735740664751744, "grad_norm": 0.05681807920336723, "learning_rate": 0.01, "loss": 1.9808, "step": 46164 }, { "epoch": 4.736048420188757, "grad_norm": 0.039965178817510605, "learning_rate": 0.01, "loss": 1.9881, "step": 46167 }, { "epoch": 4.73635617562577, "grad_norm": 0.0815226286649704, "learning_rate": 0.01, "loss": 1.9794, "step": 46170 }, { "epoch": 4.7366639310627825, "grad_norm": 0.121616430580616, "learning_rate": 0.01, "loss": 1.9979, "step": 46173 }, { "epoch": 4.736971686499794, "grad_norm": 0.06908087432384491, "learning_rate": 0.01, "loss": 2.0142, "step": 46176 }, { "epoch": 4.737279441936807, "grad_norm": 0.043230995535850525, "learning_rate": 0.01, "loss": 1.995, "step": 46179 }, { "epoch": 4.73758719737382, "grad_norm": 0.10230391472578049, "learning_rate": 0.01, "loss": 1.967, "step": 46182 }, { "epoch": 4.737894952810833, "grad_norm": 0.06521748751401901, "learning_rate": 0.01, "loss": 1.9531, "step": 46185 }, { "epoch": 4.7382027082478455, "grad_norm": 0.052706390619277954, "learning_rate": 0.01, "loss": 1.9886, "step": 46188 }, { "epoch": 4.738510463684858, "grad_norm": 0.0332234725356102, "learning_rate": 0.01, "loss": 2.0046, "step": 46191 }, { "epoch": 4.738818219121871, "grad_norm": 0.061153993010520935, "learning_rate": 0.01, "loss": 1.9983, "step": 46194 }, { "epoch": 4.739125974558884, "grad_norm": 0.12053243815898895, "learning_rate": 0.01, "loss": 1.997, "step": 46197 }, { "epoch": 4.739433729995897, "grad_norm": 0.09798255562782288, "learning_rate": 0.01, "loss": 2.01, "step": 46200 }, { "epoch": 4.739741485432909, "grad_norm": 0.05450673773884773, "learning_rate": 0.01, "loss": 1.9899, "step": 46203 }, { "epoch": 4.740049240869922, "grad_norm": 0.03913121297955513, "learning_rate": 0.01, "loss": 1.9764, "step": 46206 }, { "epoch": 4.740356996306935, "grad_norm": 0.036051902920007706, "learning_rate": 0.01, "loss": 1.9876, "step": 46209 }, { "epoch": 4.740664751743948, "grad_norm": 0.04099172353744507, "learning_rate": 0.01, "loss": 1.9706, "step": 46212 }, { "epoch": 4.7409725071809605, "grad_norm": 0.03864535689353943, "learning_rate": 0.01, "loss": 1.981, "step": 46215 }, { "epoch": 4.741280262617973, "grad_norm": 0.055634405463933945, "learning_rate": 0.01, "loss": 2.0069, "step": 46218 }, { "epoch": 4.741588018054985, "grad_norm": 0.06536100059747696, "learning_rate": 0.01, "loss": 2.0133, "step": 46221 }, { "epoch": 4.741895773491998, "grad_norm": 0.05309274420142174, "learning_rate": 0.01, "loss": 1.9951, "step": 46224 }, { "epoch": 4.742203528929011, "grad_norm": 0.09953348338603973, "learning_rate": 0.01, "loss": 2.0127, "step": 46227 }, { "epoch": 4.7425112843660235, "grad_norm": 0.04084203764796257, "learning_rate": 0.01, "loss": 1.982, "step": 46230 }, { "epoch": 4.742819039803036, "grad_norm": 0.12028990685939789, "learning_rate": 0.01, "loss": 2.0051, "step": 46233 }, { "epoch": 4.743126795240049, "grad_norm": 0.04335862770676613, "learning_rate": 0.01, "loss": 1.9552, "step": 46236 }, { "epoch": 4.743434550677062, "grad_norm": 0.06697961688041687, "learning_rate": 0.01, "loss": 1.9944, "step": 46239 }, { "epoch": 4.743742306114075, "grad_norm": 0.047941289842128754, "learning_rate": 0.01, "loss": 1.9999, "step": 46242 }, { "epoch": 4.744050061551087, "grad_norm": 0.03778136894106865, "learning_rate": 0.01, "loss": 2.0118, "step": 46245 }, { "epoch": 4.7443578169881, "grad_norm": 0.04735583811998367, "learning_rate": 0.01, "loss": 1.9793, "step": 46248 }, { "epoch": 4.744665572425113, "grad_norm": 0.04471844062209129, "learning_rate": 0.01, "loss": 1.9721, "step": 46251 }, { "epoch": 4.744973327862126, "grad_norm": 0.03621865063905716, "learning_rate": 0.01, "loss": 1.9704, "step": 46254 }, { "epoch": 4.745281083299139, "grad_norm": 0.12407182902097702, "learning_rate": 0.01, "loss": 1.9829, "step": 46257 }, { "epoch": 4.745588838736151, "grad_norm": 0.05654411017894745, "learning_rate": 0.01, "loss": 2.0199, "step": 46260 }, { "epoch": 4.745896594173164, "grad_norm": 0.08955933898687363, "learning_rate": 0.01, "loss": 2.0169, "step": 46263 }, { "epoch": 4.746204349610176, "grad_norm": 0.08366284519433975, "learning_rate": 0.01, "loss": 1.9695, "step": 46266 }, { "epoch": 4.746512105047189, "grad_norm": 0.056272462010383606, "learning_rate": 0.01, "loss": 1.9831, "step": 46269 }, { "epoch": 4.746819860484202, "grad_norm": 0.05544830113649368, "learning_rate": 0.01, "loss": 1.9909, "step": 46272 }, { "epoch": 4.747127615921214, "grad_norm": 0.04018571227788925, "learning_rate": 0.01, "loss": 1.9737, "step": 46275 }, { "epoch": 4.747435371358227, "grad_norm": 0.07131030410528183, "learning_rate": 0.01, "loss": 2.0068, "step": 46278 }, { "epoch": 4.74774312679524, "grad_norm": 0.03746544569730759, "learning_rate": 0.01, "loss": 1.9749, "step": 46281 }, { "epoch": 4.748050882232253, "grad_norm": 0.09419632703065872, "learning_rate": 0.01, "loss": 1.9981, "step": 46284 }, { "epoch": 4.7483586376692655, "grad_norm": 0.05771811679005623, "learning_rate": 0.01, "loss": 2.0154, "step": 46287 }, { "epoch": 4.748666393106278, "grad_norm": 0.0985054075717926, "learning_rate": 0.01, "loss": 2.0356, "step": 46290 }, { "epoch": 4.748974148543291, "grad_norm": 0.06566286832094193, "learning_rate": 0.01, "loss": 2.0037, "step": 46293 }, { "epoch": 4.749281903980304, "grad_norm": 0.043330125510692596, "learning_rate": 0.01, "loss": 1.9883, "step": 46296 }, { "epoch": 4.749589659417317, "grad_norm": 0.05344987288117409, "learning_rate": 0.01, "loss": 1.9871, "step": 46299 }, { "epoch": 4.749897414854329, "grad_norm": 0.032919514924287796, "learning_rate": 0.01, "loss": 1.9709, "step": 46302 }, { "epoch": 4.750205170291342, "grad_norm": 0.08051367104053497, "learning_rate": 0.01, "loss": 1.9671, "step": 46305 }, { "epoch": 4.750512925728355, "grad_norm": 0.09481201320886612, "learning_rate": 0.01, "loss": 1.978, "step": 46308 }, { "epoch": 4.750820681165367, "grad_norm": 0.1356784701347351, "learning_rate": 0.01, "loss": 1.9896, "step": 46311 }, { "epoch": 4.75112843660238, "grad_norm": 0.12291105091571808, "learning_rate": 0.01, "loss": 2.0064, "step": 46314 }, { "epoch": 4.751436192039392, "grad_norm": 0.050581369549036026, "learning_rate": 0.01, "loss": 1.9901, "step": 46317 }, { "epoch": 4.751743947476405, "grad_norm": 0.08423294126987457, "learning_rate": 0.01, "loss": 1.9972, "step": 46320 }, { "epoch": 4.752051702913418, "grad_norm": 0.0642426609992981, "learning_rate": 0.01, "loss": 2.0018, "step": 46323 }, { "epoch": 4.752359458350431, "grad_norm": 0.042584337294101715, "learning_rate": 0.01, "loss": 2.0238, "step": 46326 }, { "epoch": 4.7526672137874435, "grad_norm": 0.04110237583518028, "learning_rate": 0.01, "loss": 2.0071, "step": 46329 }, { "epoch": 4.752974969224456, "grad_norm": 0.034602370113134384, "learning_rate": 0.01, "loss": 2.0079, "step": 46332 }, { "epoch": 4.753282724661469, "grad_norm": 0.05274072289466858, "learning_rate": 0.01, "loss": 1.9606, "step": 46335 }, { "epoch": 4.753590480098482, "grad_norm": 0.09157788008451462, "learning_rate": 0.01, "loss": 1.9881, "step": 46338 }, { "epoch": 4.753898235535495, "grad_norm": 0.04491696506738663, "learning_rate": 0.01, "loss": 1.9808, "step": 46341 }, { "epoch": 4.754205990972507, "grad_norm": 0.044836509972810745, "learning_rate": 0.01, "loss": 2.0027, "step": 46344 }, { "epoch": 4.75451374640952, "grad_norm": 0.08086413145065308, "learning_rate": 0.01, "loss": 2.0052, "step": 46347 }, { "epoch": 4.754821501846533, "grad_norm": 0.04024931788444519, "learning_rate": 0.01, "loss": 1.9634, "step": 46350 }, { "epoch": 4.755129257283546, "grad_norm": 0.053472552448511124, "learning_rate": 0.01, "loss": 1.9677, "step": 46353 }, { "epoch": 4.755437012720558, "grad_norm": 0.06579295545816422, "learning_rate": 0.01, "loss": 1.9847, "step": 46356 }, { "epoch": 4.75574476815757, "grad_norm": 0.07256436347961426, "learning_rate": 0.01, "loss": 1.9771, "step": 46359 }, { "epoch": 4.756052523594583, "grad_norm": 0.1356937736272812, "learning_rate": 0.01, "loss": 1.9961, "step": 46362 }, { "epoch": 4.756360279031596, "grad_norm": 0.06238779425621033, "learning_rate": 0.01, "loss": 1.995, "step": 46365 }, { "epoch": 4.756668034468609, "grad_norm": 0.03617888316512108, "learning_rate": 0.01, "loss": 2.0118, "step": 46368 }, { "epoch": 4.7569757899056215, "grad_norm": 0.039169710129499435, "learning_rate": 0.01, "loss": 2.0018, "step": 46371 }, { "epoch": 4.757283545342634, "grad_norm": 0.03998740762472153, "learning_rate": 0.01, "loss": 1.989, "step": 46374 }, { "epoch": 4.757591300779647, "grad_norm": 0.09922688454389572, "learning_rate": 0.01, "loss": 2.0139, "step": 46377 }, { "epoch": 4.75789905621666, "grad_norm": 0.07870779931545258, "learning_rate": 0.01, "loss": 1.9868, "step": 46380 }, { "epoch": 4.758206811653673, "grad_norm": 0.09917730838060379, "learning_rate": 0.01, "loss": 2.0119, "step": 46383 }, { "epoch": 4.758514567090685, "grad_norm": 0.14981189370155334, "learning_rate": 0.01, "loss": 1.9983, "step": 46386 }, { "epoch": 4.758822322527698, "grad_norm": 0.08356380462646484, "learning_rate": 0.01, "loss": 1.984, "step": 46389 }, { "epoch": 4.759130077964711, "grad_norm": 0.05980211868882179, "learning_rate": 0.01, "loss": 1.975, "step": 46392 }, { "epoch": 4.759437833401724, "grad_norm": 0.05891801416873932, "learning_rate": 0.01, "loss": 1.9677, "step": 46395 }, { "epoch": 4.7597455888387366, "grad_norm": 0.08687376976013184, "learning_rate": 0.01, "loss": 2.0062, "step": 46398 }, { "epoch": 4.760053344275748, "grad_norm": 0.03482063114643097, "learning_rate": 0.01, "loss": 1.9766, "step": 46401 }, { "epoch": 4.760361099712761, "grad_norm": 0.04206204041838646, "learning_rate": 0.01, "loss": 1.9951, "step": 46404 }, { "epoch": 4.760668855149774, "grad_norm": 0.051311641931533813, "learning_rate": 0.01, "loss": 1.9887, "step": 46407 }, { "epoch": 4.760976610586787, "grad_norm": 0.06586962938308716, "learning_rate": 0.01, "loss": 2.0033, "step": 46410 }, { "epoch": 4.7612843660238, "grad_norm": 0.058405712246894836, "learning_rate": 0.01, "loss": 1.9814, "step": 46413 }, { "epoch": 4.761592121460812, "grad_norm": 0.060229748487472534, "learning_rate": 0.01, "loss": 2.0056, "step": 46416 }, { "epoch": 4.761899876897825, "grad_norm": 0.08985790610313416, "learning_rate": 0.01, "loss": 2.0112, "step": 46419 }, { "epoch": 4.762207632334838, "grad_norm": 0.13349880278110504, "learning_rate": 0.01, "loss": 1.9881, "step": 46422 }, { "epoch": 4.762515387771851, "grad_norm": 0.05167052149772644, "learning_rate": 0.01, "loss": 1.9791, "step": 46425 }, { "epoch": 4.7628231432088635, "grad_norm": 0.03675553575158119, "learning_rate": 0.01, "loss": 2.0275, "step": 46428 }, { "epoch": 4.763130898645876, "grad_norm": 0.06192685663700104, "learning_rate": 0.01, "loss": 2.0186, "step": 46431 }, { "epoch": 4.763438654082889, "grad_norm": 0.06537027657032013, "learning_rate": 0.01, "loss": 1.9669, "step": 46434 }, { "epoch": 4.763746409519902, "grad_norm": 0.07077626138925552, "learning_rate": 0.01, "loss": 1.9927, "step": 46437 }, { "epoch": 4.764054164956915, "grad_norm": 0.0640728622674942, "learning_rate": 0.01, "loss": 1.9934, "step": 46440 }, { "epoch": 4.764361920393927, "grad_norm": 0.04746294394135475, "learning_rate": 0.01, "loss": 1.9545, "step": 46443 }, { "epoch": 4.764669675830939, "grad_norm": 0.04115734621882439, "learning_rate": 0.01, "loss": 1.9552, "step": 46446 }, { "epoch": 4.764977431267952, "grad_norm": 0.11647975444793701, "learning_rate": 0.01, "loss": 1.9621, "step": 46449 }, { "epoch": 4.765285186704965, "grad_norm": 0.0611908994615078, "learning_rate": 0.01, "loss": 1.9977, "step": 46452 }, { "epoch": 4.765592942141978, "grad_norm": 0.11524607241153717, "learning_rate": 0.01, "loss": 2.0042, "step": 46455 }, { "epoch": 4.76590069757899, "grad_norm": 0.10703855752944946, "learning_rate": 0.01, "loss": 1.9887, "step": 46458 }, { "epoch": 4.766208453016003, "grad_norm": 0.052681829780340195, "learning_rate": 0.01, "loss": 1.9965, "step": 46461 }, { "epoch": 4.766516208453016, "grad_norm": 0.04313669726252556, "learning_rate": 0.01, "loss": 1.9921, "step": 46464 }, { "epoch": 4.766823963890029, "grad_norm": 0.06806657463312149, "learning_rate": 0.01, "loss": 1.988, "step": 46467 }, { "epoch": 4.7671317193270415, "grad_norm": 0.07249857485294342, "learning_rate": 0.01, "loss": 1.9802, "step": 46470 }, { "epoch": 4.767439474764054, "grad_norm": 0.07816947996616364, "learning_rate": 0.01, "loss": 1.9951, "step": 46473 }, { "epoch": 4.767747230201067, "grad_norm": 0.05120289698243141, "learning_rate": 0.01, "loss": 1.9786, "step": 46476 }, { "epoch": 4.76805498563808, "grad_norm": 0.09938623756170273, "learning_rate": 0.01, "loss": 2.0115, "step": 46479 }, { "epoch": 4.768362741075093, "grad_norm": 0.060353558510541916, "learning_rate": 0.01, "loss": 1.989, "step": 46482 }, { "epoch": 4.768670496512105, "grad_norm": 0.10951244086027145, "learning_rate": 0.01, "loss": 2.0108, "step": 46485 }, { "epoch": 4.768978251949118, "grad_norm": 0.04746172949671745, "learning_rate": 0.01, "loss": 2.0104, "step": 46488 }, { "epoch": 4.76928600738613, "grad_norm": 0.07137439399957657, "learning_rate": 0.01, "loss": 1.9842, "step": 46491 }, { "epoch": 4.769593762823143, "grad_norm": 0.13574503362178802, "learning_rate": 0.01, "loss": 1.9792, "step": 46494 }, { "epoch": 4.769901518260156, "grad_norm": 0.047539595514535904, "learning_rate": 0.01, "loss": 1.97, "step": 46497 }, { "epoch": 4.770209273697168, "grad_norm": 0.0612449012696743, "learning_rate": 0.01, "loss": 1.9959, "step": 46500 }, { "epoch": 4.770517029134181, "grad_norm": 0.05257963016629219, "learning_rate": 0.01, "loss": 1.9983, "step": 46503 }, { "epoch": 4.770824784571194, "grad_norm": 0.05487014353275299, "learning_rate": 0.01, "loss": 1.9881, "step": 46506 }, { "epoch": 4.771132540008207, "grad_norm": 0.03313500061631203, "learning_rate": 0.01, "loss": 1.9812, "step": 46509 }, { "epoch": 4.7714402954452195, "grad_norm": 0.04244723170995712, "learning_rate": 0.01, "loss": 1.9694, "step": 46512 }, { "epoch": 4.771748050882232, "grad_norm": 0.06064971908926964, "learning_rate": 0.01, "loss": 1.999, "step": 46515 }, { "epoch": 4.772055806319245, "grad_norm": 0.04784930869936943, "learning_rate": 0.01, "loss": 1.9868, "step": 46518 }, { "epoch": 4.772363561756258, "grad_norm": 0.04266538843512535, "learning_rate": 0.01, "loss": 1.9772, "step": 46521 }, { "epoch": 4.772671317193271, "grad_norm": 0.09486623853445053, "learning_rate": 0.01, "loss": 1.9836, "step": 46524 }, { "epoch": 4.772979072630283, "grad_norm": 0.05113883316516876, "learning_rate": 0.01, "loss": 1.9841, "step": 46527 }, { "epoch": 4.773286828067296, "grad_norm": 0.11440654844045639, "learning_rate": 0.01, "loss": 2.0118, "step": 46530 }, { "epoch": 4.773594583504309, "grad_norm": 0.07778608053922653, "learning_rate": 0.01, "loss": 1.9965, "step": 46533 }, { "epoch": 4.773902338941321, "grad_norm": 0.042305488139390945, "learning_rate": 0.01, "loss": 1.9751, "step": 46536 }, { "epoch": 4.774210094378334, "grad_norm": 0.06056047976016998, "learning_rate": 0.01, "loss": 1.9808, "step": 46539 }, { "epoch": 4.774517849815346, "grad_norm": 0.03576405718922615, "learning_rate": 0.01, "loss": 1.9825, "step": 46542 }, { "epoch": 4.774825605252359, "grad_norm": 0.04558248445391655, "learning_rate": 0.01, "loss": 1.9967, "step": 46545 }, { "epoch": 4.775133360689372, "grad_norm": 0.04870672523975372, "learning_rate": 0.01, "loss": 1.9775, "step": 46548 }, { "epoch": 4.775441116126385, "grad_norm": 0.04591087996959686, "learning_rate": 0.01, "loss": 2.0104, "step": 46551 }, { "epoch": 4.775748871563398, "grad_norm": 0.08307138830423355, "learning_rate": 0.01, "loss": 1.9643, "step": 46554 }, { "epoch": 4.77605662700041, "grad_norm": 0.0701267346739769, "learning_rate": 0.01, "loss": 2.0051, "step": 46557 }, { "epoch": 4.776364382437423, "grad_norm": 0.13456737995147705, "learning_rate": 0.01, "loss": 1.9958, "step": 46560 }, { "epoch": 4.776672137874436, "grad_norm": 0.06605277210474014, "learning_rate": 0.01, "loss": 1.984, "step": 46563 }, { "epoch": 4.776979893311449, "grad_norm": 0.05360471457242966, "learning_rate": 0.01, "loss": 2.0167, "step": 46566 }, { "epoch": 4.7772876487484615, "grad_norm": 0.04942501336336136, "learning_rate": 0.01, "loss": 1.9641, "step": 46569 }, { "epoch": 4.777595404185474, "grad_norm": 0.09876788407564163, "learning_rate": 0.01, "loss": 2.0284, "step": 46572 }, { "epoch": 4.777903159622487, "grad_norm": 0.07075868546962738, "learning_rate": 0.01, "loss": 1.9783, "step": 46575 }, { "epoch": 4.7782109150595, "grad_norm": 0.06624365597963333, "learning_rate": 0.01, "loss": 1.976, "step": 46578 }, { "epoch": 4.778518670496512, "grad_norm": 0.04903232306241989, "learning_rate": 0.01, "loss": 1.9583, "step": 46581 }, { "epoch": 4.7788264259335245, "grad_norm": 0.06817879527807236, "learning_rate": 0.01, "loss": 1.9827, "step": 46584 }, { "epoch": 4.779134181370537, "grad_norm": 0.10077866911888123, "learning_rate": 0.01, "loss": 2.0064, "step": 46587 }, { "epoch": 4.77944193680755, "grad_norm": 0.05322563648223877, "learning_rate": 0.01, "loss": 1.9933, "step": 46590 }, { "epoch": 4.779749692244563, "grad_norm": 0.09559939801692963, "learning_rate": 0.01, "loss": 1.9541, "step": 46593 }, { "epoch": 4.780057447681576, "grad_norm": 0.06942281872034073, "learning_rate": 0.01, "loss": 2.0074, "step": 46596 }, { "epoch": 4.780365203118588, "grad_norm": 0.1250401884317398, "learning_rate": 0.01, "loss": 1.9913, "step": 46599 }, { "epoch": 4.780672958555601, "grad_norm": 0.09563729166984558, "learning_rate": 0.01, "loss": 1.9964, "step": 46602 }, { "epoch": 4.780980713992614, "grad_norm": 0.08675848692655563, "learning_rate": 0.01, "loss": 2.0147, "step": 46605 }, { "epoch": 4.781288469429627, "grad_norm": 0.0791327953338623, "learning_rate": 0.01, "loss": 1.9661, "step": 46608 }, { "epoch": 4.7815962248666395, "grad_norm": 0.10748487710952759, "learning_rate": 0.01, "loss": 2.0146, "step": 46611 }, { "epoch": 4.781903980303652, "grad_norm": 0.056079965084791183, "learning_rate": 0.01, "loss": 1.9769, "step": 46614 }, { "epoch": 4.782211735740665, "grad_norm": 0.04614640399813652, "learning_rate": 0.01, "loss": 2.0216, "step": 46617 }, { "epoch": 4.782519491177678, "grad_norm": 0.07398012280464172, "learning_rate": 0.01, "loss": 2.0094, "step": 46620 }, { "epoch": 4.782827246614691, "grad_norm": 0.07945587486028671, "learning_rate": 0.01, "loss": 1.9818, "step": 46623 }, { "epoch": 4.7831350020517025, "grad_norm": 0.040440626442432404, "learning_rate": 0.01, "loss": 1.9681, "step": 46626 }, { "epoch": 4.783442757488715, "grad_norm": 0.1381184458732605, "learning_rate": 0.01, "loss": 1.9851, "step": 46629 }, { "epoch": 4.783750512925728, "grad_norm": 0.06996659189462662, "learning_rate": 0.01, "loss": 2.0096, "step": 46632 }, { "epoch": 4.784058268362741, "grad_norm": 0.03792927786707878, "learning_rate": 0.01, "loss": 2.0041, "step": 46635 }, { "epoch": 4.784366023799754, "grad_norm": 0.03911750763654709, "learning_rate": 0.01, "loss": 2.0102, "step": 46638 }, { "epoch": 4.784673779236766, "grad_norm": 0.04588170349597931, "learning_rate": 0.01, "loss": 1.9972, "step": 46641 }, { "epoch": 4.784981534673779, "grad_norm": 0.10618551820516586, "learning_rate": 0.01, "loss": 1.9863, "step": 46644 }, { "epoch": 4.785289290110792, "grad_norm": 0.07402194291353226, "learning_rate": 0.01, "loss": 1.9799, "step": 46647 }, { "epoch": 4.785597045547805, "grad_norm": 0.05611973628401756, "learning_rate": 0.01, "loss": 1.9618, "step": 46650 }, { "epoch": 4.7859048009848175, "grad_norm": 0.062334805727005005, "learning_rate": 0.01, "loss": 1.9858, "step": 46653 }, { "epoch": 4.78621255642183, "grad_norm": 0.05972002074122429, "learning_rate": 0.01, "loss": 1.9957, "step": 46656 }, { "epoch": 4.786520311858843, "grad_norm": 0.10354367643594742, "learning_rate": 0.01, "loss": 1.9983, "step": 46659 }, { "epoch": 4.786828067295856, "grad_norm": 0.07653992623090744, "learning_rate": 0.01, "loss": 1.982, "step": 46662 }, { "epoch": 4.787135822732869, "grad_norm": 0.0790034830570221, "learning_rate": 0.01, "loss": 2.0122, "step": 46665 }, { "epoch": 4.787443578169881, "grad_norm": 0.11524873226881027, "learning_rate": 0.01, "loss": 1.9924, "step": 46668 }, { "epoch": 4.787751333606893, "grad_norm": 0.11828222125768661, "learning_rate": 0.01, "loss": 1.987, "step": 46671 }, { "epoch": 4.788059089043906, "grad_norm": 0.06129033491015434, "learning_rate": 0.01, "loss": 1.9941, "step": 46674 }, { "epoch": 4.788366844480919, "grad_norm": 0.060850802809000015, "learning_rate": 0.01, "loss": 1.9997, "step": 46677 }, { "epoch": 4.788674599917932, "grad_norm": 0.0488838255405426, "learning_rate": 0.01, "loss": 1.9862, "step": 46680 }, { "epoch": 4.788982355354944, "grad_norm": 0.03288913145661354, "learning_rate": 0.01, "loss": 2.0044, "step": 46683 }, { "epoch": 4.789290110791957, "grad_norm": 0.038961250334978104, "learning_rate": 0.01, "loss": 2.0041, "step": 46686 }, { "epoch": 4.78959786622897, "grad_norm": 0.10202857851982117, "learning_rate": 0.01, "loss": 2.019, "step": 46689 }, { "epoch": 4.789905621665983, "grad_norm": 0.05087895691394806, "learning_rate": 0.01, "loss": 1.9973, "step": 46692 }, { "epoch": 4.7902133771029956, "grad_norm": 0.07954477518796921, "learning_rate": 0.01, "loss": 1.9857, "step": 46695 }, { "epoch": 4.790521132540008, "grad_norm": 0.11283519119024277, "learning_rate": 0.01, "loss": 2.0066, "step": 46698 }, { "epoch": 4.790828887977021, "grad_norm": 0.04189791902899742, "learning_rate": 0.01, "loss": 1.9999, "step": 46701 }, { "epoch": 4.791136643414034, "grad_norm": 0.09559596329927444, "learning_rate": 0.01, "loss": 1.9667, "step": 46704 }, { "epoch": 4.791444398851047, "grad_norm": 0.07935875654220581, "learning_rate": 0.01, "loss": 1.9709, "step": 46707 }, { "epoch": 4.7917521542880595, "grad_norm": 0.09595377743244171, "learning_rate": 0.01, "loss": 2.0147, "step": 46710 }, { "epoch": 4.792059909725072, "grad_norm": 0.06680227816104889, "learning_rate": 0.01, "loss": 1.9969, "step": 46713 }, { "epoch": 4.792367665162084, "grad_norm": 0.07122410833835602, "learning_rate": 0.01, "loss": 2.0114, "step": 46716 }, { "epoch": 4.792675420599097, "grad_norm": 0.0937454104423523, "learning_rate": 0.01, "loss": 1.9891, "step": 46719 }, { "epoch": 4.79298317603611, "grad_norm": 0.08886270225048065, "learning_rate": 0.01, "loss": 1.9803, "step": 46722 }, { "epoch": 4.7932909314731225, "grad_norm": 0.04537273570895195, "learning_rate": 0.01, "loss": 2.0151, "step": 46725 }, { "epoch": 4.793598686910135, "grad_norm": 0.10327073931694031, "learning_rate": 0.01, "loss": 2.0013, "step": 46728 }, { "epoch": 4.793906442347148, "grad_norm": 0.08701428025960922, "learning_rate": 0.01, "loss": 1.9633, "step": 46731 }, { "epoch": 4.794214197784161, "grad_norm": 0.06731964647769928, "learning_rate": 0.01, "loss": 1.9849, "step": 46734 }, { "epoch": 4.794521953221174, "grad_norm": 0.15847548842430115, "learning_rate": 0.01, "loss": 2.0006, "step": 46737 }, { "epoch": 4.794829708658186, "grad_norm": 0.04891221970319748, "learning_rate": 0.01, "loss": 1.9916, "step": 46740 }, { "epoch": 4.795137464095199, "grad_norm": 0.040448226034641266, "learning_rate": 0.01, "loss": 1.9777, "step": 46743 }, { "epoch": 4.795445219532212, "grad_norm": 0.04388444870710373, "learning_rate": 0.01, "loss": 2.0007, "step": 46746 }, { "epoch": 4.795752974969225, "grad_norm": 0.05044842138886452, "learning_rate": 0.01, "loss": 1.9842, "step": 46749 }, { "epoch": 4.7960607304062375, "grad_norm": 0.17745061218738556, "learning_rate": 0.01, "loss": 1.9986, "step": 46752 }, { "epoch": 4.79636848584325, "grad_norm": 0.07918280363082886, "learning_rate": 0.01, "loss": 1.9807, "step": 46755 }, { "epoch": 4.796676241280263, "grad_norm": 0.0841284915804863, "learning_rate": 0.01, "loss": 2.0123, "step": 46758 }, { "epoch": 4.796983996717275, "grad_norm": 0.07425282150506973, "learning_rate": 0.01, "loss": 1.9821, "step": 46761 }, { "epoch": 4.797291752154288, "grad_norm": 0.11900149285793304, "learning_rate": 0.01, "loss": 2.0072, "step": 46764 }, { "epoch": 4.7975995075913005, "grad_norm": 0.054541297256946564, "learning_rate": 0.01, "loss": 1.9947, "step": 46767 }, { "epoch": 4.797907263028313, "grad_norm": 0.05434846505522728, "learning_rate": 0.01, "loss": 1.9964, "step": 46770 }, { "epoch": 4.798215018465326, "grad_norm": 0.045795127749443054, "learning_rate": 0.01, "loss": 2.0005, "step": 46773 }, { "epoch": 4.798522773902339, "grad_norm": 0.044646721333265305, "learning_rate": 0.01, "loss": 1.9882, "step": 46776 }, { "epoch": 4.798830529339352, "grad_norm": 0.06140962243080139, "learning_rate": 0.01, "loss": 1.9988, "step": 46779 }, { "epoch": 4.799138284776364, "grad_norm": 0.1806284338235855, "learning_rate": 0.01, "loss": 1.9941, "step": 46782 }, { "epoch": 4.799446040213377, "grad_norm": 0.1726996749639511, "learning_rate": 0.01, "loss": 1.9823, "step": 46785 }, { "epoch": 4.79975379565039, "grad_norm": 0.093462273478508, "learning_rate": 0.01, "loss": 2.0224, "step": 46788 }, { "epoch": 4.800061551087403, "grad_norm": 0.09162432700395584, "learning_rate": 0.01, "loss": 1.9807, "step": 46791 }, { "epoch": 4.8003693065244155, "grad_norm": 0.05243920534849167, "learning_rate": 0.01, "loss": 2.0048, "step": 46794 }, { "epoch": 4.800677061961428, "grad_norm": 0.05156363546848297, "learning_rate": 0.01, "loss": 2.0012, "step": 46797 }, { "epoch": 4.800984817398441, "grad_norm": 0.03927348554134369, "learning_rate": 0.01, "loss": 1.9735, "step": 46800 }, { "epoch": 4.801292572835454, "grad_norm": 0.04184743016958237, "learning_rate": 0.01, "loss": 1.967, "step": 46803 }, { "epoch": 4.801600328272466, "grad_norm": 0.10456445813179016, "learning_rate": 0.01, "loss": 1.9963, "step": 46806 }, { "epoch": 4.8019080837094785, "grad_norm": 0.14495427906513214, "learning_rate": 0.01, "loss": 1.9995, "step": 46809 }, { "epoch": 4.802215839146491, "grad_norm": 0.06935403496026993, "learning_rate": 0.01, "loss": 1.9953, "step": 46812 }, { "epoch": 4.802523594583504, "grad_norm": 0.07718656957149506, "learning_rate": 0.01, "loss": 1.9789, "step": 46815 }, { "epoch": 4.802831350020517, "grad_norm": 0.04619847983121872, "learning_rate": 0.01, "loss": 2.0083, "step": 46818 }, { "epoch": 4.80313910545753, "grad_norm": 0.04528527334332466, "learning_rate": 0.01, "loss": 1.9811, "step": 46821 }, { "epoch": 4.803446860894542, "grad_norm": 0.0626635029911995, "learning_rate": 0.01, "loss": 2.0067, "step": 46824 }, { "epoch": 4.803754616331555, "grad_norm": 0.055482156574726105, "learning_rate": 0.01, "loss": 2.0015, "step": 46827 }, { "epoch": 4.804062371768568, "grad_norm": 0.04317907243967056, "learning_rate": 0.01, "loss": 1.9835, "step": 46830 }, { "epoch": 4.804370127205581, "grad_norm": 0.12392894178628922, "learning_rate": 0.01, "loss": 1.9905, "step": 46833 }, { "epoch": 4.8046778826425935, "grad_norm": 0.07023516297340393, "learning_rate": 0.01, "loss": 2.0094, "step": 46836 }, { "epoch": 4.804985638079606, "grad_norm": 0.10643202811479568, "learning_rate": 0.01, "loss": 2.0031, "step": 46839 }, { "epoch": 4.805293393516619, "grad_norm": 0.07375111430883408, "learning_rate": 0.01, "loss": 1.9657, "step": 46842 }, { "epoch": 4.805601148953632, "grad_norm": 0.039533890783786774, "learning_rate": 0.01, "loss": 1.9951, "step": 46845 }, { "epoch": 4.805908904390645, "grad_norm": 0.03812634199857712, "learning_rate": 0.01, "loss": 1.9887, "step": 46848 }, { "epoch": 4.806216659827657, "grad_norm": 0.049066439270973206, "learning_rate": 0.01, "loss": 1.9826, "step": 46851 }, { "epoch": 4.806524415264669, "grad_norm": 0.17093585431575775, "learning_rate": 0.01, "loss": 1.996, "step": 46854 }, { "epoch": 4.806832170701682, "grad_norm": 0.06310732662677765, "learning_rate": 0.01, "loss": 1.9727, "step": 46857 }, { "epoch": 4.807139926138695, "grad_norm": 0.04781056195497513, "learning_rate": 0.01, "loss": 1.987, "step": 46860 }, { "epoch": 4.807447681575708, "grad_norm": 0.04082764312624931, "learning_rate": 0.01, "loss": 2.0017, "step": 46863 }, { "epoch": 4.8077554370127205, "grad_norm": 0.06627890467643738, "learning_rate": 0.01, "loss": 2.0018, "step": 46866 }, { "epoch": 4.808063192449733, "grad_norm": 0.07753533869981766, "learning_rate": 0.01, "loss": 2.0041, "step": 46869 }, { "epoch": 4.808370947886746, "grad_norm": 0.05382109060883522, "learning_rate": 0.01, "loss": 2.0029, "step": 46872 }, { "epoch": 4.808678703323759, "grad_norm": 0.06153309345245361, "learning_rate": 0.01, "loss": 1.9806, "step": 46875 }, { "epoch": 4.808986458760772, "grad_norm": 0.0524064376950264, "learning_rate": 0.01, "loss": 1.975, "step": 46878 }, { "epoch": 4.809294214197784, "grad_norm": 0.04740193486213684, "learning_rate": 0.01, "loss": 1.9846, "step": 46881 }, { "epoch": 4.809601969634797, "grad_norm": 0.04750111699104309, "learning_rate": 0.01, "loss": 2.0125, "step": 46884 }, { "epoch": 4.80990972507181, "grad_norm": 0.1953573226928711, "learning_rate": 0.01, "loss": 1.9876, "step": 46887 }, { "epoch": 4.810217480508823, "grad_norm": 0.05784987658262253, "learning_rate": 0.01, "loss": 1.9961, "step": 46890 }, { "epoch": 4.8105252359458355, "grad_norm": 0.06495459377765656, "learning_rate": 0.01, "loss": 1.9802, "step": 46893 }, { "epoch": 4.810832991382847, "grad_norm": 0.04319414868950844, "learning_rate": 0.01, "loss": 1.9865, "step": 46896 }, { "epoch": 4.81114074681986, "grad_norm": 0.05057989060878754, "learning_rate": 0.01, "loss": 2.0131, "step": 46899 }, { "epoch": 4.811448502256873, "grad_norm": 0.04836462065577507, "learning_rate": 0.01, "loss": 1.9928, "step": 46902 }, { "epoch": 4.811756257693886, "grad_norm": 0.06928513944149017, "learning_rate": 0.01, "loss": 1.9722, "step": 46905 }, { "epoch": 4.8120640131308985, "grad_norm": 0.04418247938156128, "learning_rate": 0.01, "loss": 1.9731, "step": 46908 }, { "epoch": 4.812371768567911, "grad_norm": 0.05600956827402115, "learning_rate": 0.01, "loss": 1.9597, "step": 46911 }, { "epoch": 4.812679524004924, "grad_norm": 0.18245935440063477, "learning_rate": 0.01, "loss": 1.9895, "step": 46914 }, { "epoch": 4.812987279441937, "grad_norm": 0.05147803574800491, "learning_rate": 0.01, "loss": 1.9836, "step": 46917 }, { "epoch": 4.81329503487895, "grad_norm": 0.049182165414094925, "learning_rate": 0.01, "loss": 1.9799, "step": 46920 }, { "epoch": 4.813602790315962, "grad_norm": 0.03926403820514679, "learning_rate": 0.01, "loss": 1.9839, "step": 46923 }, { "epoch": 4.813910545752975, "grad_norm": 0.04399009421467781, "learning_rate": 0.01, "loss": 1.9911, "step": 46926 }, { "epoch": 4.814218301189988, "grad_norm": 0.05575468763709068, "learning_rate": 0.01, "loss": 2.0025, "step": 46929 }, { "epoch": 4.814526056627001, "grad_norm": 0.056770700961351395, "learning_rate": 0.01, "loss": 2.0191, "step": 46932 }, { "epoch": 4.8148338120640135, "grad_norm": 0.0790528804063797, "learning_rate": 0.01, "loss": 1.9792, "step": 46935 }, { "epoch": 4.815141567501026, "grad_norm": 0.07976264506578445, "learning_rate": 0.01, "loss": 2.0267, "step": 46938 }, { "epoch": 4.815449322938038, "grad_norm": 0.058160681277513504, "learning_rate": 0.01, "loss": 1.9802, "step": 46941 }, { "epoch": 4.815757078375051, "grad_norm": 0.05894159525632858, "learning_rate": 0.01, "loss": 2.0006, "step": 46944 }, { "epoch": 4.816064833812064, "grad_norm": 0.061383627355098724, "learning_rate": 0.01, "loss": 1.9937, "step": 46947 }, { "epoch": 4.8163725892490765, "grad_norm": 0.06993051618337631, "learning_rate": 0.01, "loss": 1.9951, "step": 46950 }, { "epoch": 4.816680344686089, "grad_norm": 0.1546536386013031, "learning_rate": 0.01, "loss": 1.9949, "step": 46953 }, { "epoch": 4.816988100123102, "grad_norm": 0.11143519729375839, "learning_rate": 0.01, "loss": 1.9965, "step": 46956 }, { "epoch": 4.817295855560115, "grad_norm": 0.054721418768167496, "learning_rate": 0.01, "loss": 2.0038, "step": 46959 }, { "epoch": 4.817603610997128, "grad_norm": 0.04508695378899574, "learning_rate": 0.01, "loss": 1.9824, "step": 46962 }, { "epoch": 4.81791136643414, "grad_norm": 0.06738116592168808, "learning_rate": 0.01, "loss": 1.9702, "step": 46965 }, { "epoch": 4.818219121871153, "grad_norm": 0.05306596681475639, "learning_rate": 0.01, "loss": 1.9741, "step": 46968 }, { "epoch": 4.818526877308166, "grad_norm": 0.05283980816602707, "learning_rate": 0.01, "loss": 1.9738, "step": 46971 }, { "epoch": 4.818834632745179, "grad_norm": 0.04085366427898407, "learning_rate": 0.01, "loss": 1.9924, "step": 46974 }, { "epoch": 4.8191423881821915, "grad_norm": 0.0549020953476429, "learning_rate": 0.01, "loss": 1.9762, "step": 46977 }, { "epoch": 4.819450143619204, "grad_norm": 0.08194736391305923, "learning_rate": 0.01, "loss": 1.9834, "step": 46980 }, { "epoch": 4.819757899056217, "grad_norm": 0.07541119307279587, "learning_rate": 0.01, "loss": 2.0103, "step": 46983 }, { "epoch": 4.820065654493229, "grad_norm": 0.05164722725749016, "learning_rate": 0.01, "loss": 1.988, "step": 46986 }, { "epoch": 4.820373409930242, "grad_norm": 0.03965797275304794, "learning_rate": 0.01, "loss": 1.988, "step": 46989 }, { "epoch": 4.8206811653672546, "grad_norm": 0.05798366665840149, "learning_rate": 0.01, "loss": 2.0045, "step": 46992 }, { "epoch": 4.820988920804267, "grad_norm": 0.09747203439474106, "learning_rate": 0.01, "loss": 1.9585, "step": 46995 }, { "epoch": 4.82129667624128, "grad_norm": 0.037640564143657684, "learning_rate": 0.01, "loss": 1.9987, "step": 46998 }, { "epoch": 4.821604431678293, "grad_norm": 0.06714722514152527, "learning_rate": 0.01, "loss": 1.9763, "step": 47001 }, { "epoch": 4.821912187115306, "grad_norm": 0.06069006398320198, "learning_rate": 0.01, "loss": 1.9959, "step": 47004 }, { "epoch": 4.8222199425523185, "grad_norm": 0.04241282492876053, "learning_rate": 0.01, "loss": 1.9665, "step": 47007 }, { "epoch": 4.822527697989331, "grad_norm": 0.07319297641515732, "learning_rate": 0.01, "loss": 1.961, "step": 47010 }, { "epoch": 4.822835453426344, "grad_norm": 0.11949057132005692, "learning_rate": 0.01, "loss": 1.9639, "step": 47013 }, { "epoch": 4.823143208863357, "grad_norm": 0.10720200836658478, "learning_rate": 0.01, "loss": 1.9813, "step": 47016 }, { "epoch": 4.82345096430037, "grad_norm": 0.05312662571668625, "learning_rate": 0.01, "loss": 2.0028, "step": 47019 }, { "epoch": 4.823758719737382, "grad_norm": 0.05288184434175491, "learning_rate": 0.01, "loss": 2.0004, "step": 47022 }, { "epoch": 4.824066475174395, "grad_norm": 0.04024795815348625, "learning_rate": 0.01, "loss": 1.9603, "step": 47025 }, { "epoch": 4.824374230611408, "grad_norm": 0.03714558109641075, "learning_rate": 0.01, "loss": 1.9997, "step": 47028 }, { "epoch": 4.82468198604842, "grad_norm": 0.043919868767261505, "learning_rate": 0.01, "loss": 2.0067, "step": 47031 }, { "epoch": 4.824989741485433, "grad_norm": 0.09199999272823334, "learning_rate": 0.01, "loss": 2.0002, "step": 47034 }, { "epoch": 4.825297496922445, "grad_norm": 0.09651891142129898, "learning_rate": 0.01, "loss": 1.9986, "step": 47037 }, { "epoch": 4.825605252359458, "grad_norm": 0.11211739480495453, "learning_rate": 0.01, "loss": 1.9967, "step": 47040 }, { "epoch": 4.825913007796471, "grad_norm": 0.11191114038228989, "learning_rate": 0.01, "loss": 1.9963, "step": 47043 }, { "epoch": 4.826220763233484, "grad_norm": 0.06726837903261185, "learning_rate": 0.01, "loss": 2.001, "step": 47046 }, { "epoch": 4.8265285186704965, "grad_norm": 0.04758633300662041, "learning_rate": 0.01, "loss": 2.0015, "step": 47049 }, { "epoch": 4.826836274107509, "grad_norm": 0.07138072699308395, "learning_rate": 0.01, "loss": 1.9816, "step": 47052 }, { "epoch": 4.827144029544522, "grad_norm": 0.08691728860139847, "learning_rate": 0.01, "loss": 1.9933, "step": 47055 }, { "epoch": 4.827451784981535, "grad_norm": 0.060696471482515335, "learning_rate": 0.01, "loss": 1.9884, "step": 47058 }, { "epoch": 4.827759540418548, "grad_norm": 0.04225600138306618, "learning_rate": 0.01, "loss": 1.9765, "step": 47061 }, { "epoch": 4.82806729585556, "grad_norm": 0.03796116262674332, "learning_rate": 0.01, "loss": 1.9722, "step": 47064 }, { "epoch": 4.828375051292573, "grad_norm": 0.056367259472608566, "learning_rate": 0.01, "loss": 2.0058, "step": 47067 }, { "epoch": 4.828682806729586, "grad_norm": 0.10544770210981369, "learning_rate": 0.01, "loss": 2.0124, "step": 47070 }, { "epoch": 4.828990562166599, "grad_norm": 0.06839180737733841, "learning_rate": 0.01, "loss": 2.0131, "step": 47073 }, { "epoch": 4.829298317603611, "grad_norm": 0.08350997418165207, "learning_rate": 0.01, "loss": 2.0025, "step": 47076 }, { "epoch": 4.829606073040623, "grad_norm": 0.05540642887353897, "learning_rate": 0.01, "loss": 1.97, "step": 47079 }, { "epoch": 4.829913828477636, "grad_norm": 0.08671069890260696, "learning_rate": 0.01, "loss": 1.967, "step": 47082 }, { "epoch": 4.830221583914649, "grad_norm": 0.1410573422908783, "learning_rate": 0.01, "loss": 1.9948, "step": 47085 }, { "epoch": 4.830529339351662, "grad_norm": 0.06324710696935654, "learning_rate": 0.01, "loss": 1.9722, "step": 47088 }, { "epoch": 4.8308370947886745, "grad_norm": 0.04952717572450638, "learning_rate": 0.01, "loss": 1.972, "step": 47091 }, { "epoch": 4.831144850225687, "grad_norm": 0.058474645018577576, "learning_rate": 0.01, "loss": 1.9962, "step": 47094 }, { "epoch": 4.8314526056627, "grad_norm": 0.08904005587100983, "learning_rate": 0.01, "loss": 1.9938, "step": 47097 }, { "epoch": 4.831760361099713, "grad_norm": 0.03303196281194687, "learning_rate": 0.01, "loss": 1.9925, "step": 47100 }, { "epoch": 4.832068116536726, "grad_norm": 0.0696646049618721, "learning_rate": 0.01, "loss": 2.0134, "step": 47103 }, { "epoch": 4.832375871973738, "grad_norm": 0.09613518416881561, "learning_rate": 0.01, "loss": 2.0197, "step": 47106 }, { "epoch": 4.832683627410751, "grad_norm": 0.08955409377813339, "learning_rate": 0.01, "loss": 1.9708, "step": 47109 }, { "epoch": 4.832991382847764, "grad_norm": 0.07022068649530411, "learning_rate": 0.01, "loss": 2.0066, "step": 47112 }, { "epoch": 4.833299138284777, "grad_norm": 0.05557604506611824, "learning_rate": 0.01, "loss": 1.9883, "step": 47115 }, { "epoch": 4.8336068937217895, "grad_norm": 0.04961394891142845, "learning_rate": 0.01, "loss": 1.9734, "step": 47118 }, { "epoch": 4.833914649158801, "grad_norm": 0.07381154596805573, "learning_rate": 0.01, "loss": 1.9893, "step": 47121 }, { "epoch": 4.834222404595814, "grad_norm": 0.08874998986721039, "learning_rate": 0.01, "loss": 1.9819, "step": 47124 }, { "epoch": 4.834530160032827, "grad_norm": 0.12152114510536194, "learning_rate": 0.01, "loss": 1.9904, "step": 47127 }, { "epoch": 4.83483791546984, "grad_norm": 0.18640320003032684, "learning_rate": 0.01, "loss": 2.0007, "step": 47130 }, { "epoch": 4.8351456709068525, "grad_norm": 0.13211971521377563, "learning_rate": 0.01, "loss": 1.9598, "step": 47133 }, { "epoch": 4.835453426343865, "grad_norm": 0.0825662836432457, "learning_rate": 0.01, "loss": 2.0019, "step": 47136 }, { "epoch": 4.835761181780878, "grad_norm": 0.05962991341948509, "learning_rate": 0.01, "loss": 1.991, "step": 47139 }, { "epoch": 4.836068937217891, "grad_norm": 0.045787274837493896, "learning_rate": 0.01, "loss": 1.9837, "step": 47142 }, { "epoch": 4.836376692654904, "grad_norm": 0.05166277661919594, "learning_rate": 0.01, "loss": 1.9703, "step": 47145 }, { "epoch": 4.836684448091916, "grad_norm": 0.07144337892532349, "learning_rate": 0.01, "loss": 2.0034, "step": 47148 }, { "epoch": 4.836992203528929, "grad_norm": 0.03732317313551903, "learning_rate": 0.01, "loss": 2.0051, "step": 47151 }, { "epoch": 4.837299958965942, "grad_norm": 0.06613879650831223, "learning_rate": 0.01, "loss": 1.9917, "step": 47154 }, { "epoch": 4.837607714402955, "grad_norm": 0.05564868822693825, "learning_rate": 0.01, "loss": 2.0116, "step": 47157 }, { "epoch": 4.837915469839967, "grad_norm": 0.03259768709540367, "learning_rate": 0.01, "loss": 2.0112, "step": 47160 }, { "epoch": 4.83822322527698, "grad_norm": 0.11291679739952087, "learning_rate": 0.01, "loss": 1.9619, "step": 47163 }, { "epoch": 4.838530980713992, "grad_norm": 0.06787018477916718, "learning_rate": 0.01, "loss": 2.0003, "step": 47166 }, { "epoch": 4.838838736151005, "grad_norm": 0.10512536764144897, "learning_rate": 0.01, "loss": 1.986, "step": 47169 }, { "epoch": 4.839146491588018, "grad_norm": 0.10330035537481308, "learning_rate": 0.01, "loss": 1.9875, "step": 47172 }, { "epoch": 4.839454247025031, "grad_norm": 0.06286308169364929, "learning_rate": 0.01, "loss": 1.994, "step": 47175 }, { "epoch": 4.839762002462043, "grad_norm": 0.04793789982795715, "learning_rate": 0.01, "loss": 1.9849, "step": 47178 }, { "epoch": 4.840069757899056, "grad_norm": 0.043382007628679276, "learning_rate": 0.01, "loss": 1.9958, "step": 47181 }, { "epoch": 4.840377513336069, "grad_norm": 0.08022630959749222, "learning_rate": 0.01, "loss": 1.9936, "step": 47184 }, { "epoch": 4.840685268773082, "grad_norm": 0.09300918877124786, "learning_rate": 0.01, "loss": 1.959, "step": 47187 }, { "epoch": 4.8409930242100945, "grad_norm": 0.05550304800271988, "learning_rate": 0.01, "loss": 1.9739, "step": 47190 }, { "epoch": 4.841300779647107, "grad_norm": 0.0407077856361866, "learning_rate": 0.01, "loss": 1.9859, "step": 47193 }, { "epoch": 4.84160853508412, "grad_norm": 0.04916580393910408, "learning_rate": 0.01, "loss": 1.9728, "step": 47196 }, { "epoch": 4.841916290521133, "grad_norm": 0.090577132999897, "learning_rate": 0.01, "loss": 1.9765, "step": 47199 }, { "epoch": 4.842224045958146, "grad_norm": 0.11230761557817459, "learning_rate": 0.01, "loss": 1.975, "step": 47202 }, { "epoch": 4.8425318013951575, "grad_norm": 0.11779794096946716, "learning_rate": 0.01, "loss": 1.9952, "step": 47205 }, { "epoch": 4.842839556832171, "grad_norm": 0.04648299142718315, "learning_rate": 0.01, "loss": 2.0093, "step": 47208 }, { "epoch": 4.843147312269183, "grad_norm": 0.04901035502552986, "learning_rate": 0.01, "loss": 1.9869, "step": 47211 }, { "epoch": 4.843455067706196, "grad_norm": 0.05582377314567566, "learning_rate": 0.01, "loss": 1.9938, "step": 47214 }, { "epoch": 4.843762823143209, "grad_norm": 0.08325490355491638, "learning_rate": 0.01, "loss": 1.9974, "step": 47217 }, { "epoch": 4.844070578580221, "grad_norm": 0.09663175791501999, "learning_rate": 0.01, "loss": 1.983, "step": 47220 }, { "epoch": 4.844378334017234, "grad_norm": 0.067304328083992, "learning_rate": 0.01, "loss": 1.9912, "step": 47223 }, { "epoch": 4.844686089454247, "grad_norm": 0.09804333001375198, "learning_rate": 0.01, "loss": 2.0023, "step": 47226 }, { "epoch": 4.84499384489126, "grad_norm": 0.07931843400001526, "learning_rate": 0.01, "loss": 1.9744, "step": 47229 }, { "epoch": 4.8453016003282725, "grad_norm": 0.07244410365819931, "learning_rate": 0.01, "loss": 2.0131, "step": 47232 }, { "epoch": 4.845609355765285, "grad_norm": 0.08068135380744934, "learning_rate": 0.01, "loss": 1.9623, "step": 47235 }, { "epoch": 4.845917111202298, "grad_norm": 0.062296342104673386, "learning_rate": 0.01, "loss": 1.993, "step": 47238 }, { "epoch": 4.846224866639311, "grad_norm": 0.09988021850585938, "learning_rate": 0.01, "loss": 2.0143, "step": 47241 }, { "epoch": 4.846532622076324, "grad_norm": 0.12119688838720322, "learning_rate": 0.01, "loss": 1.986, "step": 47244 }, { "epoch": 4.846840377513336, "grad_norm": 0.06407175213098526, "learning_rate": 0.01, "loss": 2.0009, "step": 47247 }, { "epoch": 4.847148132950348, "grad_norm": 0.05600906163454056, "learning_rate": 0.01, "loss": 2.0139, "step": 47250 }, { "epoch": 4.847455888387362, "grad_norm": 0.04448190703988075, "learning_rate": 0.01, "loss": 1.9816, "step": 47253 }, { "epoch": 4.847763643824374, "grad_norm": 0.046519339084625244, "learning_rate": 0.01, "loss": 1.9892, "step": 47256 }, { "epoch": 4.848071399261387, "grad_norm": 0.07584928721189499, "learning_rate": 0.01, "loss": 1.9972, "step": 47259 }, { "epoch": 4.848379154698399, "grad_norm": 0.06511086225509644, "learning_rate": 0.01, "loss": 1.9875, "step": 47262 }, { "epoch": 4.848686910135412, "grad_norm": 0.10978051275014877, "learning_rate": 0.01, "loss": 1.9995, "step": 47265 }, { "epoch": 4.848994665572425, "grad_norm": 0.10694348812103271, "learning_rate": 0.01, "loss": 2.0037, "step": 47268 }, { "epoch": 4.849302421009438, "grad_norm": 0.11489532142877579, "learning_rate": 0.01, "loss": 1.9941, "step": 47271 }, { "epoch": 4.8496101764464505, "grad_norm": 0.061532165855169296, "learning_rate": 0.01, "loss": 2.0089, "step": 47274 }, { "epoch": 4.849917931883463, "grad_norm": 0.034115977585315704, "learning_rate": 0.01, "loss": 1.9695, "step": 47277 }, { "epoch": 4.850225687320476, "grad_norm": 0.07114948332309723, "learning_rate": 0.01, "loss": 2.0014, "step": 47280 }, { "epoch": 4.850533442757489, "grad_norm": 0.03593161702156067, "learning_rate": 0.01, "loss": 1.9972, "step": 47283 }, { "epoch": 4.850841198194502, "grad_norm": 0.09017783403396606, "learning_rate": 0.01, "loss": 1.9703, "step": 47286 }, { "epoch": 4.851148953631514, "grad_norm": 0.045291002839803696, "learning_rate": 0.01, "loss": 1.9802, "step": 47289 }, { "epoch": 4.851456709068527, "grad_norm": 0.05328730493783951, "learning_rate": 0.01, "loss": 1.9684, "step": 47292 }, { "epoch": 4.851764464505539, "grad_norm": 0.08284175395965576, "learning_rate": 0.01, "loss": 1.9943, "step": 47295 }, { "epoch": 4.852072219942553, "grad_norm": 0.04549727216362953, "learning_rate": 0.01, "loss": 1.9808, "step": 47298 }, { "epoch": 4.852379975379565, "grad_norm": 0.09103915095329285, "learning_rate": 0.01, "loss": 1.9745, "step": 47301 }, { "epoch": 4.8526877308165774, "grad_norm": 0.09698940068483353, "learning_rate": 0.01, "loss": 2.0048, "step": 47304 }, { "epoch": 4.85299548625359, "grad_norm": 0.12437165528535843, "learning_rate": 0.01, "loss": 1.9746, "step": 47307 }, { "epoch": 4.853303241690603, "grad_norm": 0.0723615437746048, "learning_rate": 0.01, "loss": 1.9948, "step": 47310 }, { "epoch": 4.853610997127616, "grad_norm": 0.06390126794576645, "learning_rate": 0.01, "loss": 1.9994, "step": 47313 }, { "epoch": 4.853918752564629, "grad_norm": 0.04488823935389519, "learning_rate": 0.01, "loss": 2.0044, "step": 47316 }, { "epoch": 4.854226508001641, "grad_norm": 0.036022037267684937, "learning_rate": 0.01, "loss": 1.9725, "step": 47319 }, { "epoch": 4.854534263438654, "grad_norm": 0.06746529787778854, "learning_rate": 0.01, "loss": 1.9735, "step": 47322 }, { "epoch": 4.854842018875667, "grad_norm": 0.06267426908016205, "learning_rate": 0.01, "loss": 1.97, "step": 47325 }, { "epoch": 4.85514977431268, "grad_norm": 0.08596473187208176, "learning_rate": 0.01, "loss": 1.9942, "step": 47328 }, { "epoch": 4.8554575297496925, "grad_norm": 0.04593675211071968, "learning_rate": 0.01, "loss": 1.9986, "step": 47331 }, { "epoch": 4.855765285186705, "grad_norm": 0.08707760274410248, "learning_rate": 0.01, "loss": 1.9881, "step": 47334 }, { "epoch": 4.856073040623718, "grad_norm": 0.05224407836794853, "learning_rate": 0.01, "loss": 1.9835, "step": 47337 }, { "epoch": 4.85638079606073, "grad_norm": 0.05910166725516319, "learning_rate": 0.01, "loss": 1.9864, "step": 47340 }, { "epoch": 4.856688551497744, "grad_norm": 0.14354678988456726, "learning_rate": 0.01, "loss": 1.986, "step": 47343 }, { "epoch": 4.8569963069347555, "grad_norm": 0.11897499859333038, "learning_rate": 0.01, "loss": 2.0184, "step": 47346 }, { "epoch": 4.857304062371768, "grad_norm": 0.06904298812150955, "learning_rate": 0.01, "loss": 1.9881, "step": 47349 }, { "epoch": 4.857611817808781, "grad_norm": 0.04855037108063698, "learning_rate": 0.01, "loss": 1.9921, "step": 47352 }, { "epoch": 4.857919573245794, "grad_norm": 0.042485300451517105, "learning_rate": 0.01, "loss": 1.9864, "step": 47355 }, { "epoch": 4.858227328682807, "grad_norm": 0.052240677177906036, "learning_rate": 0.01, "loss": 1.9711, "step": 47358 }, { "epoch": 4.858535084119819, "grad_norm": 0.09882494062185287, "learning_rate": 0.01, "loss": 1.9753, "step": 47361 }, { "epoch": 4.858842839556832, "grad_norm": 0.059417568147182465, "learning_rate": 0.01, "loss": 1.9699, "step": 47364 }, { "epoch": 4.859150594993845, "grad_norm": 0.042187973856925964, "learning_rate": 0.01, "loss": 1.9445, "step": 47367 }, { "epoch": 4.859458350430858, "grad_norm": 0.11045199632644653, "learning_rate": 0.01, "loss": 2.0084, "step": 47370 }, { "epoch": 4.8597661058678705, "grad_norm": 0.06902570277452469, "learning_rate": 0.01, "loss": 1.9933, "step": 47373 }, { "epoch": 4.860073861304883, "grad_norm": 0.1256779134273529, "learning_rate": 0.01, "loss": 2.0201, "step": 47376 }, { "epoch": 4.860381616741896, "grad_norm": 0.0714133083820343, "learning_rate": 0.01, "loss": 1.9732, "step": 47379 }, { "epoch": 4.860689372178909, "grad_norm": 0.050711020827293396, "learning_rate": 0.01, "loss": 1.9994, "step": 47382 }, { "epoch": 4.860997127615921, "grad_norm": 0.04845008999109268, "learning_rate": 0.01, "loss": 1.9875, "step": 47385 }, { "epoch": 4.861304883052934, "grad_norm": 0.04254496470093727, "learning_rate": 0.01, "loss": 2.0029, "step": 47388 }, { "epoch": 4.861612638489946, "grad_norm": 0.03248690441250801, "learning_rate": 0.01, "loss": 1.9794, "step": 47391 }, { "epoch": 4.861920393926959, "grad_norm": 0.05999647080898285, "learning_rate": 0.01, "loss": 1.9929, "step": 47394 }, { "epoch": 4.862228149363972, "grad_norm": 0.09282340854406357, "learning_rate": 0.01, "loss": 1.9877, "step": 47397 }, { "epoch": 4.862535904800985, "grad_norm": 0.04029770568013191, "learning_rate": 0.01, "loss": 1.9877, "step": 47400 }, { "epoch": 4.862843660237997, "grad_norm": 0.030764177441596985, "learning_rate": 0.01, "loss": 1.9783, "step": 47403 }, { "epoch": 4.86315141567501, "grad_norm": 0.04076581448316574, "learning_rate": 0.01, "loss": 1.9974, "step": 47406 }, { "epoch": 4.863459171112023, "grad_norm": 0.0759870857000351, "learning_rate": 0.01, "loss": 1.9605, "step": 47409 }, { "epoch": 4.863766926549036, "grad_norm": 0.041376739740371704, "learning_rate": 0.01, "loss": 2.0275, "step": 47412 }, { "epoch": 4.8640746819860485, "grad_norm": 0.13923729956150055, "learning_rate": 0.01, "loss": 1.9829, "step": 47415 }, { "epoch": 4.864382437423061, "grad_norm": 0.06702505052089691, "learning_rate": 0.01, "loss": 2.002, "step": 47418 }, { "epoch": 4.864690192860074, "grad_norm": 0.04206367954611778, "learning_rate": 0.01, "loss": 2.003, "step": 47421 }, { "epoch": 4.864997948297087, "grad_norm": 0.043106552213430405, "learning_rate": 0.01, "loss": 1.9679, "step": 47424 }, { "epoch": 4.8653057037341, "grad_norm": 0.04751205816864967, "learning_rate": 0.01, "loss": 1.9687, "step": 47427 }, { "epoch": 4.8656134591711115, "grad_norm": 0.11286492645740509, "learning_rate": 0.01, "loss": 1.9888, "step": 47430 }, { "epoch": 4.865921214608125, "grad_norm": 0.04195297136902809, "learning_rate": 0.01, "loss": 2.0008, "step": 47433 }, { "epoch": 4.866228970045137, "grad_norm": 0.03481072559952736, "learning_rate": 0.01, "loss": 1.982, "step": 47436 }, { "epoch": 4.86653672548215, "grad_norm": 0.0647101029753685, "learning_rate": 0.01, "loss": 1.9958, "step": 47439 }, { "epoch": 4.866844480919163, "grad_norm": 0.09517384320497513, "learning_rate": 0.01, "loss": 1.9902, "step": 47442 }, { "epoch": 4.867152236356175, "grad_norm": 0.06879560649394989, "learning_rate": 0.01, "loss": 1.9897, "step": 47445 }, { "epoch": 4.867459991793188, "grad_norm": 0.08635231107473373, "learning_rate": 0.01, "loss": 1.9884, "step": 47448 }, { "epoch": 4.867767747230201, "grad_norm": 0.04220673069357872, "learning_rate": 0.01, "loss": 1.9774, "step": 47451 }, { "epoch": 4.868075502667214, "grad_norm": 0.07045330852270126, "learning_rate": 0.01, "loss": 1.9817, "step": 47454 }, { "epoch": 4.868383258104227, "grad_norm": 0.04260876774787903, "learning_rate": 0.01, "loss": 1.9859, "step": 47457 }, { "epoch": 4.868691013541239, "grad_norm": 0.040992897003889084, "learning_rate": 0.01, "loss": 1.9635, "step": 47460 }, { "epoch": 4.868998768978252, "grad_norm": 0.04472249746322632, "learning_rate": 0.01, "loss": 2.0002, "step": 47463 }, { "epoch": 4.869306524415265, "grad_norm": 0.03990897163748741, "learning_rate": 0.01, "loss": 1.9875, "step": 47466 }, { "epoch": 4.869614279852278, "grad_norm": 0.10024430602788925, "learning_rate": 0.01, "loss": 1.9922, "step": 47469 }, { "epoch": 4.8699220352892905, "grad_norm": 0.08980470895767212, "learning_rate": 0.01, "loss": 1.9894, "step": 47472 }, { "epoch": 4.870229790726302, "grad_norm": 0.05399052053689957, "learning_rate": 0.01, "loss": 1.9665, "step": 47475 }, { "epoch": 4.870537546163316, "grad_norm": 0.03817398473620415, "learning_rate": 0.01, "loss": 1.9943, "step": 47478 }, { "epoch": 4.870845301600328, "grad_norm": 0.0670650526881218, "learning_rate": 0.01, "loss": 1.9897, "step": 47481 }, { "epoch": 4.871153057037341, "grad_norm": 0.0896904319524765, "learning_rate": 0.01, "loss": 1.9982, "step": 47484 }, { "epoch": 4.8714608124743535, "grad_norm": 0.10440392047166824, "learning_rate": 0.01, "loss": 1.9863, "step": 47487 }, { "epoch": 4.871768567911366, "grad_norm": 0.0894385576248169, "learning_rate": 0.01, "loss": 1.9936, "step": 47490 }, { "epoch": 4.872076323348379, "grad_norm": 0.05117709934711456, "learning_rate": 0.01, "loss": 2.0049, "step": 47493 }, { "epoch": 4.872384078785392, "grad_norm": 0.03292097896337509, "learning_rate": 0.01, "loss": 1.9781, "step": 47496 }, { "epoch": 4.872691834222405, "grad_norm": 0.032028477638959885, "learning_rate": 0.01, "loss": 1.9925, "step": 47499 }, { "epoch": 4.872999589659417, "grad_norm": 0.046977583318948746, "learning_rate": 0.01, "loss": 2.0151, "step": 47502 }, { "epoch": 4.87330734509643, "grad_norm": 0.11463116854429245, "learning_rate": 0.01, "loss": 1.9807, "step": 47505 }, { "epoch": 4.873615100533443, "grad_norm": 0.031687479466199875, "learning_rate": 0.01, "loss": 1.9652, "step": 47508 }, { "epoch": 4.873922855970456, "grad_norm": 0.04342150315642357, "learning_rate": 0.01, "loss": 2.0073, "step": 47511 }, { "epoch": 4.8742306114074685, "grad_norm": 0.03970487043261528, "learning_rate": 0.01, "loss": 1.9922, "step": 47514 }, { "epoch": 4.874538366844481, "grad_norm": 0.05668526887893677, "learning_rate": 0.01, "loss": 1.9907, "step": 47517 }, { "epoch": 4.874846122281493, "grad_norm": 0.05608886480331421, "learning_rate": 0.01, "loss": 1.9869, "step": 47520 }, { "epoch": 4.875153877718507, "grad_norm": 0.13083675503730774, "learning_rate": 0.01, "loss": 1.9794, "step": 47523 }, { "epoch": 4.875461633155519, "grad_norm": 0.040449466556310654, "learning_rate": 0.01, "loss": 1.9718, "step": 47526 }, { "epoch": 4.8757693885925315, "grad_norm": 0.08093597739934921, "learning_rate": 0.01, "loss": 1.9938, "step": 47529 }, { "epoch": 4.876077144029544, "grad_norm": 0.08471735566854477, "learning_rate": 0.01, "loss": 1.9785, "step": 47532 }, { "epoch": 4.876384899466557, "grad_norm": 0.05568789690732956, "learning_rate": 0.01, "loss": 1.9748, "step": 47535 }, { "epoch": 4.87669265490357, "grad_norm": 0.07529760152101517, "learning_rate": 0.01, "loss": 1.9734, "step": 47538 }, { "epoch": 4.877000410340583, "grad_norm": 0.06487952917814255, "learning_rate": 0.01, "loss": 1.9598, "step": 47541 }, { "epoch": 4.877308165777595, "grad_norm": 0.049183983355760574, "learning_rate": 0.01, "loss": 1.9761, "step": 47544 }, { "epoch": 4.877615921214608, "grad_norm": 0.08050165325403214, "learning_rate": 0.01, "loss": 1.9767, "step": 47547 }, { "epoch": 4.877923676651621, "grad_norm": 0.061584021896123886, "learning_rate": 0.01, "loss": 1.9966, "step": 47550 }, { "epoch": 4.878231432088634, "grad_norm": 0.052319157868623734, "learning_rate": 0.01, "loss": 2.01, "step": 47553 }, { "epoch": 4.8785391875256465, "grad_norm": 0.059711821377277374, "learning_rate": 0.01, "loss": 1.9743, "step": 47556 }, { "epoch": 4.878846942962659, "grad_norm": 0.10813643783330917, "learning_rate": 0.01, "loss": 2.0095, "step": 47559 }, { "epoch": 4.879154698399672, "grad_norm": 0.0562468096613884, "learning_rate": 0.01, "loss": 1.9917, "step": 47562 }, { "epoch": 4.879462453836684, "grad_norm": 0.14491213858127594, "learning_rate": 0.01, "loss": 1.9776, "step": 47565 }, { "epoch": 4.879770209273698, "grad_norm": 0.06594674289226532, "learning_rate": 0.01, "loss": 1.9815, "step": 47568 }, { "epoch": 4.8800779647107095, "grad_norm": 0.05883457511663437, "learning_rate": 0.01, "loss": 2.01, "step": 47571 }, { "epoch": 4.880385720147722, "grad_norm": 0.049259252846241, "learning_rate": 0.01, "loss": 1.987, "step": 47574 }, { "epoch": 4.880693475584735, "grad_norm": 0.0817684531211853, "learning_rate": 0.01, "loss": 1.9907, "step": 47577 }, { "epoch": 4.881001231021748, "grad_norm": 0.07413051277399063, "learning_rate": 0.01, "loss": 2.0177, "step": 47580 }, { "epoch": 4.881308986458761, "grad_norm": 0.03806111216545105, "learning_rate": 0.01, "loss": 1.9924, "step": 47583 }, { "epoch": 4.881616741895773, "grad_norm": 0.11986687034368515, "learning_rate": 0.01, "loss": 1.9963, "step": 47586 }, { "epoch": 4.881924497332786, "grad_norm": 0.09377092123031616, "learning_rate": 0.01, "loss": 1.9818, "step": 47589 }, { "epoch": 4.882232252769799, "grad_norm": 0.05205828696489334, "learning_rate": 0.01, "loss": 1.9777, "step": 47592 }, { "epoch": 4.882540008206812, "grad_norm": 0.07014745473861694, "learning_rate": 0.01, "loss": 1.9842, "step": 47595 }, { "epoch": 4.882847763643825, "grad_norm": 0.04630004242062569, "learning_rate": 0.01, "loss": 1.9782, "step": 47598 }, { "epoch": 4.883155519080837, "grad_norm": 0.06072268262505531, "learning_rate": 0.01, "loss": 1.9826, "step": 47601 }, { "epoch": 4.88346327451785, "grad_norm": 0.04583202302455902, "learning_rate": 0.01, "loss": 1.9966, "step": 47604 }, { "epoch": 4.883771029954863, "grad_norm": 0.10760218650102615, "learning_rate": 0.01, "loss": 1.9954, "step": 47607 }, { "epoch": 4.884078785391875, "grad_norm": 0.09803519397974014, "learning_rate": 0.01, "loss": 2.0101, "step": 47610 }, { "epoch": 4.8843865408288885, "grad_norm": 0.05660640448331833, "learning_rate": 0.01, "loss": 1.9988, "step": 47613 }, { "epoch": 4.8846942962659, "grad_norm": 0.07300207018852234, "learning_rate": 0.01, "loss": 1.9966, "step": 47616 }, { "epoch": 4.885002051702913, "grad_norm": 0.05942653492093086, "learning_rate": 0.01, "loss": 1.9922, "step": 47619 }, { "epoch": 4.885309807139926, "grad_norm": 0.0775621309876442, "learning_rate": 0.01, "loss": 2.0027, "step": 47622 }, { "epoch": 4.885617562576939, "grad_norm": 0.08601850271224976, "learning_rate": 0.01, "loss": 1.9876, "step": 47625 }, { "epoch": 4.8859253180139515, "grad_norm": 0.07847361266613007, "learning_rate": 0.01, "loss": 1.9886, "step": 47628 }, { "epoch": 4.886233073450964, "grad_norm": 0.06876290589570999, "learning_rate": 0.01, "loss": 1.9969, "step": 47631 }, { "epoch": 4.886540828887977, "grad_norm": 0.06279505789279938, "learning_rate": 0.01, "loss": 1.9865, "step": 47634 }, { "epoch": 4.88684858432499, "grad_norm": 0.0461944080889225, "learning_rate": 0.01, "loss": 1.9942, "step": 47637 }, { "epoch": 4.887156339762003, "grad_norm": 0.08709289878606796, "learning_rate": 0.01, "loss": 1.9779, "step": 47640 }, { "epoch": 4.887464095199015, "grad_norm": 0.07311128079891205, "learning_rate": 0.01, "loss": 1.9975, "step": 47643 }, { "epoch": 4.887771850636028, "grad_norm": 0.11558615416288376, "learning_rate": 0.01, "loss": 1.9976, "step": 47646 }, { "epoch": 4.888079606073041, "grad_norm": 0.044442903250455856, "learning_rate": 0.01, "loss": 1.9826, "step": 47649 }, { "epoch": 4.888387361510054, "grad_norm": 0.03592165187001228, "learning_rate": 0.01, "loss": 1.997, "step": 47652 }, { "epoch": 4.888695116947066, "grad_norm": 0.053503166884183884, "learning_rate": 0.01, "loss": 2.0183, "step": 47655 }, { "epoch": 4.889002872384079, "grad_norm": 0.05185321345925331, "learning_rate": 0.01, "loss": 1.9816, "step": 47658 }, { "epoch": 4.889310627821091, "grad_norm": 0.10305842012166977, "learning_rate": 0.01, "loss": 1.9621, "step": 47661 }, { "epoch": 4.889618383258104, "grad_norm": 0.04605614393949509, "learning_rate": 0.01, "loss": 2.0359, "step": 47664 }, { "epoch": 4.889926138695117, "grad_norm": 0.08601140230894089, "learning_rate": 0.01, "loss": 1.9911, "step": 47667 }, { "epoch": 4.8902338941321295, "grad_norm": 0.039195116609334946, "learning_rate": 0.01, "loss": 1.978, "step": 47670 }, { "epoch": 4.890541649569142, "grad_norm": 0.04058361053466797, "learning_rate": 0.01, "loss": 1.9952, "step": 47673 }, { "epoch": 4.890849405006155, "grad_norm": 0.049590058624744415, "learning_rate": 0.01, "loss": 1.9788, "step": 47676 }, { "epoch": 4.891157160443168, "grad_norm": 0.05253410339355469, "learning_rate": 0.01, "loss": 2.0, "step": 47679 }, { "epoch": 4.891464915880181, "grad_norm": 0.05858082324266434, "learning_rate": 0.01, "loss": 1.9868, "step": 47682 }, { "epoch": 4.891772671317193, "grad_norm": 0.05094842612743378, "learning_rate": 0.01, "loss": 1.9743, "step": 47685 }, { "epoch": 4.892080426754206, "grad_norm": 0.0539892241358757, "learning_rate": 0.01, "loss": 1.9765, "step": 47688 }, { "epoch": 4.892388182191219, "grad_norm": 0.15945689380168915, "learning_rate": 0.01, "loss": 1.9983, "step": 47691 }, { "epoch": 4.892695937628232, "grad_norm": 0.03433748334646225, "learning_rate": 0.01, "loss": 1.9898, "step": 47694 }, { "epoch": 4.8930036930652445, "grad_norm": 0.047357238829135895, "learning_rate": 0.01, "loss": 1.9801, "step": 47697 }, { "epoch": 4.893311448502256, "grad_norm": 0.05974509194493294, "learning_rate": 0.01, "loss": 2.0253, "step": 47700 }, { "epoch": 4.89361920393927, "grad_norm": 0.05918588861823082, "learning_rate": 0.01, "loss": 1.9797, "step": 47703 }, { "epoch": 4.893926959376282, "grad_norm": 0.04418300464749336, "learning_rate": 0.01, "loss": 2.0014, "step": 47706 }, { "epoch": 4.894234714813295, "grad_norm": 0.034721486270427704, "learning_rate": 0.01, "loss": 2.0021, "step": 47709 }, { "epoch": 4.8945424702503075, "grad_norm": 0.08422865718603134, "learning_rate": 0.01, "loss": 1.976, "step": 47712 }, { "epoch": 4.89485022568732, "grad_norm": 0.12269464135169983, "learning_rate": 0.01, "loss": 1.9866, "step": 47715 }, { "epoch": 4.895157981124333, "grad_norm": 0.06587913632392883, "learning_rate": 0.01, "loss": 1.999, "step": 47718 }, { "epoch": 4.895465736561346, "grad_norm": 0.12484870105981827, "learning_rate": 0.01, "loss": 1.9895, "step": 47721 }, { "epoch": 4.895773491998359, "grad_norm": 0.07100055366754532, "learning_rate": 0.01, "loss": 1.985, "step": 47724 }, { "epoch": 4.896081247435371, "grad_norm": 0.03272338956594467, "learning_rate": 0.01, "loss": 1.9816, "step": 47727 }, { "epoch": 4.896389002872384, "grad_norm": 0.03650757670402527, "learning_rate": 0.01, "loss": 2.0009, "step": 47730 }, { "epoch": 4.896696758309397, "grad_norm": 0.04600701853632927, "learning_rate": 0.01, "loss": 2.0026, "step": 47733 }, { "epoch": 4.89700451374641, "grad_norm": 0.06520526856184006, "learning_rate": 0.01, "loss": 1.9877, "step": 47736 }, { "epoch": 4.8973122691834226, "grad_norm": 0.04096482694149017, "learning_rate": 0.01, "loss": 1.9706, "step": 47739 }, { "epoch": 4.897620024620435, "grad_norm": 0.03720017522573471, "learning_rate": 0.01, "loss": 1.9745, "step": 47742 }, { "epoch": 4.897927780057447, "grad_norm": 0.04412877559661865, "learning_rate": 0.01, "loss": 1.9969, "step": 47745 }, { "epoch": 4.898235535494461, "grad_norm": 0.03968506306409836, "learning_rate": 0.01, "loss": 1.9944, "step": 47748 }, { "epoch": 4.898543290931473, "grad_norm": 0.10775657743215561, "learning_rate": 0.01, "loss": 1.9709, "step": 47751 }, { "epoch": 4.898851046368486, "grad_norm": 0.05303318426012993, "learning_rate": 0.01, "loss": 1.9536, "step": 47754 }, { "epoch": 4.899158801805498, "grad_norm": 0.0775865837931633, "learning_rate": 0.01, "loss": 1.9916, "step": 47757 }, { "epoch": 4.899466557242511, "grad_norm": 0.10708294063806534, "learning_rate": 0.01, "loss": 1.972, "step": 47760 }, { "epoch": 4.899774312679524, "grad_norm": 0.05531914532184601, "learning_rate": 0.01, "loss": 2.0002, "step": 47763 }, { "epoch": 4.900082068116537, "grad_norm": 0.057492613792419434, "learning_rate": 0.01, "loss": 2.0103, "step": 47766 }, { "epoch": 4.9003898235535495, "grad_norm": 0.04654363915324211, "learning_rate": 0.01, "loss": 1.966, "step": 47769 }, { "epoch": 4.900697578990562, "grad_norm": 0.060792889446020126, "learning_rate": 0.01, "loss": 1.9836, "step": 47772 }, { "epoch": 4.901005334427575, "grad_norm": 0.053037889301776886, "learning_rate": 0.01, "loss": 2.0002, "step": 47775 }, { "epoch": 4.901313089864588, "grad_norm": 0.07746932655572891, "learning_rate": 0.01, "loss": 1.9738, "step": 47778 }, { "epoch": 4.901620845301601, "grad_norm": 0.07753589004278183, "learning_rate": 0.01, "loss": 2.0094, "step": 47781 }, { "epoch": 4.901928600738613, "grad_norm": 0.07140833884477615, "learning_rate": 0.01, "loss": 1.9908, "step": 47784 }, { "epoch": 4.902236356175626, "grad_norm": 0.12805962562561035, "learning_rate": 0.01, "loss": 1.984, "step": 47787 }, { "epoch": 4.902544111612638, "grad_norm": 0.10444001108407974, "learning_rate": 0.01, "loss": 1.9788, "step": 47790 }, { "epoch": 4.902851867049652, "grad_norm": 0.13313624262809753, "learning_rate": 0.01, "loss": 1.9913, "step": 47793 }, { "epoch": 4.903159622486664, "grad_norm": 0.0864720493555069, "learning_rate": 0.01, "loss": 2.0043, "step": 47796 }, { "epoch": 4.903467377923676, "grad_norm": 0.05329997465014458, "learning_rate": 0.01, "loss": 2.0022, "step": 47799 }, { "epoch": 4.903775133360689, "grad_norm": 0.053774427622556686, "learning_rate": 0.01, "loss": 1.9824, "step": 47802 }, { "epoch": 4.904082888797702, "grad_norm": 0.06760123372077942, "learning_rate": 0.01, "loss": 1.9593, "step": 47805 }, { "epoch": 4.904390644234715, "grad_norm": 0.06961317360401154, "learning_rate": 0.01, "loss": 1.9785, "step": 47808 }, { "epoch": 4.9046983996717275, "grad_norm": 0.05297344923019409, "learning_rate": 0.01, "loss": 1.9786, "step": 47811 }, { "epoch": 4.90500615510874, "grad_norm": 0.06930317729711533, "learning_rate": 0.01, "loss": 2.0076, "step": 47814 }, { "epoch": 4.905313910545753, "grad_norm": 0.039554011076688766, "learning_rate": 0.01, "loss": 1.9833, "step": 47817 }, { "epoch": 4.905621665982766, "grad_norm": 0.03862786293029785, "learning_rate": 0.01, "loss": 1.9911, "step": 47820 }, { "epoch": 4.905929421419779, "grad_norm": 0.03559514880180359, "learning_rate": 0.01, "loss": 1.9733, "step": 47823 }, { "epoch": 4.906237176856791, "grad_norm": 0.11802364140748978, "learning_rate": 0.01, "loss": 2.0073, "step": 47826 }, { "epoch": 4.906544932293804, "grad_norm": 0.049168869853019714, "learning_rate": 0.01, "loss": 1.9787, "step": 47829 }, { "epoch": 4.906852687730817, "grad_norm": 0.07636560499668121, "learning_rate": 0.01, "loss": 1.9821, "step": 47832 }, { "epoch": 4.907160443167829, "grad_norm": 0.08972848206758499, "learning_rate": 0.01, "loss": 1.9712, "step": 47835 }, { "epoch": 4.9074681986048425, "grad_norm": 0.06916697323322296, "learning_rate": 0.01, "loss": 1.9965, "step": 47838 }, { "epoch": 4.907775954041854, "grad_norm": 0.058684833347797394, "learning_rate": 0.01, "loss": 1.9866, "step": 47841 }, { "epoch": 4.908083709478867, "grad_norm": 0.045184120535850525, "learning_rate": 0.01, "loss": 1.9579, "step": 47844 }, { "epoch": 4.90839146491588, "grad_norm": 0.11755534261465073, "learning_rate": 0.01, "loss": 1.9775, "step": 47847 }, { "epoch": 4.908699220352893, "grad_norm": 0.07570980489253998, "learning_rate": 0.01, "loss": 1.9835, "step": 47850 }, { "epoch": 4.9090069757899055, "grad_norm": 0.050079140812158585, "learning_rate": 0.01, "loss": 1.9986, "step": 47853 }, { "epoch": 4.909314731226918, "grad_norm": 0.12019573897123337, "learning_rate": 0.01, "loss": 1.9825, "step": 47856 }, { "epoch": 4.909622486663931, "grad_norm": 0.10437455028295517, "learning_rate": 0.01, "loss": 1.9902, "step": 47859 }, { "epoch": 4.909930242100944, "grad_norm": 0.05818986892700195, "learning_rate": 0.01, "loss": 1.9871, "step": 47862 }, { "epoch": 4.910237997537957, "grad_norm": 0.04473674297332764, "learning_rate": 0.01, "loss": 1.9964, "step": 47865 }, { "epoch": 4.910545752974969, "grad_norm": 0.07120556384325027, "learning_rate": 0.01, "loss": 1.9877, "step": 47868 }, { "epoch": 4.910853508411982, "grad_norm": 0.0661742091178894, "learning_rate": 0.01, "loss": 1.9813, "step": 47871 }, { "epoch": 4.911161263848995, "grad_norm": 0.07108204811811447, "learning_rate": 0.01, "loss": 1.9847, "step": 47874 }, { "epoch": 4.911469019286008, "grad_norm": 0.09054633975028992, "learning_rate": 0.01, "loss": 2.0145, "step": 47877 }, { "epoch": 4.91177677472302, "grad_norm": 0.13030128180980682, "learning_rate": 0.01, "loss": 1.9726, "step": 47880 }, { "epoch": 4.912084530160033, "grad_norm": 0.15652155876159668, "learning_rate": 0.01, "loss": 2.0049, "step": 47883 }, { "epoch": 4.912392285597045, "grad_norm": 0.10536278784275055, "learning_rate": 0.01, "loss": 1.9871, "step": 47886 }, { "epoch": 4.912700041034058, "grad_norm": 0.0899362787604332, "learning_rate": 0.01, "loss": 1.9676, "step": 47889 }, { "epoch": 4.913007796471071, "grad_norm": 0.057359661906957626, "learning_rate": 0.01, "loss": 1.9633, "step": 47892 }, { "epoch": 4.913315551908084, "grad_norm": 0.04800909757614136, "learning_rate": 0.01, "loss": 1.994, "step": 47895 }, { "epoch": 4.913623307345096, "grad_norm": 0.04786820337176323, "learning_rate": 0.01, "loss": 2.002, "step": 47898 }, { "epoch": 4.913931062782109, "grad_norm": 0.04169714078307152, "learning_rate": 0.01, "loss": 1.9764, "step": 47901 }, { "epoch": 4.914238818219122, "grad_norm": 0.04943560063838959, "learning_rate": 0.01, "loss": 1.9579, "step": 47904 }, { "epoch": 4.914546573656135, "grad_norm": 0.06505610793828964, "learning_rate": 0.01, "loss": 2.0132, "step": 47907 }, { "epoch": 4.9148543290931475, "grad_norm": 0.057665206491947174, "learning_rate": 0.01, "loss": 1.9838, "step": 47910 }, { "epoch": 4.91516208453016, "grad_norm": 0.060052540153265, "learning_rate": 0.01, "loss": 1.989, "step": 47913 }, { "epoch": 4.915469839967173, "grad_norm": 0.062361933290958405, "learning_rate": 0.01, "loss": 1.9894, "step": 47916 }, { "epoch": 4.915777595404186, "grad_norm": 0.15210957825183868, "learning_rate": 0.01, "loss": 1.9865, "step": 47919 }, { "epoch": 4.916085350841199, "grad_norm": 0.05437038466334343, "learning_rate": 0.01, "loss": 1.9967, "step": 47922 }, { "epoch": 4.9163931062782105, "grad_norm": 0.07012643665075302, "learning_rate": 0.01, "loss": 1.9975, "step": 47925 }, { "epoch": 4.916700861715223, "grad_norm": 0.056515853852033615, "learning_rate": 0.01, "loss": 1.9873, "step": 47928 }, { "epoch": 4.917008617152236, "grad_norm": 0.13166914880275726, "learning_rate": 0.01, "loss": 1.9749, "step": 47931 }, { "epoch": 4.917316372589249, "grad_norm": 0.05462309718132019, "learning_rate": 0.01, "loss": 1.9831, "step": 47934 }, { "epoch": 4.917624128026262, "grad_norm": 0.0725654810667038, "learning_rate": 0.01, "loss": 1.9991, "step": 47937 }, { "epoch": 4.917931883463274, "grad_norm": 0.09769755601882935, "learning_rate": 0.01, "loss": 1.9826, "step": 47940 }, { "epoch": 4.918239638900287, "grad_norm": 0.10543838888406754, "learning_rate": 0.01, "loss": 2.015, "step": 47943 }, { "epoch": 4.9185473943373, "grad_norm": 0.08627963066101074, "learning_rate": 0.01, "loss": 1.9588, "step": 47946 }, { "epoch": 4.918855149774313, "grad_norm": 0.05162891000509262, "learning_rate": 0.01, "loss": 2.0049, "step": 47949 }, { "epoch": 4.9191629052113255, "grad_norm": 0.040748897939920425, "learning_rate": 0.01, "loss": 2.0034, "step": 47952 }, { "epoch": 4.919470660648338, "grad_norm": 0.042143408209085464, "learning_rate": 0.01, "loss": 1.9958, "step": 47955 }, { "epoch": 4.919778416085351, "grad_norm": 0.035993028432130814, "learning_rate": 0.01, "loss": 1.9651, "step": 47958 }, { "epoch": 4.920086171522364, "grad_norm": 0.13255073130130768, "learning_rate": 0.01, "loss": 1.9861, "step": 47961 }, { "epoch": 4.920393926959377, "grad_norm": 0.06527701765298843, "learning_rate": 0.01, "loss": 2.0059, "step": 47964 }, { "epoch": 4.920701682396389, "grad_norm": 0.08764603734016418, "learning_rate": 0.01, "loss": 1.9612, "step": 47967 }, { "epoch": 4.921009437833401, "grad_norm": 0.11476396769285202, "learning_rate": 0.01, "loss": 1.9796, "step": 47970 }, { "epoch": 4.921317193270414, "grad_norm": 0.11724267154932022, "learning_rate": 0.01, "loss": 1.9849, "step": 47973 }, { "epoch": 4.921624948707427, "grad_norm": 0.06371249258518219, "learning_rate": 0.01, "loss": 1.9996, "step": 47976 }, { "epoch": 4.92193270414444, "grad_norm": 0.05650569498538971, "learning_rate": 0.01, "loss": 1.9873, "step": 47979 }, { "epoch": 4.922240459581452, "grad_norm": 0.05101975426077843, "learning_rate": 0.01, "loss": 1.9909, "step": 47982 }, { "epoch": 4.922548215018465, "grad_norm": 0.06948293745517731, "learning_rate": 0.01, "loss": 1.9667, "step": 47985 }, { "epoch": 4.922855970455478, "grad_norm": 0.05776602029800415, "learning_rate": 0.01, "loss": 1.9918, "step": 47988 }, { "epoch": 4.923163725892491, "grad_norm": 0.03927835077047348, "learning_rate": 0.01, "loss": 1.9804, "step": 47991 }, { "epoch": 4.9234714813295035, "grad_norm": 0.07943009585142136, "learning_rate": 0.01, "loss": 1.9817, "step": 47994 }, { "epoch": 4.923779236766516, "grad_norm": 0.07352134585380554, "learning_rate": 0.01, "loss": 2.0015, "step": 47997 }, { "epoch": 4.924086992203529, "grad_norm": 0.07295040786266327, "learning_rate": 0.01, "loss": 1.9583, "step": 48000 }, { "epoch": 4.924394747640542, "grad_norm": 0.08415599912405014, "learning_rate": 0.01, "loss": 1.9878, "step": 48003 }, { "epoch": 4.924702503077555, "grad_norm": 0.0890984758734703, "learning_rate": 0.01, "loss": 1.9735, "step": 48006 }, { "epoch": 4.925010258514567, "grad_norm": 0.12055069953203201, "learning_rate": 0.01, "loss": 1.9916, "step": 48009 }, { "epoch": 4.92531801395158, "grad_norm": 0.0676504373550415, "learning_rate": 0.01, "loss": 1.9898, "step": 48012 }, { "epoch": 4.925625769388592, "grad_norm": 0.047612790018320084, "learning_rate": 0.01, "loss": 1.9818, "step": 48015 }, { "epoch": 4.925933524825605, "grad_norm": 0.06470039486885071, "learning_rate": 0.01, "loss": 1.9904, "step": 48018 }, { "epoch": 4.926241280262618, "grad_norm": 0.07510625571012497, "learning_rate": 0.01, "loss": 1.9749, "step": 48021 }, { "epoch": 4.92654903569963, "grad_norm": 0.07793190330266953, "learning_rate": 0.01, "loss": 1.9749, "step": 48024 }, { "epoch": 4.926856791136643, "grad_norm": 0.08655183017253876, "learning_rate": 0.01, "loss": 1.9609, "step": 48027 }, { "epoch": 4.927164546573656, "grad_norm": 0.07441940903663635, "learning_rate": 0.01, "loss": 1.9847, "step": 48030 }, { "epoch": 4.927472302010669, "grad_norm": 0.04783869534730911, "learning_rate": 0.01, "loss": 1.9981, "step": 48033 }, { "epoch": 4.9277800574476815, "grad_norm": 0.13127107918262482, "learning_rate": 0.01, "loss": 2.0117, "step": 48036 }, { "epoch": 4.928087812884694, "grad_norm": 0.05267849192023277, "learning_rate": 0.01, "loss": 1.9726, "step": 48039 }, { "epoch": 4.928395568321707, "grad_norm": 0.04276135936379433, "learning_rate": 0.01, "loss": 1.9768, "step": 48042 }, { "epoch": 4.92870332375872, "grad_norm": 0.04237223416566849, "learning_rate": 0.01, "loss": 1.989, "step": 48045 }, { "epoch": 4.929011079195733, "grad_norm": 0.052018001675605774, "learning_rate": 0.01, "loss": 1.9783, "step": 48048 }, { "epoch": 4.9293188346327454, "grad_norm": 0.05408487468957901, "learning_rate": 0.01, "loss": 1.9997, "step": 48051 }, { "epoch": 4.929626590069758, "grad_norm": 0.07300473004579544, "learning_rate": 0.01, "loss": 1.9986, "step": 48054 }, { "epoch": 4.929934345506771, "grad_norm": 0.08607280254364014, "learning_rate": 0.01, "loss": 1.9952, "step": 48057 }, { "epoch": 4.930242100943783, "grad_norm": 0.061927784234285355, "learning_rate": 0.01, "loss": 1.9687, "step": 48060 }, { "epoch": 4.930549856380796, "grad_norm": 0.08371556550264359, "learning_rate": 0.01, "loss": 1.9692, "step": 48063 }, { "epoch": 4.9308576118178085, "grad_norm": 0.03664041683077812, "learning_rate": 0.01, "loss": 1.997, "step": 48066 }, { "epoch": 4.931165367254821, "grad_norm": 0.06964648514986038, "learning_rate": 0.01, "loss": 1.9893, "step": 48069 }, { "epoch": 4.931473122691834, "grad_norm": 0.07257051765918732, "learning_rate": 0.01, "loss": 1.9869, "step": 48072 }, { "epoch": 4.931780878128847, "grad_norm": 0.06781255453824997, "learning_rate": 0.01, "loss": 1.9678, "step": 48075 }, { "epoch": 4.93208863356586, "grad_norm": 0.04258018732070923, "learning_rate": 0.01, "loss": 1.992, "step": 48078 }, { "epoch": 4.932396389002872, "grad_norm": 0.05434480682015419, "learning_rate": 0.01, "loss": 1.9782, "step": 48081 }, { "epoch": 4.932704144439885, "grad_norm": 0.03577110916376114, "learning_rate": 0.01, "loss": 1.9817, "step": 48084 }, { "epoch": 4.933011899876898, "grad_norm": 0.11260990798473358, "learning_rate": 0.01, "loss": 2.0087, "step": 48087 }, { "epoch": 4.933319655313911, "grad_norm": 0.09070918709039688, "learning_rate": 0.01, "loss": 1.9969, "step": 48090 }, { "epoch": 4.9336274107509235, "grad_norm": 0.06896468997001648, "learning_rate": 0.01, "loss": 1.9965, "step": 48093 }, { "epoch": 4.933935166187936, "grad_norm": 0.050933387130498886, "learning_rate": 0.01, "loss": 1.9885, "step": 48096 }, { "epoch": 4.934242921624949, "grad_norm": 0.04397554323077202, "learning_rate": 0.01, "loss": 1.9988, "step": 48099 }, { "epoch": 4.934550677061962, "grad_norm": 0.03253142163157463, "learning_rate": 0.01, "loss": 1.9974, "step": 48102 }, { "epoch": 4.934858432498974, "grad_norm": 0.051716264337301254, "learning_rate": 0.01, "loss": 1.9985, "step": 48105 }, { "epoch": 4.9351661879359865, "grad_norm": 0.04921911656856537, "learning_rate": 0.01, "loss": 1.988, "step": 48108 }, { "epoch": 4.935473943372999, "grad_norm": 0.05059951916337013, "learning_rate": 0.01, "loss": 1.989, "step": 48111 }, { "epoch": 4.935781698810012, "grad_norm": 0.11451517045497894, "learning_rate": 0.01, "loss": 1.989, "step": 48114 }, { "epoch": 4.936089454247025, "grad_norm": 0.06287554651498795, "learning_rate": 0.01, "loss": 1.9658, "step": 48117 }, { "epoch": 4.936397209684038, "grad_norm": 0.11319955438375473, "learning_rate": 0.01, "loss": 1.9907, "step": 48120 }, { "epoch": 4.93670496512105, "grad_norm": 0.04149606078863144, "learning_rate": 0.01, "loss": 1.9655, "step": 48123 }, { "epoch": 4.937012720558063, "grad_norm": 0.044562436640262604, "learning_rate": 0.01, "loss": 1.9738, "step": 48126 }, { "epoch": 4.937320475995076, "grad_norm": 0.04073583707213402, "learning_rate": 0.01, "loss": 1.9959, "step": 48129 }, { "epoch": 4.937628231432089, "grad_norm": 0.05489436164498329, "learning_rate": 0.01, "loss": 1.9708, "step": 48132 }, { "epoch": 4.9379359868691015, "grad_norm": 0.07751783728599548, "learning_rate": 0.01, "loss": 1.9892, "step": 48135 }, { "epoch": 4.938243742306114, "grad_norm": 0.06630382686853409, "learning_rate": 0.01, "loss": 1.9795, "step": 48138 }, { "epoch": 4.938551497743127, "grad_norm": 0.07446899265050888, "learning_rate": 0.01, "loss": 1.9807, "step": 48141 }, { "epoch": 4.93885925318014, "grad_norm": 0.0776534453034401, "learning_rate": 0.01, "loss": 1.9972, "step": 48144 }, { "epoch": 4.939167008617153, "grad_norm": 0.07835227996110916, "learning_rate": 0.01, "loss": 2.0016, "step": 48147 }, { "epoch": 4.9394747640541645, "grad_norm": 0.0798632800579071, "learning_rate": 0.01, "loss": 1.9653, "step": 48150 }, { "epoch": 4.939782519491177, "grad_norm": 0.044298071414232254, "learning_rate": 0.01, "loss": 2.0051, "step": 48153 }, { "epoch": 4.94009027492819, "grad_norm": 0.10383112728595734, "learning_rate": 0.01, "loss": 1.9804, "step": 48156 }, { "epoch": 4.940398030365203, "grad_norm": 0.12793728709220886, "learning_rate": 0.01, "loss": 1.9772, "step": 48159 }, { "epoch": 4.940705785802216, "grad_norm": 0.038207195699214935, "learning_rate": 0.01, "loss": 1.9651, "step": 48162 }, { "epoch": 4.941013541239228, "grad_norm": 0.0840001255273819, "learning_rate": 0.01, "loss": 1.977, "step": 48165 }, { "epoch": 4.941321296676241, "grad_norm": 0.11788827925920486, "learning_rate": 0.01, "loss": 1.9909, "step": 48168 }, { "epoch": 4.941629052113254, "grad_norm": 0.05462497100234032, "learning_rate": 0.01, "loss": 1.9857, "step": 48171 }, { "epoch": 4.941936807550267, "grad_norm": 0.0785188302397728, "learning_rate": 0.01, "loss": 1.9887, "step": 48174 }, { "epoch": 4.9422445629872795, "grad_norm": 0.09669160097837448, "learning_rate": 0.01, "loss": 1.9811, "step": 48177 }, { "epoch": 4.942552318424292, "grad_norm": 0.0880751982331276, "learning_rate": 0.01, "loss": 1.9953, "step": 48180 }, { "epoch": 4.942860073861305, "grad_norm": 0.04356632009148598, "learning_rate": 0.01, "loss": 1.9849, "step": 48183 }, { "epoch": 4.943167829298318, "grad_norm": 0.055611494928598404, "learning_rate": 0.01, "loss": 1.9936, "step": 48186 }, { "epoch": 4.943475584735331, "grad_norm": 0.05191848427057266, "learning_rate": 0.01, "loss": 1.9677, "step": 48189 }, { "epoch": 4.943783340172343, "grad_norm": 0.06415262818336487, "learning_rate": 0.01, "loss": 1.9893, "step": 48192 }, { "epoch": 4.944091095609355, "grad_norm": 0.04284198582172394, "learning_rate": 0.01, "loss": 1.999, "step": 48195 }, { "epoch": 4.944398851046368, "grad_norm": 0.03310967609286308, "learning_rate": 0.01, "loss": 1.9865, "step": 48198 }, { "epoch": 4.944706606483381, "grad_norm": 0.11449988186359406, "learning_rate": 0.01, "loss": 1.9771, "step": 48201 }, { "epoch": 4.945014361920394, "grad_norm": 0.0641254335641861, "learning_rate": 0.01, "loss": 1.9854, "step": 48204 }, { "epoch": 4.9453221173574065, "grad_norm": 0.08665329962968826, "learning_rate": 0.01, "loss": 1.9855, "step": 48207 }, { "epoch": 4.945629872794419, "grad_norm": 0.03855408355593681, "learning_rate": 0.01, "loss": 1.9997, "step": 48210 }, { "epoch": 4.945937628231432, "grad_norm": 0.03156152740120888, "learning_rate": 0.01, "loss": 1.9975, "step": 48213 }, { "epoch": 4.946245383668445, "grad_norm": 0.052971091121435165, "learning_rate": 0.01, "loss": 1.9712, "step": 48216 }, { "epoch": 4.946553139105458, "grad_norm": 0.04167775437235832, "learning_rate": 0.01, "loss": 1.9749, "step": 48219 }, { "epoch": 4.94686089454247, "grad_norm": 0.051760535687208176, "learning_rate": 0.01, "loss": 1.9695, "step": 48222 }, { "epoch": 4.947168649979483, "grad_norm": 0.0969996377825737, "learning_rate": 0.01, "loss": 1.9915, "step": 48225 }, { "epoch": 4.947476405416496, "grad_norm": 0.05064645782113075, "learning_rate": 0.01, "loss": 1.967, "step": 48228 }, { "epoch": 4.947784160853509, "grad_norm": 0.04742693901062012, "learning_rate": 0.01, "loss": 1.9663, "step": 48231 }, { "epoch": 4.9480919162905215, "grad_norm": 0.03507422283291817, "learning_rate": 0.01, "loss": 1.9858, "step": 48234 }, { "epoch": 4.948399671727534, "grad_norm": 0.03717927634716034, "learning_rate": 0.01, "loss": 1.9756, "step": 48237 }, { "epoch": 4.948707427164546, "grad_norm": 0.11404886096715927, "learning_rate": 0.01, "loss": 1.9924, "step": 48240 }, { "epoch": 4.949015182601559, "grad_norm": 0.05842389166355133, "learning_rate": 0.01, "loss": 1.9805, "step": 48243 }, { "epoch": 4.949322938038572, "grad_norm": 0.0430951751768589, "learning_rate": 0.01, "loss": 1.9993, "step": 48246 }, { "epoch": 4.9496306934755845, "grad_norm": 0.053793564438819885, "learning_rate": 0.01, "loss": 1.9853, "step": 48249 }, { "epoch": 4.949938448912597, "grad_norm": 0.04643286019563675, "learning_rate": 0.01, "loss": 2.0084, "step": 48252 }, { "epoch": 4.95024620434961, "grad_norm": 0.0575956366956234, "learning_rate": 0.01, "loss": 1.9927, "step": 48255 }, { "epoch": 4.950553959786623, "grad_norm": 0.05439090356230736, "learning_rate": 0.01, "loss": 1.978, "step": 48258 }, { "epoch": 4.950861715223636, "grad_norm": 0.05502607673406601, "learning_rate": 0.01, "loss": 1.9743, "step": 48261 }, { "epoch": 4.951169470660648, "grad_norm": 0.07984888553619385, "learning_rate": 0.01, "loss": 1.979, "step": 48264 }, { "epoch": 4.951477226097661, "grad_norm": 0.041320640593767166, "learning_rate": 0.01, "loss": 1.9822, "step": 48267 }, { "epoch": 4.951784981534674, "grad_norm": 0.12548021972179413, "learning_rate": 0.01, "loss": 1.9845, "step": 48270 }, { "epoch": 4.952092736971687, "grad_norm": 0.035638876259326935, "learning_rate": 0.01, "loss": 1.989, "step": 48273 }, { "epoch": 4.9524004924086995, "grad_norm": 0.13796448707580566, "learning_rate": 0.01, "loss": 2.0005, "step": 48276 }, { "epoch": 4.952708247845712, "grad_norm": 0.08978889882564545, "learning_rate": 0.01, "loss": 1.9814, "step": 48279 }, { "epoch": 4.953016003282725, "grad_norm": 0.0677703395485878, "learning_rate": 0.01, "loss": 1.9835, "step": 48282 }, { "epoch": 4.953323758719737, "grad_norm": 0.04120895639061928, "learning_rate": 0.01, "loss": 2.0018, "step": 48285 }, { "epoch": 4.95363151415675, "grad_norm": 0.043558813631534576, "learning_rate": 0.01, "loss": 1.9684, "step": 48288 }, { "epoch": 4.9539392695937625, "grad_norm": 0.03488008677959442, "learning_rate": 0.01, "loss": 1.9922, "step": 48291 }, { "epoch": 4.954247025030775, "grad_norm": 0.06290942430496216, "learning_rate": 0.01, "loss": 1.9924, "step": 48294 }, { "epoch": 4.954554780467788, "grad_norm": 0.05662504583597183, "learning_rate": 0.01, "loss": 1.9819, "step": 48297 }, { "epoch": 4.954862535904801, "grad_norm": 0.04810362681746483, "learning_rate": 0.01, "loss": 1.9855, "step": 48300 }, { "epoch": 4.955170291341814, "grad_norm": 0.06158094108104706, "learning_rate": 0.01, "loss": 1.984, "step": 48303 }, { "epoch": 4.955478046778826, "grad_norm": 0.1016242504119873, "learning_rate": 0.01, "loss": 1.9976, "step": 48306 }, { "epoch": 4.955785802215839, "grad_norm": 0.05129433050751686, "learning_rate": 0.01, "loss": 1.9674, "step": 48309 }, { "epoch": 4.956093557652852, "grad_norm": 0.09010904282331467, "learning_rate": 0.01, "loss": 1.9844, "step": 48312 }, { "epoch": 4.956401313089865, "grad_norm": 0.09995526820421219, "learning_rate": 0.01, "loss": 1.9706, "step": 48315 }, { "epoch": 4.9567090685268775, "grad_norm": 0.0642913207411766, "learning_rate": 0.01, "loss": 1.9573, "step": 48318 }, { "epoch": 4.95701682396389, "grad_norm": 0.03862082585692406, "learning_rate": 0.01, "loss": 2.0143, "step": 48321 }, { "epoch": 4.957324579400903, "grad_norm": 0.11760932207107544, "learning_rate": 0.01, "loss": 2.003, "step": 48324 }, { "epoch": 4.957632334837916, "grad_norm": 0.09624608606100082, "learning_rate": 0.01, "loss": 1.9876, "step": 48327 }, { "epoch": 4.957940090274928, "grad_norm": 0.06814394891262054, "learning_rate": 0.01, "loss": 1.9711, "step": 48330 }, { "epoch": 4.9582478457119405, "grad_norm": 0.09379391372203827, "learning_rate": 0.01, "loss": 2.0017, "step": 48333 }, { "epoch": 4.958555601148953, "grad_norm": 0.04310177266597748, "learning_rate": 0.01, "loss": 1.9869, "step": 48336 }, { "epoch": 4.958863356585966, "grad_norm": 0.048953697085380554, "learning_rate": 0.01, "loss": 1.9959, "step": 48339 }, { "epoch": 4.959171112022979, "grad_norm": 0.03562283515930176, "learning_rate": 0.01, "loss": 1.9993, "step": 48342 }, { "epoch": 4.959478867459992, "grad_norm": 0.040618475526571274, "learning_rate": 0.01, "loss": 1.9973, "step": 48345 }, { "epoch": 4.9597866228970044, "grad_norm": 0.11639663577079773, "learning_rate": 0.01, "loss": 1.979, "step": 48348 }, { "epoch": 4.960094378334017, "grad_norm": 0.07946206629276276, "learning_rate": 0.01, "loss": 2.0178, "step": 48351 }, { "epoch": 4.96040213377103, "grad_norm": 0.05358874425292015, "learning_rate": 0.01, "loss": 1.9862, "step": 48354 }, { "epoch": 4.960709889208043, "grad_norm": 0.07157052308320999, "learning_rate": 0.01, "loss": 1.9989, "step": 48357 }, { "epoch": 4.961017644645056, "grad_norm": 0.038184091448783875, "learning_rate": 0.01, "loss": 1.9805, "step": 48360 }, { "epoch": 4.961325400082068, "grad_norm": 0.04140111058950424, "learning_rate": 0.01, "loss": 1.9832, "step": 48363 }, { "epoch": 4.961633155519081, "grad_norm": 0.08905436098575592, "learning_rate": 0.01, "loss": 2.0117, "step": 48366 }, { "epoch": 4.961940910956094, "grad_norm": 0.07200294733047485, "learning_rate": 0.01, "loss": 1.9858, "step": 48369 }, { "epoch": 4.962248666393107, "grad_norm": 0.0786944180727005, "learning_rate": 0.01, "loss": 2.0012, "step": 48372 }, { "epoch": 4.962556421830119, "grad_norm": 0.08718118071556091, "learning_rate": 0.01, "loss": 2.0058, "step": 48375 }, { "epoch": 4.962864177267131, "grad_norm": 0.10729999840259552, "learning_rate": 0.01, "loss": 1.9669, "step": 48378 }, { "epoch": 4.963171932704144, "grad_norm": 0.05224734544754028, "learning_rate": 0.01, "loss": 2.0007, "step": 48381 }, { "epoch": 4.963479688141157, "grad_norm": 0.10347335785627365, "learning_rate": 0.01, "loss": 1.9887, "step": 48384 }, { "epoch": 4.96378744357817, "grad_norm": 0.09103485941886902, "learning_rate": 0.01, "loss": 1.9813, "step": 48387 }, { "epoch": 4.9640951990151825, "grad_norm": 0.05988696217536926, "learning_rate": 0.01, "loss": 1.9608, "step": 48390 }, { "epoch": 4.964402954452195, "grad_norm": 0.05333305522799492, "learning_rate": 0.01, "loss": 1.9836, "step": 48393 }, { "epoch": 4.964710709889208, "grad_norm": 0.039692364633083344, "learning_rate": 0.01, "loss": 1.9938, "step": 48396 }, { "epoch": 4.965018465326221, "grad_norm": 0.048610180616378784, "learning_rate": 0.01, "loss": 1.9839, "step": 48399 }, { "epoch": 4.965326220763234, "grad_norm": 0.06438957899808884, "learning_rate": 0.01, "loss": 1.9844, "step": 48402 }, { "epoch": 4.965633976200246, "grad_norm": 0.06306184828281403, "learning_rate": 0.01, "loss": 2.0163, "step": 48405 }, { "epoch": 4.965941731637259, "grad_norm": 0.0924021527171135, "learning_rate": 0.01, "loss": 1.9897, "step": 48408 }, { "epoch": 4.966249487074272, "grad_norm": 0.06868547946214676, "learning_rate": 0.01, "loss": 2.0068, "step": 48411 }, { "epoch": 4.966557242511285, "grad_norm": 0.04776471480727196, "learning_rate": 0.01, "loss": 1.9846, "step": 48414 }, { "epoch": 4.9668649979482975, "grad_norm": 0.0417228639125824, "learning_rate": 0.01, "loss": 1.9844, "step": 48417 }, { "epoch": 4.967172753385309, "grad_norm": 0.03331043943762779, "learning_rate": 0.01, "loss": 1.9844, "step": 48420 }, { "epoch": 4.967480508822322, "grad_norm": 0.07828322052955627, "learning_rate": 0.01, "loss": 2.0043, "step": 48423 }, { "epoch": 4.967788264259335, "grad_norm": 0.11830344796180725, "learning_rate": 0.01, "loss": 1.9765, "step": 48426 }, { "epoch": 4.968096019696348, "grad_norm": 0.13753825426101685, "learning_rate": 0.01, "loss": 1.9831, "step": 48429 }, { "epoch": 4.9684037751333605, "grad_norm": 0.10867384821176529, "learning_rate": 0.01, "loss": 1.9948, "step": 48432 }, { "epoch": 4.968711530570373, "grad_norm": 0.14065410196781158, "learning_rate": 0.01, "loss": 1.9892, "step": 48435 }, { "epoch": 4.969019286007386, "grad_norm": 0.1064314916729927, "learning_rate": 0.01, "loss": 1.963, "step": 48438 }, { "epoch": 4.969327041444399, "grad_norm": 0.08995570242404938, "learning_rate": 0.01, "loss": 1.9843, "step": 48441 }, { "epoch": 4.969634796881412, "grad_norm": 0.05942520126700401, "learning_rate": 0.01, "loss": 1.961, "step": 48444 }, { "epoch": 4.969942552318424, "grad_norm": 0.03557314723730087, "learning_rate": 0.01, "loss": 1.9917, "step": 48447 }, { "epoch": 4.970250307755437, "grad_norm": 0.058727700263261795, "learning_rate": 0.01, "loss": 1.9581, "step": 48450 }, { "epoch": 4.97055806319245, "grad_norm": 0.07610124349594116, "learning_rate": 0.01, "loss": 2.0181, "step": 48453 }, { "epoch": 4.970865818629463, "grad_norm": 0.062314316630363464, "learning_rate": 0.01, "loss": 2.0007, "step": 48456 }, { "epoch": 4.9711735740664755, "grad_norm": 0.060933034867048264, "learning_rate": 0.01, "loss": 1.959, "step": 48459 }, { "epoch": 4.971481329503488, "grad_norm": 0.11259069293737411, "learning_rate": 0.01, "loss": 1.9831, "step": 48462 }, { "epoch": 4.9717890849405, "grad_norm": 0.039150118827819824, "learning_rate": 0.01, "loss": 1.9762, "step": 48465 }, { "epoch": 4.972096840377513, "grad_norm": 0.06994688510894775, "learning_rate": 0.01, "loss": 2.0186, "step": 48468 }, { "epoch": 4.972404595814526, "grad_norm": 0.08845566213130951, "learning_rate": 0.01, "loss": 1.9827, "step": 48471 }, { "epoch": 4.9727123512515385, "grad_norm": 0.0835181474685669, "learning_rate": 0.01, "loss": 1.9734, "step": 48474 }, { "epoch": 4.973020106688551, "grad_norm": 0.059876278042793274, "learning_rate": 0.01, "loss": 1.9928, "step": 48477 }, { "epoch": 4.973327862125564, "grad_norm": 0.08387959748506546, "learning_rate": 0.01, "loss": 1.9581, "step": 48480 }, { "epoch": 4.973635617562577, "grad_norm": 0.04472680762410164, "learning_rate": 0.01, "loss": 2.0113, "step": 48483 }, { "epoch": 4.97394337299959, "grad_norm": 0.04980779439210892, "learning_rate": 0.01, "loss": 2.0031, "step": 48486 }, { "epoch": 4.974251128436602, "grad_norm": 0.05865331366658211, "learning_rate": 0.01, "loss": 1.9721, "step": 48489 }, { "epoch": 4.974558883873615, "grad_norm": 0.07731412351131439, "learning_rate": 0.01, "loss": 1.9838, "step": 48492 }, { "epoch": 4.974866639310628, "grad_norm": 0.0759957805275917, "learning_rate": 0.01, "loss": 1.9765, "step": 48495 }, { "epoch": 4.975174394747641, "grad_norm": 0.06392304599285126, "learning_rate": 0.01, "loss": 2.0035, "step": 48498 }, { "epoch": 4.975482150184654, "grad_norm": 0.04608240723609924, "learning_rate": 0.01, "loss": 1.9812, "step": 48501 }, { "epoch": 4.975789905621666, "grad_norm": 0.04515860974788666, "learning_rate": 0.01, "loss": 1.965, "step": 48504 }, { "epoch": 4.976097661058679, "grad_norm": 0.049364540725946426, "learning_rate": 0.01, "loss": 2.0013, "step": 48507 }, { "epoch": 4.976405416495691, "grad_norm": 0.05034893751144409, "learning_rate": 0.01, "loss": 2.0042, "step": 48510 }, { "epoch": 4.976713171932704, "grad_norm": 0.05965251848101616, "learning_rate": 0.01, "loss": 1.9829, "step": 48513 }, { "epoch": 4.977020927369717, "grad_norm": 0.03976500406861305, "learning_rate": 0.01, "loss": 1.9755, "step": 48516 }, { "epoch": 4.977328682806729, "grad_norm": 0.0873221829533577, "learning_rate": 0.01, "loss": 1.9835, "step": 48519 }, { "epoch": 4.977636438243742, "grad_norm": 0.10230670869350433, "learning_rate": 0.01, "loss": 1.991, "step": 48522 }, { "epoch": 4.977944193680755, "grad_norm": 0.1398303061723709, "learning_rate": 0.01, "loss": 2.0051, "step": 48525 }, { "epoch": 4.978251949117768, "grad_norm": 0.047815751284360886, "learning_rate": 0.01, "loss": 1.9831, "step": 48528 }, { "epoch": 4.9785597045547805, "grad_norm": 0.05280955508351326, "learning_rate": 0.01, "loss": 2.0061, "step": 48531 }, { "epoch": 4.978867459991793, "grad_norm": 0.05164310708642006, "learning_rate": 0.01, "loss": 1.9943, "step": 48534 }, { "epoch": 4.979175215428806, "grad_norm": 0.04212404415011406, "learning_rate": 0.01, "loss": 1.962, "step": 48537 }, { "epoch": 4.979482970865819, "grad_norm": 0.03191149979829788, "learning_rate": 0.01, "loss": 1.9907, "step": 48540 }, { "epoch": 4.979790726302832, "grad_norm": 0.09330854564905167, "learning_rate": 0.01, "loss": 1.9564, "step": 48543 }, { "epoch": 4.980098481739844, "grad_norm": 0.03602724149823189, "learning_rate": 0.01, "loss": 2.0041, "step": 48546 }, { "epoch": 4.980406237176857, "grad_norm": 0.1307518631219864, "learning_rate": 0.01, "loss": 1.987, "step": 48549 }, { "epoch": 4.98071399261387, "grad_norm": 0.17725148797035217, "learning_rate": 0.01, "loss": 1.9892, "step": 48552 }, { "epoch": 4.981021748050882, "grad_norm": 0.13484884798526764, "learning_rate": 0.01, "loss": 1.9553, "step": 48555 }, { "epoch": 4.981329503487895, "grad_norm": 0.15105341374874115, "learning_rate": 0.01, "loss": 1.9821, "step": 48558 }, { "epoch": 4.981637258924907, "grad_norm": 0.07108210027217865, "learning_rate": 0.01, "loss": 1.9704, "step": 48561 }, { "epoch": 4.98194501436192, "grad_norm": 0.04044497013092041, "learning_rate": 0.01, "loss": 2.0126, "step": 48564 }, { "epoch": 4.982252769798933, "grad_norm": 0.03189781680703163, "learning_rate": 0.01, "loss": 1.9908, "step": 48567 }, { "epoch": 4.982560525235946, "grad_norm": 0.12914758920669556, "learning_rate": 0.01, "loss": 2.0168, "step": 48570 }, { "epoch": 4.9828682806729585, "grad_norm": 0.09454645961523056, "learning_rate": 0.01, "loss": 1.966, "step": 48573 }, { "epoch": 4.983176036109971, "grad_norm": 0.09286819398403168, "learning_rate": 0.01, "loss": 2.0191, "step": 48576 }, { "epoch": 4.983483791546984, "grad_norm": 0.05915534123778343, "learning_rate": 0.01, "loss": 1.9644, "step": 48579 }, { "epoch": 4.983791546983997, "grad_norm": 0.09016136080026627, "learning_rate": 0.01, "loss": 1.9874, "step": 48582 }, { "epoch": 4.98409930242101, "grad_norm": 0.06749111413955688, "learning_rate": 0.01, "loss": 1.9842, "step": 48585 }, { "epoch": 4.984407057858022, "grad_norm": 0.05699833482503891, "learning_rate": 0.01, "loss": 1.9644, "step": 48588 }, { "epoch": 4.984714813295035, "grad_norm": 0.06643623113632202, "learning_rate": 0.01, "loss": 1.9956, "step": 48591 }, { "epoch": 4.985022568732048, "grad_norm": 0.05193551629781723, "learning_rate": 0.01, "loss": 1.9854, "step": 48594 }, { "epoch": 4.985330324169061, "grad_norm": 0.03996053710579872, "learning_rate": 0.01, "loss": 2.013, "step": 48597 }, { "epoch": 4.985638079606073, "grad_norm": 0.09908989816904068, "learning_rate": 0.01, "loss": 1.9584, "step": 48600 }, { "epoch": 4.985945835043085, "grad_norm": 0.09618446230888367, "learning_rate": 0.01, "loss": 1.9927, "step": 48603 }, { "epoch": 4.986253590480098, "grad_norm": 0.04452410712838173, "learning_rate": 0.01, "loss": 1.9993, "step": 48606 }, { "epoch": 4.986561345917111, "grad_norm": 0.08481396734714508, "learning_rate": 0.01, "loss": 1.9919, "step": 48609 }, { "epoch": 4.986869101354124, "grad_norm": 0.07361941784620285, "learning_rate": 0.01, "loss": 2.0121, "step": 48612 }, { "epoch": 4.9871768567911365, "grad_norm": 0.04092632979154587, "learning_rate": 0.01, "loss": 1.9846, "step": 48615 }, { "epoch": 4.987484612228149, "grad_norm": 0.05964810773730278, "learning_rate": 0.01, "loss": 1.9779, "step": 48618 }, { "epoch": 4.987792367665162, "grad_norm": 0.04937126114964485, "learning_rate": 0.01, "loss": 1.9797, "step": 48621 }, { "epoch": 4.988100123102175, "grad_norm": 0.061311025172472, "learning_rate": 0.01, "loss": 1.9829, "step": 48624 }, { "epoch": 4.988407878539188, "grad_norm": 0.11813945323228836, "learning_rate": 0.01, "loss": 1.9857, "step": 48627 }, { "epoch": 4.9887156339762, "grad_norm": 0.13528530299663544, "learning_rate": 0.01, "loss": 1.9662, "step": 48630 }, { "epoch": 4.989023389413213, "grad_norm": 0.13620121777057648, "learning_rate": 0.01, "loss": 1.9745, "step": 48633 }, { "epoch": 4.989331144850226, "grad_norm": 0.050104204565286636, "learning_rate": 0.01, "loss": 1.9976, "step": 48636 }, { "epoch": 4.989638900287239, "grad_norm": 0.04701809585094452, "learning_rate": 0.01, "loss": 2.0059, "step": 48639 }, { "epoch": 4.989946655724252, "grad_norm": 0.05600928142666817, "learning_rate": 0.01, "loss": 1.9777, "step": 48642 }, { "epoch": 4.9902544111612634, "grad_norm": 0.03713737428188324, "learning_rate": 0.01, "loss": 1.9688, "step": 48645 }, { "epoch": 4.990562166598276, "grad_norm": 0.10586056858301163, "learning_rate": 0.01, "loss": 2.023, "step": 48648 }, { "epoch": 4.990869922035289, "grad_norm": 0.07154802978038788, "learning_rate": 0.01, "loss": 1.9858, "step": 48651 }, { "epoch": 4.991177677472302, "grad_norm": 0.046645209193229675, "learning_rate": 0.01, "loss": 2.0023, "step": 48654 }, { "epoch": 4.991485432909315, "grad_norm": 0.10580138862133026, "learning_rate": 0.01, "loss": 1.9892, "step": 48657 }, { "epoch": 4.991793188346327, "grad_norm": 0.05583944171667099, "learning_rate": 0.01, "loss": 1.9666, "step": 48660 }, { "epoch": 4.99210094378334, "grad_norm": 0.04817153140902519, "learning_rate": 0.01, "loss": 2.0042, "step": 48663 }, { "epoch": 4.992408699220353, "grad_norm": 0.1129242405295372, "learning_rate": 0.01, "loss": 1.9761, "step": 48666 }, { "epoch": 4.992716454657366, "grad_norm": 0.09947514533996582, "learning_rate": 0.01, "loss": 1.9986, "step": 48669 }, { "epoch": 4.9930242100943785, "grad_norm": 0.07797490060329437, "learning_rate": 0.01, "loss": 1.9965, "step": 48672 }, { "epoch": 4.993331965531391, "grad_norm": 0.04402044415473938, "learning_rate": 0.01, "loss": 1.9888, "step": 48675 }, { "epoch": 4.993639720968404, "grad_norm": 0.05189018324017525, "learning_rate": 0.01, "loss": 1.9704, "step": 48678 }, { "epoch": 4.993947476405417, "grad_norm": 0.04376624524593353, "learning_rate": 0.01, "loss": 1.9883, "step": 48681 }, { "epoch": 4.99425523184243, "grad_norm": 0.06388211250305176, "learning_rate": 0.01, "loss": 1.9648, "step": 48684 }, { "epoch": 4.994562987279442, "grad_norm": 0.05990754812955856, "learning_rate": 0.01, "loss": 1.9796, "step": 48687 }, { "epoch": 4.994870742716454, "grad_norm": 0.06319117546081543, "learning_rate": 0.01, "loss": 1.9895, "step": 48690 }, { "epoch": 4.995178498153467, "grad_norm": 0.06176729500293732, "learning_rate": 0.01, "loss": 2.0171, "step": 48693 }, { "epoch": 4.99548625359048, "grad_norm": 0.06712348759174347, "learning_rate": 0.01, "loss": 1.9993, "step": 48696 }, { "epoch": 4.995794009027493, "grad_norm": 0.046728748828172684, "learning_rate": 0.01, "loss": 2.0015, "step": 48699 }, { "epoch": 4.996101764464505, "grad_norm": 0.04008499160408974, "learning_rate": 0.01, "loss": 2.0052, "step": 48702 }, { "epoch": 4.996409519901518, "grad_norm": 0.04931486025452614, "learning_rate": 0.01, "loss": 1.9815, "step": 48705 }, { "epoch": 4.996717275338531, "grad_norm": 0.09341800212860107, "learning_rate": 0.01, "loss": 1.9616, "step": 48708 }, { "epoch": 4.997025030775544, "grad_norm": 0.17475301027297974, "learning_rate": 0.01, "loss": 2.0156, "step": 48711 }, { "epoch": 4.9973327862125565, "grad_norm": 0.10119811445474625, "learning_rate": 0.01, "loss": 1.9786, "step": 48714 }, { "epoch": 4.997640541649569, "grad_norm": 0.04519075155258179, "learning_rate": 0.01, "loss": 2.0038, "step": 48717 }, { "epoch": 4.997948297086582, "grad_norm": 0.04394150897860527, "learning_rate": 0.01, "loss": 1.9691, "step": 48720 }, { "epoch": 4.998256052523595, "grad_norm": 0.03471103683114052, "learning_rate": 0.01, "loss": 1.9795, "step": 48723 }, { "epoch": 4.998563807960608, "grad_norm": 0.06422135978937149, "learning_rate": 0.01, "loss": 1.9652, "step": 48726 }, { "epoch": 4.99887156339762, "grad_norm": 0.10134468972682953, "learning_rate": 0.01, "loss": 2.0216, "step": 48729 }, { "epoch": 4.999179318834633, "grad_norm": 0.07451833784580231, "learning_rate": 0.01, "loss": 1.9897, "step": 48732 }, { "epoch": 4.999487074271645, "grad_norm": 0.12009834498167038, "learning_rate": 0.01, "loss": 1.9786, "step": 48735 }, { "epoch": 4.999794829708658, "grad_norm": 0.18911142647266388, "learning_rate": 0.01, "loss": 1.9925, "step": 48738 }, { "epoch": 5.005751258087707, "grad_norm": 0.11327698826789856, "learning_rate": 0.01, "loss": 2.0213, "step": 48741 }, { "epoch": 5.006059361199548, "grad_norm": 0.05276428908109665, "learning_rate": 0.01, "loss": 2.0193, "step": 48744 }, { "epoch": 5.006367464311389, "grad_norm": 0.04349682852625847, "learning_rate": 0.01, "loss": 2.0173, "step": 48747 }, { "epoch": 5.006675567423231, "grad_norm": 0.043818019330501556, "learning_rate": 0.01, "loss": 2.0334, "step": 48750 }, { "epoch": 5.006983670535073, "grad_norm": 0.04884595423936844, "learning_rate": 0.01, "loss": 2.0128, "step": 48753 }, { "epoch": 5.007291773646914, "grad_norm": 0.04434294253587723, "learning_rate": 0.01, "loss": 1.9983, "step": 48756 }, { "epoch": 5.007599876758755, "grad_norm": 0.07619164884090424, "learning_rate": 0.01, "loss": 1.9972, "step": 48759 }, { "epoch": 5.0079079798705965, "grad_norm": 0.04731893911957741, "learning_rate": 0.01, "loss": 2.006, "step": 48762 }, { "epoch": 5.008216082982438, "grad_norm": 0.03700774163007736, "learning_rate": 0.01, "loss": 2.0118, "step": 48765 }, { "epoch": 5.00852418609428, "grad_norm": 0.03112631104886532, "learning_rate": 0.01, "loss": 2.0249, "step": 48768 }, { "epoch": 5.008832289206121, "grad_norm": 0.12492241710424423, "learning_rate": 0.01, "loss": 2.0255, "step": 48771 }, { "epoch": 5.0091403923179625, "grad_norm": 0.03778549283742905, "learning_rate": 0.01, "loss": 1.968, "step": 48774 }, { "epoch": 5.009448495429804, "grad_norm": 0.06593509018421173, "learning_rate": 0.01, "loss": 2.0112, "step": 48777 }, { "epoch": 5.009756598541645, "grad_norm": 0.04298333451151848, "learning_rate": 0.01, "loss": 2.03, "step": 48780 }, { "epoch": 5.010064701653486, "grad_norm": 0.04566636681556702, "learning_rate": 0.01, "loss": 2.0217, "step": 48783 }, { "epoch": 5.010372804765328, "grad_norm": 0.045355070382356644, "learning_rate": 0.01, "loss": 2.0253, "step": 48786 }, { "epoch": 5.01068090787717, "grad_norm": 0.10494118183851242, "learning_rate": 0.01, "loss": 1.9849, "step": 48789 }, { "epoch": 5.010989010989011, "grad_norm": 0.07352183014154434, "learning_rate": 0.01, "loss": 2.0157, "step": 48792 }, { "epoch": 5.011297114100852, "grad_norm": 0.07270955294370651, "learning_rate": 0.01, "loss": 2.0133, "step": 48795 }, { "epoch": 5.0116052172126935, "grad_norm": 0.04416406527161598, "learning_rate": 0.01, "loss": 2.0346, "step": 48798 }, { "epoch": 5.011913320324536, "grad_norm": 0.08084416389465332, "learning_rate": 0.01, "loss": 2.0187, "step": 48801 }, { "epoch": 5.012221423436377, "grad_norm": 0.04440615326166153, "learning_rate": 0.01, "loss": 2.0034, "step": 48804 }, { "epoch": 5.012529526548218, "grad_norm": 0.0649610310792923, "learning_rate": 0.01, "loss": 2.0398, "step": 48807 }, { "epoch": 5.012837629660059, "grad_norm": 0.08533710986375809, "learning_rate": 0.01, "loss": 1.9951, "step": 48810 }, { "epoch": 5.013145732771901, "grad_norm": 0.06310707330703735, "learning_rate": 0.01, "loss": 2.0566, "step": 48813 }, { "epoch": 5.013453835883743, "grad_norm": 0.07096098363399506, "learning_rate": 0.01, "loss": 2.0022, "step": 48816 }, { "epoch": 5.013761938995584, "grad_norm": 0.08358955383300781, "learning_rate": 0.01, "loss": 2.011, "step": 48819 }, { "epoch": 5.014070042107425, "grad_norm": 0.07774472236633301, "learning_rate": 0.01, "loss": 2.0217, "step": 48822 }, { "epoch": 5.014378145219267, "grad_norm": 0.090924933552742, "learning_rate": 0.01, "loss": 2.0236, "step": 48825 }, { "epoch": 5.014686248331108, "grad_norm": 0.08830771595239639, "learning_rate": 0.01, "loss": 2.0131, "step": 48828 }, { "epoch": 5.014994351442949, "grad_norm": 0.09647586941719055, "learning_rate": 0.01, "loss": 2.0095, "step": 48831 }, { "epoch": 5.015302454554791, "grad_norm": 0.09413495659828186, "learning_rate": 0.01, "loss": 1.9973, "step": 48834 }, { "epoch": 5.0156105576666326, "grad_norm": 0.046874430030584335, "learning_rate": 0.01, "loss": 1.998, "step": 48837 }, { "epoch": 5.015918660778474, "grad_norm": 0.050309523940086365, "learning_rate": 0.01, "loss": 1.998, "step": 48840 }, { "epoch": 5.016226763890315, "grad_norm": 0.045271504670381546, "learning_rate": 0.01, "loss": 1.9903, "step": 48843 }, { "epoch": 5.016534867002156, "grad_norm": 0.03297847509384155, "learning_rate": 0.01, "loss": 2.0161, "step": 48846 }, { "epoch": 5.0168429701139985, "grad_norm": 0.08260242640972137, "learning_rate": 0.01, "loss": 2.02, "step": 48849 }, { "epoch": 5.01715107322584, "grad_norm": 0.04784693941473961, "learning_rate": 0.01, "loss": 1.9874, "step": 48852 }, { "epoch": 5.017459176337681, "grad_norm": 0.07972519099712372, "learning_rate": 0.01, "loss": 1.9839, "step": 48855 }, { "epoch": 5.017767279449522, "grad_norm": 0.09882769733667374, "learning_rate": 0.01, "loss": 2.0151, "step": 48858 }, { "epoch": 5.0180753825613635, "grad_norm": 0.059475596994161606, "learning_rate": 0.01, "loss": 1.9868, "step": 48861 }, { "epoch": 5.018383485673206, "grad_norm": 0.03795355185866356, "learning_rate": 0.01, "loss": 2.0319, "step": 48864 }, { "epoch": 5.018691588785047, "grad_norm": 0.04603972285985947, "learning_rate": 0.01, "loss": 1.9968, "step": 48867 }, { "epoch": 5.018999691896888, "grad_norm": 0.0678548589348793, "learning_rate": 0.01, "loss": 1.9866, "step": 48870 }, { "epoch": 5.0193077950087295, "grad_norm": 0.0728834941983223, "learning_rate": 0.01, "loss": 2.0082, "step": 48873 }, { "epoch": 5.019615898120571, "grad_norm": 0.08255651593208313, "learning_rate": 0.01, "loss": 1.9846, "step": 48876 }, { "epoch": 5.019924001232413, "grad_norm": 0.08427122980356216, "learning_rate": 0.01, "loss": 2.0082, "step": 48879 }, { "epoch": 5.020232104344254, "grad_norm": 0.06279109418392181, "learning_rate": 0.01, "loss": 1.997, "step": 48882 }, { "epoch": 5.020540207456095, "grad_norm": 0.12473344802856445, "learning_rate": 0.01, "loss": 2.0206, "step": 48885 }, { "epoch": 5.020848310567937, "grad_norm": 0.05066891387104988, "learning_rate": 0.01, "loss": 2.024, "step": 48888 }, { "epoch": 5.021156413679778, "grad_norm": 0.038826942443847656, "learning_rate": 0.01, "loss": 1.9918, "step": 48891 }, { "epoch": 5.021464516791619, "grad_norm": 0.058017902076244354, "learning_rate": 0.01, "loss": 2.0084, "step": 48894 }, { "epoch": 5.021772619903461, "grad_norm": 0.09492091834545135, "learning_rate": 0.01, "loss": 1.994, "step": 48897 }, { "epoch": 5.022080723015303, "grad_norm": 0.037246495485305786, "learning_rate": 0.01, "loss": 1.9979, "step": 48900 }, { "epoch": 5.022388826127144, "grad_norm": 0.06708169728517532, "learning_rate": 0.01, "loss": 2.0032, "step": 48903 }, { "epoch": 5.022696929238985, "grad_norm": 0.07195274531841278, "learning_rate": 0.01, "loss": 2.0184, "step": 48906 }, { "epoch": 5.023005032350826, "grad_norm": 0.04685629904270172, "learning_rate": 0.01, "loss": 2.0146, "step": 48909 }, { "epoch": 5.0233131354626686, "grad_norm": 0.08163397759199142, "learning_rate": 0.01, "loss": 2.0, "step": 48912 }, { "epoch": 5.02362123857451, "grad_norm": 0.05403751879930496, "learning_rate": 0.01, "loss": 2.0059, "step": 48915 }, { "epoch": 5.023929341686351, "grad_norm": 0.038569968193769455, "learning_rate": 0.01, "loss": 2.0312, "step": 48918 }, { "epoch": 5.024237444798192, "grad_norm": 0.055535174906253815, "learning_rate": 0.01, "loss": 2.003, "step": 48921 }, { "epoch": 5.024545547910034, "grad_norm": 0.05628294125199318, "learning_rate": 0.01, "loss": 2.0147, "step": 48924 }, { "epoch": 5.024853651021876, "grad_norm": 0.06412038207054138, "learning_rate": 0.01, "loss": 1.995, "step": 48927 }, { "epoch": 5.025161754133717, "grad_norm": 0.09966779500246048, "learning_rate": 0.01, "loss": 2.0417, "step": 48930 }, { "epoch": 5.025469857245558, "grad_norm": 0.059476420283317566, "learning_rate": 0.01, "loss": 2.0212, "step": 48933 }, { "epoch": 5.0257779603573995, "grad_norm": 0.04914550483226776, "learning_rate": 0.01, "loss": 2.017, "step": 48936 }, { "epoch": 5.026086063469241, "grad_norm": 0.03358187898993492, "learning_rate": 0.01, "loss": 1.9981, "step": 48939 }, { "epoch": 5.026394166581082, "grad_norm": 0.0671076700091362, "learning_rate": 0.01, "loss": 1.9936, "step": 48942 }, { "epoch": 5.026702269692924, "grad_norm": 0.14911647140979767, "learning_rate": 0.01, "loss": 2.027, "step": 48945 }, { "epoch": 5.0270103728047655, "grad_norm": 0.05344945192337036, "learning_rate": 0.01, "loss": 1.9975, "step": 48948 }, { "epoch": 5.027318475916607, "grad_norm": 0.05113920569419861, "learning_rate": 0.01, "loss": 1.9966, "step": 48951 }, { "epoch": 5.027626579028448, "grad_norm": 0.052601687610149384, "learning_rate": 0.01, "loss": 1.9968, "step": 48954 }, { "epoch": 5.027934682140289, "grad_norm": 0.04715648666024208, "learning_rate": 0.01, "loss": 2.0033, "step": 48957 }, { "epoch": 5.028242785252131, "grad_norm": 0.0414542555809021, "learning_rate": 0.01, "loss": 2.0176, "step": 48960 }, { "epoch": 5.028550888363973, "grad_norm": 0.057792216539382935, "learning_rate": 0.01, "loss": 1.9973, "step": 48963 }, { "epoch": 5.028858991475814, "grad_norm": 0.12371774017810822, "learning_rate": 0.01, "loss": 2.0134, "step": 48966 }, { "epoch": 5.029167094587655, "grad_norm": 0.053585126996040344, "learning_rate": 0.01, "loss": 2.003, "step": 48969 }, { "epoch": 5.0294751976994965, "grad_norm": 0.051625724881887436, "learning_rate": 0.01, "loss": 1.9944, "step": 48972 }, { "epoch": 5.029783300811339, "grad_norm": 0.03650549426674843, "learning_rate": 0.01, "loss": 1.9873, "step": 48975 }, { "epoch": 5.03009140392318, "grad_norm": 0.039793796837329865, "learning_rate": 0.01, "loss": 2.0092, "step": 48978 }, { "epoch": 5.030399507035021, "grad_norm": 0.03223228082060814, "learning_rate": 0.01, "loss": 2.0254, "step": 48981 }, { "epoch": 5.030707610146862, "grad_norm": 0.06604496389627457, "learning_rate": 0.01, "loss": 1.9915, "step": 48984 }, { "epoch": 5.031015713258704, "grad_norm": 0.13230498135089874, "learning_rate": 0.01, "loss": 2.014, "step": 48987 }, { "epoch": 5.031323816370545, "grad_norm": 0.07130693644285202, "learning_rate": 0.01, "loss": 2.0058, "step": 48990 }, { "epoch": 5.031631919482387, "grad_norm": 0.13743533194065094, "learning_rate": 0.01, "loss": 1.9949, "step": 48993 }, { "epoch": 5.031940022594228, "grad_norm": 0.08371279388666153, "learning_rate": 0.01, "loss": 1.9991, "step": 48996 }, { "epoch": 5.03224812570607, "grad_norm": 0.06169065833091736, "learning_rate": 0.01, "loss": 2.011, "step": 48999 }, { "epoch": 5.032556228817911, "grad_norm": 0.05723833665251732, "learning_rate": 0.01, "loss": 1.994, "step": 49002 }, { "epoch": 5.032864331929752, "grad_norm": 0.043473754078149796, "learning_rate": 0.01, "loss": 2.0156, "step": 49005 }, { "epoch": 5.033172435041594, "grad_norm": 0.08324360102415085, "learning_rate": 0.01, "loss": 2.002, "step": 49008 }, { "epoch": 5.0334805381534355, "grad_norm": 0.04197626933455467, "learning_rate": 0.01, "loss": 2.0118, "step": 49011 }, { "epoch": 5.033788641265277, "grad_norm": 0.07882298529148102, "learning_rate": 0.01, "loss": 2.0226, "step": 49014 }, { "epoch": 5.034096744377118, "grad_norm": 0.08073476701974869, "learning_rate": 0.01, "loss": 2.0272, "step": 49017 }, { "epoch": 5.034404847488959, "grad_norm": 0.0678163543343544, "learning_rate": 0.01, "loss": 2.0203, "step": 49020 }, { "epoch": 5.0347129506008015, "grad_norm": 0.04524783417582512, "learning_rate": 0.01, "loss": 2.0038, "step": 49023 }, { "epoch": 5.035021053712643, "grad_norm": 0.06805024296045303, "learning_rate": 0.01, "loss": 2.0321, "step": 49026 }, { "epoch": 5.035329156824484, "grad_norm": 0.12917374074459076, "learning_rate": 0.01, "loss": 1.9719, "step": 49029 }, { "epoch": 5.035637259936325, "grad_norm": 0.04843614995479584, "learning_rate": 0.01, "loss": 2.0189, "step": 49032 }, { "epoch": 5.0359453630481665, "grad_norm": 0.09850560128688812, "learning_rate": 0.01, "loss": 2.0141, "step": 49035 }, { "epoch": 5.036253466160008, "grad_norm": 0.04039955511689186, "learning_rate": 0.01, "loss": 2.0142, "step": 49038 }, { "epoch": 5.03656156927185, "grad_norm": 0.06758705526590347, "learning_rate": 0.01, "loss": 2.0286, "step": 49041 }, { "epoch": 5.036869672383691, "grad_norm": 0.03571697697043419, "learning_rate": 0.01, "loss": 1.9944, "step": 49044 }, { "epoch": 5.0371777754955325, "grad_norm": 0.034528639167547226, "learning_rate": 0.01, "loss": 1.9608, "step": 49047 }, { "epoch": 5.037485878607374, "grad_norm": 0.05735669657588005, "learning_rate": 0.01, "loss": 1.9942, "step": 49050 }, { "epoch": 5.037793981719215, "grad_norm": 0.11180947721004486, "learning_rate": 0.01, "loss": 2.0172, "step": 49053 }, { "epoch": 5.038102084831057, "grad_norm": 0.08823433518409729, "learning_rate": 0.01, "loss": 2.0125, "step": 49056 }, { "epoch": 5.038410187942898, "grad_norm": 0.06959319114685059, "learning_rate": 0.01, "loss": 1.9918, "step": 49059 }, { "epoch": 5.03871829105474, "grad_norm": 0.03767579421401024, "learning_rate": 0.01, "loss": 2.0122, "step": 49062 }, { "epoch": 5.039026394166581, "grad_norm": 0.032121919095516205, "learning_rate": 0.01, "loss": 2.0137, "step": 49065 }, { "epoch": 5.039334497278422, "grad_norm": 0.09328833967447281, "learning_rate": 0.01, "loss": 2.0125, "step": 49068 }, { "epoch": 5.039642600390264, "grad_norm": 0.05128837004303932, "learning_rate": 0.01, "loss": 2.0073, "step": 49071 }, { "epoch": 5.039950703502106, "grad_norm": 0.04546624422073364, "learning_rate": 0.01, "loss": 2.0395, "step": 49074 }, { "epoch": 5.040258806613947, "grad_norm": 0.03980513662099838, "learning_rate": 0.01, "loss": 2.0147, "step": 49077 }, { "epoch": 5.040566909725788, "grad_norm": 0.04204264655709267, "learning_rate": 0.01, "loss": 2.0044, "step": 49080 }, { "epoch": 5.040875012837629, "grad_norm": 0.04164162278175354, "learning_rate": 0.01, "loss": 2.0249, "step": 49083 }, { "epoch": 5.041183115949471, "grad_norm": 0.0698886588215828, "learning_rate": 0.01, "loss": 1.9975, "step": 49086 }, { "epoch": 5.041491219061313, "grad_norm": 0.11362603306770325, "learning_rate": 0.01, "loss": 1.9944, "step": 49089 }, { "epoch": 5.041799322173154, "grad_norm": 0.08820103108882904, "learning_rate": 0.01, "loss": 2.0056, "step": 49092 }, { "epoch": 5.042107425284995, "grad_norm": 0.06023460626602173, "learning_rate": 0.01, "loss": 2.0121, "step": 49095 }, { "epoch": 5.042415528396837, "grad_norm": 0.03523271903395653, "learning_rate": 0.01, "loss": 2.0173, "step": 49098 }, { "epoch": 5.042723631508678, "grad_norm": 0.062407344579696655, "learning_rate": 0.01, "loss": 1.9907, "step": 49101 }, { "epoch": 5.04303173462052, "grad_norm": 0.04747510328888893, "learning_rate": 0.01, "loss": 1.996, "step": 49104 }, { "epoch": 5.043339837732361, "grad_norm": 0.04824723303318024, "learning_rate": 0.01, "loss": 1.9955, "step": 49107 }, { "epoch": 5.0436479408442025, "grad_norm": 0.1672361046075821, "learning_rate": 0.01, "loss": 2.0082, "step": 49110 }, { "epoch": 5.043956043956044, "grad_norm": 0.09675043821334839, "learning_rate": 0.01, "loss": 2.0048, "step": 49113 }, { "epoch": 5.044264147067885, "grad_norm": 0.06804441660642624, "learning_rate": 0.01, "loss": 1.9977, "step": 49116 }, { "epoch": 5.044572250179727, "grad_norm": 0.05074039101600647, "learning_rate": 0.01, "loss": 2.0089, "step": 49119 }, { "epoch": 5.0448803532915685, "grad_norm": 0.049977827817201614, "learning_rate": 0.01, "loss": 2.0269, "step": 49122 }, { "epoch": 5.04518845640341, "grad_norm": 0.05549508333206177, "learning_rate": 0.01, "loss": 1.9883, "step": 49125 }, { "epoch": 5.045496559515251, "grad_norm": 0.03386814519762993, "learning_rate": 0.01, "loss": 2.0178, "step": 49128 }, { "epoch": 5.045804662627092, "grad_norm": 0.04851256310939789, "learning_rate": 0.01, "loss": 2.0093, "step": 49131 }, { "epoch": 5.0461127657389335, "grad_norm": 0.0848245769739151, "learning_rate": 0.01, "loss": 1.9886, "step": 49134 }, { "epoch": 5.046420868850776, "grad_norm": 0.03692524507641792, "learning_rate": 0.01, "loss": 1.9997, "step": 49137 }, { "epoch": 5.046728971962617, "grad_norm": 0.0406675823032856, "learning_rate": 0.01, "loss": 2.0188, "step": 49140 }, { "epoch": 5.047037075074458, "grad_norm": 0.07141982764005661, "learning_rate": 0.01, "loss": 2.0155, "step": 49143 }, { "epoch": 5.0473451781862995, "grad_norm": 0.0453253835439682, "learning_rate": 0.01, "loss": 1.9995, "step": 49146 }, { "epoch": 5.047653281298141, "grad_norm": 0.04967833310365677, "learning_rate": 0.01, "loss": 2.0021, "step": 49149 }, { "epoch": 5.047961384409983, "grad_norm": 0.09721168875694275, "learning_rate": 0.01, "loss": 1.9925, "step": 49152 }, { "epoch": 5.048269487521824, "grad_norm": 0.03806902468204498, "learning_rate": 0.01, "loss": 2.0228, "step": 49155 }, { "epoch": 5.048577590633665, "grad_norm": 0.048705510795116425, "learning_rate": 0.01, "loss": 2.0227, "step": 49158 }, { "epoch": 5.048885693745507, "grad_norm": 0.04676016420125961, "learning_rate": 0.01, "loss": 2.007, "step": 49161 }, { "epoch": 5.049193796857348, "grad_norm": 0.1444026082754135, "learning_rate": 0.01, "loss": 1.9997, "step": 49164 }, { "epoch": 5.04950189996919, "grad_norm": 0.07510826736688614, "learning_rate": 0.01, "loss": 2.0152, "step": 49167 }, { "epoch": 5.049810003081031, "grad_norm": 0.05828002095222473, "learning_rate": 0.01, "loss": 2.0027, "step": 49170 }, { "epoch": 5.050118106192873, "grad_norm": 0.04933730885386467, "learning_rate": 0.01, "loss": 1.9781, "step": 49173 }, { "epoch": 5.050426209304714, "grad_norm": 0.07392150163650513, "learning_rate": 0.01, "loss": 1.9918, "step": 49176 }, { "epoch": 5.050734312416555, "grad_norm": 0.03829526528716087, "learning_rate": 0.01, "loss": 2.0377, "step": 49179 }, { "epoch": 5.051042415528396, "grad_norm": 0.06644531339406967, "learning_rate": 0.01, "loss": 2.0086, "step": 49182 }, { "epoch": 5.0513505186402385, "grad_norm": 0.045286018401384354, "learning_rate": 0.01, "loss": 2.019, "step": 49185 }, { "epoch": 5.05165862175208, "grad_norm": 0.1067107766866684, "learning_rate": 0.01, "loss": 1.9967, "step": 49188 }, { "epoch": 5.051966724863921, "grad_norm": 0.09251461923122406, "learning_rate": 0.01, "loss": 1.9928, "step": 49191 }, { "epoch": 5.052274827975762, "grad_norm": 0.051101312041282654, "learning_rate": 0.01, "loss": 1.9809, "step": 49194 }, { "epoch": 5.052582931087604, "grad_norm": 0.060298655182123184, "learning_rate": 0.01, "loss": 2.0393, "step": 49197 }, { "epoch": 5.052891034199446, "grad_norm": 0.11112777143716812, "learning_rate": 0.01, "loss": 2.0006, "step": 49200 }, { "epoch": 5.053199137311287, "grad_norm": 0.05832698941230774, "learning_rate": 0.01, "loss": 2.0047, "step": 49203 }, { "epoch": 5.053507240423128, "grad_norm": 0.04670676961541176, "learning_rate": 0.01, "loss": 2.0106, "step": 49206 }, { "epoch": 5.0538153435349695, "grad_norm": 0.03849385678768158, "learning_rate": 0.01, "loss": 2.0102, "step": 49209 }, { "epoch": 5.054123446646811, "grad_norm": 0.043539416044950485, "learning_rate": 0.01, "loss": 1.9973, "step": 49212 }, { "epoch": 5.054431549758653, "grad_norm": 0.04201329126954079, "learning_rate": 0.01, "loss": 1.9961, "step": 49215 }, { "epoch": 5.054739652870494, "grad_norm": 0.046697720885276794, "learning_rate": 0.01, "loss": 2.0044, "step": 49218 }, { "epoch": 5.0550477559823355, "grad_norm": 0.13588139414787292, "learning_rate": 0.01, "loss": 2.0044, "step": 49221 }, { "epoch": 5.055355859094177, "grad_norm": 0.06000100448727608, "learning_rate": 0.01, "loss": 1.9979, "step": 49224 }, { "epoch": 5.055663962206018, "grad_norm": 0.05746195465326309, "learning_rate": 0.01, "loss": 2.017, "step": 49227 }, { "epoch": 5.05597206531786, "grad_norm": 0.05890364944934845, "learning_rate": 0.01, "loss": 2.0246, "step": 49230 }, { "epoch": 5.056280168429701, "grad_norm": 0.05617443472146988, "learning_rate": 0.01, "loss": 2.009, "step": 49233 }, { "epoch": 5.056588271541543, "grad_norm": 0.035529427230358124, "learning_rate": 0.01, "loss": 1.9948, "step": 49236 }, { "epoch": 5.056896374653384, "grad_norm": 0.08901556581258774, "learning_rate": 0.01, "loss": 2.0114, "step": 49239 }, { "epoch": 5.057204477765225, "grad_norm": 0.07534974068403244, "learning_rate": 0.01, "loss": 1.9976, "step": 49242 }, { "epoch": 5.0575125808770665, "grad_norm": 0.09517022967338562, "learning_rate": 0.01, "loss": 1.9911, "step": 49245 }, { "epoch": 5.057820683988909, "grad_norm": 0.0671476274728775, "learning_rate": 0.01, "loss": 2.0098, "step": 49248 }, { "epoch": 5.05812878710075, "grad_norm": 0.08913878351449966, "learning_rate": 0.01, "loss": 1.9914, "step": 49251 }, { "epoch": 5.058436890212591, "grad_norm": 0.06179509684443474, "learning_rate": 0.01, "loss": 2.015, "step": 49254 }, { "epoch": 5.058744993324432, "grad_norm": 0.055043041706085205, "learning_rate": 0.01, "loss": 1.9962, "step": 49257 }, { "epoch": 5.059053096436274, "grad_norm": 0.05528896301984787, "learning_rate": 0.01, "loss": 2.0115, "step": 49260 }, { "epoch": 5.059361199548116, "grad_norm": 0.1153421550989151, "learning_rate": 0.01, "loss": 2.0251, "step": 49263 }, { "epoch": 5.059669302659957, "grad_norm": 0.11637650430202484, "learning_rate": 0.01, "loss": 1.9815, "step": 49266 }, { "epoch": 5.059977405771798, "grad_norm": 0.04912794381380081, "learning_rate": 0.01, "loss": 1.9949, "step": 49269 }, { "epoch": 5.06028550888364, "grad_norm": 0.030755288898944855, "learning_rate": 0.01, "loss": 1.9968, "step": 49272 }, { "epoch": 5.060593611995481, "grad_norm": 0.03253510594367981, "learning_rate": 0.01, "loss": 1.999, "step": 49275 }, { "epoch": 5.060901715107323, "grad_norm": 0.0353284515440464, "learning_rate": 0.01, "loss": 2.008, "step": 49278 }, { "epoch": 5.061209818219164, "grad_norm": 0.04621535912156105, "learning_rate": 0.01, "loss": 2.0276, "step": 49281 }, { "epoch": 5.0615179213310055, "grad_norm": 0.08098949491977692, "learning_rate": 0.01, "loss": 1.98, "step": 49284 }, { "epoch": 5.061826024442847, "grad_norm": 0.07662132382392883, "learning_rate": 0.01, "loss": 2.0115, "step": 49287 }, { "epoch": 5.062134127554688, "grad_norm": 0.06088561937212944, "learning_rate": 0.01, "loss": 2.0079, "step": 49290 }, { "epoch": 5.062442230666529, "grad_norm": 0.09895024448633194, "learning_rate": 0.01, "loss": 1.9972, "step": 49293 }, { "epoch": 5.0627503337783715, "grad_norm": 0.05712589621543884, "learning_rate": 0.01, "loss": 1.9998, "step": 49296 }, { "epoch": 5.063058436890213, "grad_norm": 0.03906116634607315, "learning_rate": 0.01, "loss": 1.998, "step": 49299 }, { "epoch": 5.063366540002054, "grad_norm": 0.04972919449210167, "learning_rate": 0.01, "loss": 2.0043, "step": 49302 }, { "epoch": 5.063674643113895, "grad_norm": 0.048737164586782455, "learning_rate": 0.01, "loss": 1.9862, "step": 49305 }, { "epoch": 5.0639827462257365, "grad_norm": 0.06541066616773605, "learning_rate": 0.01, "loss": 2.0251, "step": 49308 }, { "epoch": 5.064290849337579, "grad_norm": 0.048164594918489456, "learning_rate": 0.01, "loss": 2.0352, "step": 49311 }, { "epoch": 5.06459895244942, "grad_norm": 0.09063941240310669, "learning_rate": 0.01, "loss": 2.0265, "step": 49314 }, { "epoch": 5.064907055561261, "grad_norm": 0.04573111608624458, "learning_rate": 0.01, "loss": 2.0126, "step": 49317 }, { "epoch": 5.0652151586731025, "grad_norm": 0.033861320465803146, "learning_rate": 0.01, "loss": 2.0023, "step": 49320 }, { "epoch": 5.065523261784944, "grad_norm": 0.03708826005458832, "learning_rate": 0.01, "loss": 2.0182, "step": 49323 }, { "epoch": 5.065831364896786, "grad_norm": 0.10216791927814484, "learning_rate": 0.01, "loss": 2.0111, "step": 49326 }, { "epoch": 5.066139468008627, "grad_norm": 0.06112508475780487, "learning_rate": 0.01, "loss": 1.9837, "step": 49329 }, { "epoch": 5.066447571120468, "grad_norm": 0.06171619892120361, "learning_rate": 0.01, "loss": 2.007, "step": 49332 }, { "epoch": 5.06675567423231, "grad_norm": 0.03746636584401131, "learning_rate": 0.01, "loss": 1.9994, "step": 49335 }, { "epoch": 5.067063777344151, "grad_norm": 0.06251370906829834, "learning_rate": 0.01, "loss": 1.9989, "step": 49338 }, { "epoch": 5.067371880455992, "grad_norm": 0.03557705506682396, "learning_rate": 0.01, "loss": 2.0141, "step": 49341 }, { "epoch": 5.067679983567834, "grad_norm": 0.036395229399204254, "learning_rate": 0.01, "loss": 2.0069, "step": 49344 }, { "epoch": 5.067988086679676, "grad_norm": 0.12709848582744598, "learning_rate": 0.01, "loss": 2.0077, "step": 49347 }, { "epoch": 5.068296189791517, "grad_norm": 0.05967814847826958, "learning_rate": 0.01, "loss": 2.0009, "step": 49350 }, { "epoch": 5.068604292903358, "grad_norm": 0.09794972836971283, "learning_rate": 0.01, "loss": 2.0088, "step": 49353 }, { "epoch": 5.068912396015199, "grad_norm": 0.06263583153486252, "learning_rate": 0.01, "loss": 2.0012, "step": 49356 }, { "epoch": 5.0692204991270415, "grad_norm": 0.08471337705850601, "learning_rate": 0.01, "loss": 1.9976, "step": 49359 }, { "epoch": 5.069528602238883, "grad_norm": 0.06739859282970428, "learning_rate": 0.01, "loss": 2.0002, "step": 49362 }, { "epoch": 5.069836705350724, "grad_norm": 0.09328118711709976, "learning_rate": 0.01, "loss": 2.0053, "step": 49365 }, { "epoch": 5.070144808462565, "grad_norm": 0.09012078493833542, "learning_rate": 0.01, "loss": 1.9857, "step": 49368 }, { "epoch": 5.070452911574407, "grad_norm": 0.03964925929903984, "learning_rate": 0.01, "loss": 2.0001, "step": 49371 }, { "epoch": 5.070761014686249, "grad_norm": 0.12138742208480835, "learning_rate": 0.01, "loss": 2.0361, "step": 49374 }, { "epoch": 5.07106911779809, "grad_norm": 0.04422127828001976, "learning_rate": 0.01, "loss": 2.0163, "step": 49377 }, { "epoch": 5.071377220909931, "grad_norm": 0.038298092782497406, "learning_rate": 0.01, "loss": 1.9843, "step": 49380 }, { "epoch": 5.0716853240217725, "grad_norm": 0.04425305128097534, "learning_rate": 0.01, "loss": 1.9841, "step": 49383 }, { "epoch": 5.071993427133614, "grad_norm": 0.08276382088661194, "learning_rate": 0.01, "loss": 1.9874, "step": 49386 }, { "epoch": 5.072301530245455, "grad_norm": 0.06378073245286942, "learning_rate": 0.01, "loss": 2.0165, "step": 49389 }, { "epoch": 5.072609633357297, "grad_norm": 0.12052054703235626, "learning_rate": 0.01, "loss": 1.9964, "step": 49392 }, { "epoch": 5.0729177364691385, "grad_norm": 0.04634268954396248, "learning_rate": 0.01, "loss": 2.0047, "step": 49395 }, { "epoch": 5.07322583958098, "grad_norm": 0.055755455046892166, "learning_rate": 0.01, "loss": 2.0214, "step": 49398 }, { "epoch": 5.073533942692821, "grad_norm": 0.04300512373447418, "learning_rate": 0.01, "loss": 2.0191, "step": 49401 }, { "epoch": 5.073842045804662, "grad_norm": 0.08284097909927368, "learning_rate": 0.01, "loss": 2.0158, "step": 49404 }, { "epoch": 5.074150148916504, "grad_norm": 0.046609655022621155, "learning_rate": 0.01, "loss": 2.0046, "step": 49407 }, { "epoch": 5.074458252028346, "grad_norm": 0.06387645751237869, "learning_rate": 0.01, "loss": 2.0109, "step": 49410 }, { "epoch": 5.074766355140187, "grad_norm": 0.0506816990673542, "learning_rate": 0.01, "loss": 1.9857, "step": 49413 }, { "epoch": 5.075074458252028, "grad_norm": 0.03865443170070648, "learning_rate": 0.01, "loss": 2.0096, "step": 49416 }, { "epoch": 5.0753825613638694, "grad_norm": 0.07702656835317612, "learning_rate": 0.01, "loss": 2.0087, "step": 49419 }, { "epoch": 5.075690664475712, "grad_norm": 0.12181714177131653, "learning_rate": 0.01, "loss": 2.0012, "step": 49422 }, { "epoch": 5.075998767587553, "grad_norm": 0.06351039558649063, "learning_rate": 0.01, "loss": 2.0137, "step": 49425 }, { "epoch": 5.076306870699394, "grad_norm": 0.09312722831964493, "learning_rate": 0.01, "loss": 1.9969, "step": 49428 }, { "epoch": 5.076614973811235, "grad_norm": 0.06879006326198578, "learning_rate": 0.01, "loss": 1.982, "step": 49431 }, { "epoch": 5.076923076923077, "grad_norm": 0.06043083220720291, "learning_rate": 0.01, "loss": 2.0201, "step": 49434 }, { "epoch": 5.077231180034918, "grad_norm": 0.049164239317178726, "learning_rate": 0.01, "loss": 2.0171, "step": 49437 }, { "epoch": 5.07753928314676, "grad_norm": 0.04470280185341835, "learning_rate": 0.01, "loss": 1.9967, "step": 49440 }, { "epoch": 5.077847386258601, "grad_norm": 0.04513731226325035, "learning_rate": 0.01, "loss": 2.0123, "step": 49443 }, { "epoch": 5.078155489370443, "grad_norm": 0.07849404215812683, "learning_rate": 0.01, "loss": 2.0244, "step": 49446 }, { "epoch": 5.078463592482284, "grad_norm": 0.04768620431423187, "learning_rate": 0.01, "loss": 2.0, "step": 49449 }, { "epoch": 5.078771695594125, "grad_norm": 0.09680715203285217, "learning_rate": 0.01, "loss": 2.0234, "step": 49452 }, { "epoch": 5.079079798705967, "grad_norm": 0.07119087129831314, "learning_rate": 0.01, "loss": 2.01, "step": 49455 }, { "epoch": 5.0793879018178085, "grad_norm": 0.11560992896556854, "learning_rate": 0.01, "loss": 1.9967, "step": 49458 }, { "epoch": 5.07969600492965, "grad_norm": 0.039443932473659515, "learning_rate": 0.01, "loss": 2.0117, "step": 49461 }, { "epoch": 5.080004108041491, "grad_norm": 0.040348734706640244, "learning_rate": 0.01, "loss": 1.9996, "step": 49464 }, { "epoch": 5.080312211153332, "grad_norm": 0.10496348142623901, "learning_rate": 0.01, "loss": 2.0161, "step": 49467 }, { "epoch": 5.0806203142651745, "grad_norm": 0.10493961721658707, "learning_rate": 0.01, "loss": 2.0033, "step": 49470 }, { "epoch": 5.080928417377016, "grad_norm": 0.08104293048381805, "learning_rate": 0.01, "loss": 2.008, "step": 49473 }, { "epoch": 5.081236520488857, "grad_norm": 0.07609682530164719, "learning_rate": 0.01, "loss": 2.0156, "step": 49476 }, { "epoch": 5.081544623600698, "grad_norm": 0.03504324331879616, "learning_rate": 0.01, "loss": 1.9998, "step": 49479 }, { "epoch": 5.0818527267125395, "grad_norm": 0.03438768908381462, "learning_rate": 0.01, "loss": 1.9962, "step": 49482 }, { "epoch": 5.082160829824382, "grad_norm": 0.0372585728764534, "learning_rate": 0.01, "loss": 2.0175, "step": 49485 }, { "epoch": 5.082468932936223, "grad_norm": 0.06997973471879959, "learning_rate": 0.01, "loss": 2.0001, "step": 49488 }, { "epoch": 5.082777036048064, "grad_norm": 0.07300589978694916, "learning_rate": 0.01, "loss": 1.9983, "step": 49491 }, { "epoch": 5.0830851391599055, "grad_norm": 0.053376756608486176, "learning_rate": 0.01, "loss": 2.0148, "step": 49494 }, { "epoch": 5.083393242271747, "grad_norm": 0.060256477445364, "learning_rate": 0.01, "loss": 2.0119, "step": 49497 }, { "epoch": 5.083701345383588, "grad_norm": 0.0293456818908453, "learning_rate": 0.01, "loss": 1.9951, "step": 49500 }, { "epoch": 5.08400944849543, "grad_norm": 0.08368721604347229, "learning_rate": 0.01, "loss": 2.0083, "step": 49503 }, { "epoch": 5.084317551607271, "grad_norm": 0.15514978766441345, "learning_rate": 0.01, "loss": 1.9993, "step": 49506 }, { "epoch": 5.084625654719113, "grad_norm": 0.07931728661060333, "learning_rate": 0.01, "loss": 2.0019, "step": 49509 }, { "epoch": 5.084933757830954, "grad_norm": 0.038475409150123596, "learning_rate": 0.01, "loss": 2.0137, "step": 49512 }, { "epoch": 5.085241860942795, "grad_norm": 0.03167513385415077, "learning_rate": 0.01, "loss": 2.0129, "step": 49515 }, { "epoch": 5.085549964054637, "grad_norm": 0.053547751158475876, "learning_rate": 0.01, "loss": 2.0138, "step": 49518 }, { "epoch": 5.085858067166479, "grad_norm": 0.06962644308805466, "learning_rate": 0.01, "loss": 2.0007, "step": 49521 }, { "epoch": 5.08616617027832, "grad_norm": 0.06724515557289124, "learning_rate": 0.01, "loss": 1.9911, "step": 49524 }, { "epoch": 5.086474273390161, "grad_norm": 0.06945919245481491, "learning_rate": 0.01, "loss": 2.0069, "step": 49527 }, { "epoch": 5.086782376502002, "grad_norm": 0.03792745992541313, "learning_rate": 0.01, "loss": 2.0023, "step": 49530 }, { "epoch": 5.0870904796138445, "grad_norm": 0.05056298151612282, "learning_rate": 0.01, "loss": 2.0414, "step": 49533 }, { "epoch": 5.087398582725686, "grad_norm": 0.07531926780939102, "learning_rate": 0.01, "loss": 1.9934, "step": 49536 }, { "epoch": 5.087706685837527, "grad_norm": 0.0834566205739975, "learning_rate": 0.01, "loss": 1.9984, "step": 49539 }, { "epoch": 5.088014788949368, "grad_norm": 0.038478609174489975, "learning_rate": 0.01, "loss": 2.0142, "step": 49542 }, { "epoch": 5.08832289206121, "grad_norm": 0.06002604216337204, "learning_rate": 0.01, "loss": 1.9947, "step": 49545 }, { "epoch": 5.088630995173051, "grad_norm": 0.046000853180885315, "learning_rate": 0.01, "loss": 2.0068, "step": 49548 }, { "epoch": 5.088939098284893, "grad_norm": 0.06248699501156807, "learning_rate": 0.01, "loss": 2.0116, "step": 49551 }, { "epoch": 5.089247201396734, "grad_norm": 0.10761575400829315, "learning_rate": 0.01, "loss": 1.983, "step": 49554 }, { "epoch": 5.0895553045085755, "grad_norm": 0.09329602122306824, "learning_rate": 0.01, "loss": 1.9953, "step": 49557 }, { "epoch": 5.089863407620417, "grad_norm": 0.08233852684497833, "learning_rate": 0.01, "loss": 1.993, "step": 49560 }, { "epoch": 5.090171510732258, "grad_norm": 0.06591898202896118, "learning_rate": 0.01, "loss": 1.9769, "step": 49563 }, { "epoch": 5.0904796138441, "grad_norm": 0.11147011071443558, "learning_rate": 0.01, "loss": 2.0415, "step": 49566 }, { "epoch": 5.0907877169559415, "grad_norm": 0.10501214861869812, "learning_rate": 0.01, "loss": 2.0139, "step": 49569 }, { "epoch": 5.091095820067783, "grad_norm": 0.04929099977016449, "learning_rate": 0.01, "loss": 2.0095, "step": 49572 }, { "epoch": 5.091403923179624, "grad_norm": 0.09390784054994583, "learning_rate": 0.01, "loss": 2.0121, "step": 49575 }, { "epoch": 5.091712026291465, "grad_norm": 0.08599641174077988, "learning_rate": 0.01, "loss": 2.0126, "step": 49578 }, { "epoch": 5.092020129403307, "grad_norm": 0.04285844415426254, "learning_rate": 0.01, "loss": 2.005, "step": 49581 }, { "epoch": 5.092328232515149, "grad_norm": 0.034273531287908554, "learning_rate": 0.01, "loss": 2.0119, "step": 49584 }, { "epoch": 5.09263633562699, "grad_norm": 0.04617328196763992, "learning_rate": 0.01, "loss": 1.986, "step": 49587 }, { "epoch": 5.092944438738831, "grad_norm": 0.08739766478538513, "learning_rate": 0.01, "loss": 1.9793, "step": 49590 }, { "epoch": 5.093252541850672, "grad_norm": 0.06263922899961472, "learning_rate": 0.01, "loss": 2.0396, "step": 49593 }, { "epoch": 5.093560644962514, "grad_norm": 0.06044527515769005, "learning_rate": 0.01, "loss": 2.0066, "step": 49596 }, { "epoch": 5.093868748074356, "grad_norm": 0.046116676181554794, "learning_rate": 0.01, "loss": 1.9828, "step": 49599 }, { "epoch": 5.094176851186197, "grad_norm": 0.1035202220082283, "learning_rate": 0.01, "loss": 2.0087, "step": 49602 }, { "epoch": 5.094484954298038, "grad_norm": 0.03816954419016838, "learning_rate": 0.01, "loss": 1.9871, "step": 49605 }, { "epoch": 5.09479305740988, "grad_norm": 0.04604346677660942, "learning_rate": 0.01, "loss": 1.997, "step": 49608 }, { "epoch": 5.095101160521721, "grad_norm": 0.05297078937292099, "learning_rate": 0.01, "loss": 1.9893, "step": 49611 }, { "epoch": 5.095409263633563, "grad_norm": 0.03836045414209366, "learning_rate": 0.01, "loss": 2.0122, "step": 49614 }, { "epoch": 5.095717366745404, "grad_norm": 0.047086507081985474, "learning_rate": 0.01, "loss": 2.0136, "step": 49617 }, { "epoch": 5.096025469857246, "grad_norm": 0.04657857492566109, "learning_rate": 0.01, "loss": 1.9912, "step": 49620 }, { "epoch": 5.096333572969087, "grad_norm": 0.08411470055580139, "learning_rate": 0.01, "loss": 1.9885, "step": 49623 }, { "epoch": 5.096641676080928, "grad_norm": 0.16819079220294952, "learning_rate": 0.01, "loss": 2.013, "step": 49626 }, { "epoch": 5.09694977919277, "grad_norm": 0.04950143024325371, "learning_rate": 0.01, "loss": 1.9834, "step": 49629 }, { "epoch": 5.0972578823046115, "grad_norm": 0.039262961596250534, "learning_rate": 0.01, "loss": 1.9927, "step": 49632 }, { "epoch": 5.097565985416453, "grad_norm": 0.03746853023767471, "learning_rate": 0.01, "loss": 2.0156, "step": 49635 }, { "epoch": 5.097874088528294, "grad_norm": 0.052741505205631256, "learning_rate": 0.01, "loss": 2.0088, "step": 49638 }, { "epoch": 5.098182191640135, "grad_norm": 0.1192871630191803, "learning_rate": 0.01, "loss": 2.0065, "step": 49641 }, { "epoch": 5.098490294751977, "grad_norm": 0.054899461567401886, "learning_rate": 0.01, "loss": 1.998, "step": 49644 }, { "epoch": 5.098798397863819, "grad_norm": 0.05850033089518547, "learning_rate": 0.01, "loss": 1.9867, "step": 49647 }, { "epoch": 5.09910650097566, "grad_norm": 0.08758574724197388, "learning_rate": 0.01, "loss": 2.0139, "step": 49650 }, { "epoch": 5.099414604087501, "grad_norm": 0.09481152147054672, "learning_rate": 0.01, "loss": 1.9929, "step": 49653 }, { "epoch": 5.0997227071993425, "grad_norm": 0.12264327704906464, "learning_rate": 0.01, "loss": 2.0019, "step": 49656 }, { "epoch": 5.100030810311184, "grad_norm": 0.06704699993133545, "learning_rate": 0.01, "loss": 2.0161, "step": 49659 }, { "epoch": 5.100338913423026, "grad_norm": 0.10752804577350616, "learning_rate": 0.01, "loss": 2.0098, "step": 49662 }, { "epoch": 5.100647016534867, "grad_norm": 0.09217878431081772, "learning_rate": 0.01, "loss": 2.019, "step": 49665 }, { "epoch": 5.1009551196467084, "grad_norm": 0.04479275271296501, "learning_rate": 0.01, "loss": 2.0088, "step": 49668 }, { "epoch": 5.10126322275855, "grad_norm": 0.06177330017089844, "learning_rate": 0.01, "loss": 2.0016, "step": 49671 }, { "epoch": 5.101571325870391, "grad_norm": 0.09911325573921204, "learning_rate": 0.01, "loss": 2.0058, "step": 49674 }, { "epoch": 5.101879428982233, "grad_norm": 0.06496170163154602, "learning_rate": 0.01, "loss": 1.9966, "step": 49677 }, { "epoch": 5.102187532094074, "grad_norm": 0.04296518489718437, "learning_rate": 0.01, "loss": 2.0, "step": 49680 }, { "epoch": 5.102495635205916, "grad_norm": 0.05492605268955231, "learning_rate": 0.01, "loss": 2.0067, "step": 49683 }, { "epoch": 5.102803738317757, "grad_norm": 0.033720582723617554, "learning_rate": 0.01, "loss": 1.9872, "step": 49686 }, { "epoch": 5.103111841429598, "grad_norm": 0.10172949731349945, "learning_rate": 0.01, "loss": 1.9937, "step": 49689 }, { "epoch": 5.103419944541439, "grad_norm": 0.05326984077692032, "learning_rate": 0.01, "loss": 1.9909, "step": 49692 }, { "epoch": 5.103728047653282, "grad_norm": 0.10597484558820724, "learning_rate": 0.01, "loss": 2.0102, "step": 49695 }, { "epoch": 5.104036150765123, "grad_norm": 0.03910377249121666, "learning_rate": 0.01, "loss": 2.0016, "step": 49698 }, { "epoch": 5.104344253876964, "grad_norm": 0.04232342913746834, "learning_rate": 0.01, "loss": 2.0125, "step": 49701 }, { "epoch": 5.104652356988805, "grad_norm": 0.11343611031770706, "learning_rate": 0.01, "loss": 2.0245, "step": 49704 }, { "epoch": 5.104960460100647, "grad_norm": 0.06593618541955948, "learning_rate": 0.01, "loss": 2.0134, "step": 49707 }, { "epoch": 5.105268563212489, "grad_norm": 0.07576625049114227, "learning_rate": 0.01, "loss": 2.0023, "step": 49710 }, { "epoch": 5.10557666632433, "grad_norm": 0.10935617238283157, "learning_rate": 0.01, "loss": 1.9982, "step": 49713 }, { "epoch": 5.105884769436171, "grad_norm": 0.04176800325512886, "learning_rate": 0.01, "loss": 2.0134, "step": 49716 }, { "epoch": 5.106192872548013, "grad_norm": 0.05533302202820778, "learning_rate": 0.01, "loss": 2.007, "step": 49719 }, { "epoch": 5.106500975659854, "grad_norm": 0.05446555092930794, "learning_rate": 0.01, "loss": 2.0219, "step": 49722 }, { "epoch": 5.106809078771696, "grad_norm": 0.05760158970952034, "learning_rate": 0.01, "loss": 2.0075, "step": 49725 }, { "epoch": 5.107117181883537, "grad_norm": 0.11345206201076508, "learning_rate": 0.01, "loss": 1.9991, "step": 49728 }, { "epoch": 5.1074252849953785, "grad_norm": 0.09869284927845001, "learning_rate": 0.01, "loss": 1.9922, "step": 49731 }, { "epoch": 5.10773338810722, "grad_norm": 0.050909820944070816, "learning_rate": 0.01, "loss": 1.9975, "step": 49734 }, { "epoch": 5.108041491219061, "grad_norm": 0.03889784961938858, "learning_rate": 0.01, "loss": 2.0031, "step": 49737 }, { "epoch": 5.108349594330903, "grad_norm": 0.04064812883734703, "learning_rate": 0.01, "loss": 2.0074, "step": 49740 }, { "epoch": 5.1086576974427444, "grad_norm": 0.13409452140331268, "learning_rate": 0.01, "loss": 2.0245, "step": 49743 }, { "epoch": 5.108965800554586, "grad_norm": 0.0955972746014595, "learning_rate": 0.01, "loss": 1.9714, "step": 49746 }, { "epoch": 5.109273903666427, "grad_norm": 0.11944996565580368, "learning_rate": 0.01, "loss": 1.9819, "step": 49749 }, { "epoch": 5.109582006778268, "grad_norm": 0.06599771976470947, "learning_rate": 0.01, "loss": 1.9904, "step": 49752 }, { "epoch": 5.1098901098901095, "grad_norm": 0.05203530192375183, "learning_rate": 0.01, "loss": 2.0158, "step": 49755 }, { "epoch": 5.110198213001952, "grad_norm": 0.052099455147981644, "learning_rate": 0.01, "loss": 1.9899, "step": 49758 }, { "epoch": 5.110506316113793, "grad_norm": 0.06005854532122612, "learning_rate": 0.01, "loss": 2.0099, "step": 49761 }, { "epoch": 5.110814419225634, "grad_norm": 0.0429551899433136, "learning_rate": 0.01, "loss": 1.9929, "step": 49764 }, { "epoch": 5.111122522337475, "grad_norm": 0.07173555344343185, "learning_rate": 0.01, "loss": 2.0108, "step": 49767 }, { "epoch": 5.111430625449317, "grad_norm": 0.05251854285597801, "learning_rate": 0.01, "loss": 2.0304, "step": 49770 }, { "epoch": 5.111738728561159, "grad_norm": 0.06687440723180771, "learning_rate": 0.01, "loss": 1.9996, "step": 49773 }, { "epoch": 5.112046831673, "grad_norm": 0.0841129869222641, "learning_rate": 0.01, "loss": 2.0104, "step": 49776 }, { "epoch": 5.112354934784841, "grad_norm": 0.086825892329216, "learning_rate": 0.01, "loss": 2.009, "step": 49779 }, { "epoch": 5.112663037896683, "grad_norm": 0.059246089309453964, "learning_rate": 0.01, "loss": 1.9956, "step": 49782 }, { "epoch": 5.112971141008524, "grad_norm": 0.061836544424295425, "learning_rate": 0.01, "loss": 2.0194, "step": 49785 }, { "epoch": 5.113279244120366, "grad_norm": 0.0878891870379448, "learning_rate": 0.01, "loss": 2.0169, "step": 49788 }, { "epoch": 5.113587347232207, "grad_norm": 0.061963435262441635, "learning_rate": 0.01, "loss": 1.9868, "step": 49791 }, { "epoch": 5.113895450344049, "grad_norm": 0.07002797722816467, "learning_rate": 0.01, "loss": 2.0097, "step": 49794 }, { "epoch": 5.11420355345589, "grad_norm": 0.07307753711938858, "learning_rate": 0.01, "loss": 2.0016, "step": 49797 }, { "epoch": 5.114511656567731, "grad_norm": 0.05582696199417114, "learning_rate": 0.01, "loss": 2.0227, "step": 49800 }, { "epoch": 5.114819759679572, "grad_norm": 0.11906569451093674, "learning_rate": 0.01, "loss": 2.0094, "step": 49803 }, { "epoch": 5.1151278627914145, "grad_norm": 0.041671089828014374, "learning_rate": 0.01, "loss": 1.9973, "step": 49806 }, { "epoch": 5.115435965903256, "grad_norm": 0.040786996483802795, "learning_rate": 0.01, "loss": 2.0033, "step": 49809 }, { "epoch": 5.115744069015097, "grad_norm": 0.06962720304727554, "learning_rate": 0.01, "loss": 2.0245, "step": 49812 }, { "epoch": 5.116052172126938, "grad_norm": 0.05079250782728195, "learning_rate": 0.01, "loss": 2.0132, "step": 49815 }, { "epoch": 5.11636027523878, "grad_norm": 0.03422404080629349, "learning_rate": 0.01, "loss": 1.9976, "step": 49818 }, { "epoch": 5.116668378350622, "grad_norm": 0.13034658133983612, "learning_rate": 0.01, "loss": 2.0303, "step": 49821 }, { "epoch": 5.116976481462463, "grad_norm": 0.05432206392288208, "learning_rate": 0.01, "loss": 1.9922, "step": 49824 }, { "epoch": 5.117284584574304, "grad_norm": 0.11153416335582733, "learning_rate": 0.01, "loss": 1.9886, "step": 49827 }, { "epoch": 5.1175926876861455, "grad_norm": 0.04324459657073021, "learning_rate": 0.01, "loss": 2.0056, "step": 49830 }, { "epoch": 5.117900790797987, "grad_norm": 0.035336676985025406, "learning_rate": 0.01, "loss": 2.0035, "step": 49833 }, { "epoch": 5.118208893909829, "grad_norm": 0.07241083681583405, "learning_rate": 0.01, "loss": 2.0045, "step": 49836 }, { "epoch": 5.11851699702167, "grad_norm": 0.05331496521830559, "learning_rate": 0.01, "loss": 1.9724, "step": 49839 }, { "epoch": 5.118825100133511, "grad_norm": 0.04856008291244507, "learning_rate": 0.01, "loss": 2.009, "step": 49842 }, { "epoch": 5.119133203245353, "grad_norm": 0.07900402694940567, "learning_rate": 0.01, "loss": 1.9957, "step": 49845 }, { "epoch": 5.119441306357194, "grad_norm": 0.04300111532211304, "learning_rate": 0.01, "loss": 2.013, "step": 49848 }, { "epoch": 5.119749409469035, "grad_norm": 0.09242533892393112, "learning_rate": 0.01, "loss": 1.996, "step": 49851 }, { "epoch": 5.120057512580877, "grad_norm": 0.047889694571495056, "learning_rate": 0.01, "loss": 1.9968, "step": 49854 }, { "epoch": 5.120365615692719, "grad_norm": 0.04471924528479576, "learning_rate": 0.01, "loss": 2.0219, "step": 49857 }, { "epoch": 5.12067371880456, "grad_norm": 0.11666672676801682, "learning_rate": 0.01, "loss": 1.9998, "step": 49860 }, { "epoch": 5.120981821916401, "grad_norm": 0.0470040999352932, "learning_rate": 0.01, "loss": 1.9728, "step": 49863 }, { "epoch": 5.121289925028242, "grad_norm": 0.10009215027093887, "learning_rate": 0.01, "loss": 2.0075, "step": 49866 }, { "epoch": 5.121598028140085, "grad_norm": 0.06280893087387085, "learning_rate": 0.01, "loss": 2.0109, "step": 49869 }, { "epoch": 5.121906131251926, "grad_norm": 0.046029094606637955, "learning_rate": 0.01, "loss": 1.9907, "step": 49872 }, { "epoch": 5.122214234363767, "grad_norm": 0.11030067503452301, "learning_rate": 0.01, "loss": 2.006, "step": 49875 }, { "epoch": 5.122522337475608, "grad_norm": 0.033092547208070755, "learning_rate": 0.01, "loss": 2.0064, "step": 49878 }, { "epoch": 5.12283044058745, "grad_norm": 0.051649268716573715, "learning_rate": 0.01, "loss": 1.9767, "step": 49881 }, { "epoch": 5.123138543699292, "grad_norm": 0.06344709545373917, "learning_rate": 0.01, "loss": 2.0163, "step": 49884 }, { "epoch": 5.123446646811133, "grad_norm": 0.08509237319231033, "learning_rate": 0.01, "loss": 2.0251, "step": 49887 }, { "epoch": 5.123754749922974, "grad_norm": 0.04539303854107857, "learning_rate": 0.01, "loss": 2.0089, "step": 49890 }, { "epoch": 5.124062853034816, "grad_norm": 0.03646918386220932, "learning_rate": 0.01, "loss": 1.9879, "step": 49893 }, { "epoch": 5.124370956146657, "grad_norm": 0.12570123374462128, "learning_rate": 0.01, "loss": 1.9997, "step": 49896 }, { "epoch": 5.124679059258498, "grad_norm": 0.12240875512361526, "learning_rate": 0.01, "loss": 1.9984, "step": 49899 }, { "epoch": 5.12498716237034, "grad_norm": 0.10540018230676651, "learning_rate": 0.01, "loss": 2.0088, "step": 49902 }, { "epoch": 5.1252952654821815, "grad_norm": 0.08514107763767242, "learning_rate": 0.01, "loss": 1.9776, "step": 49905 }, { "epoch": 5.125603368594023, "grad_norm": 0.04462766274809837, "learning_rate": 0.01, "loss": 2.0086, "step": 49908 }, { "epoch": 5.125911471705864, "grad_norm": 0.03537189960479736, "learning_rate": 0.01, "loss": 1.9994, "step": 49911 }, { "epoch": 5.126219574817705, "grad_norm": 0.053130924701690674, "learning_rate": 0.01, "loss": 1.9961, "step": 49914 }, { "epoch": 5.126527677929547, "grad_norm": 0.08888891339302063, "learning_rate": 0.01, "loss": 2.0169, "step": 49917 }, { "epoch": 5.126835781041389, "grad_norm": 0.051719069480895996, "learning_rate": 0.01, "loss": 2.0018, "step": 49920 }, { "epoch": 5.12714388415323, "grad_norm": 0.123109832406044, "learning_rate": 0.01, "loss": 1.9926, "step": 49923 }, { "epoch": 5.127451987265071, "grad_norm": 0.08993472158908844, "learning_rate": 0.01, "loss": 1.9913, "step": 49926 }, { "epoch": 5.1277600903769125, "grad_norm": 0.10910573601722717, "learning_rate": 0.01, "loss": 1.9856, "step": 49929 }, { "epoch": 5.128068193488755, "grad_norm": 0.13778774440288544, "learning_rate": 0.01, "loss": 1.9985, "step": 49932 }, { "epoch": 5.128376296600596, "grad_norm": 0.08411096781492233, "learning_rate": 0.01, "loss": 2.0119, "step": 49935 }, { "epoch": 5.128684399712437, "grad_norm": 0.05669613182544708, "learning_rate": 0.01, "loss": 2.0166, "step": 49938 }, { "epoch": 5.128992502824278, "grad_norm": 0.037695713341236115, "learning_rate": 0.01, "loss": 1.9769, "step": 49941 }, { "epoch": 5.12930060593612, "grad_norm": 0.07977671176195145, "learning_rate": 0.01, "loss": 2.0116, "step": 49944 }, { "epoch": 5.129608709047961, "grad_norm": 0.10012632608413696, "learning_rate": 0.01, "loss": 1.9685, "step": 49947 }, { "epoch": 5.129916812159803, "grad_norm": 0.06524799764156342, "learning_rate": 0.01, "loss": 1.9901, "step": 49950 }, { "epoch": 5.130224915271644, "grad_norm": 0.1130509302020073, "learning_rate": 0.01, "loss": 1.9893, "step": 49953 }, { "epoch": 5.130533018383486, "grad_norm": 0.06062379479408264, "learning_rate": 0.01, "loss": 2.0056, "step": 49956 }, { "epoch": 5.130841121495327, "grad_norm": 0.06229391694068909, "learning_rate": 0.01, "loss": 2.0003, "step": 49959 }, { "epoch": 5.131149224607168, "grad_norm": 0.04881103336811066, "learning_rate": 0.01, "loss": 2.0095, "step": 49962 }, { "epoch": 5.13145732771901, "grad_norm": 0.0779888853430748, "learning_rate": 0.01, "loss": 2.0085, "step": 49965 }, { "epoch": 5.131765430830852, "grad_norm": 0.03707825019955635, "learning_rate": 0.01, "loss": 2.016, "step": 49968 }, { "epoch": 5.132073533942693, "grad_norm": 0.12145381420850754, "learning_rate": 0.01, "loss": 2.007, "step": 49971 }, { "epoch": 5.132381637054534, "grad_norm": 0.08990494906902313, "learning_rate": 0.01, "loss": 1.9846, "step": 49974 }, { "epoch": 5.132689740166375, "grad_norm": 0.09638572484254837, "learning_rate": 0.01, "loss": 2.0001, "step": 49977 }, { "epoch": 5.1329978432782175, "grad_norm": 0.037770826369524, "learning_rate": 0.01, "loss": 2.0012, "step": 49980 }, { "epoch": 5.133305946390059, "grad_norm": 0.038396887481212616, "learning_rate": 0.01, "loss": 1.9952, "step": 49983 }, { "epoch": 5.1336140495019, "grad_norm": 0.052246298640966415, "learning_rate": 0.01, "loss": 1.9946, "step": 49986 }, { "epoch": 5.133922152613741, "grad_norm": 0.043142352253198624, "learning_rate": 0.01, "loss": 2.0161, "step": 49989 }, { "epoch": 5.134230255725583, "grad_norm": 0.04713413119316101, "learning_rate": 0.01, "loss": 1.972, "step": 49992 }, { "epoch": 5.134538358837425, "grad_norm": 0.06687591224908829, "learning_rate": 0.01, "loss": 2.0093, "step": 49995 }, { "epoch": 5.134846461949266, "grad_norm": 0.057987745851278305, "learning_rate": 0.01, "loss": 1.9958, "step": 49998 }, { "epoch": 5.135154565061107, "grad_norm": 0.048778288066387177, "learning_rate": 0.01, "loss": 1.9865, "step": 50001 }, { "epoch": 5.1354626681729485, "grad_norm": 0.04489746689796448, "learning_rate": 0.01, "loss": 2.0132, "step": 50004 }, { "epoch": 5.13577077128479, "grad_norm": 0.08324793726205826, "learning_rate": 0.01, "loss": 1.9947, "step": 50007 }, { "epoch": 5.136078874396631, "grad_norm": 0.04505028948187828, "learning_rate": 0.01, "loss": 2.0203, "step": 50010 }, { "epoch": 5.136386977508473, "grad_norm": 0.07827684283256531, "learning_rate": 0.01, "loss": 1.9831, "step": 50013 }, { "epoch": 5.136695080620314, "grad_norm": 0.07187260687351227, "learning_rate": 0.01, "loss": 2.0129, "step": 50016 }, { "epoch": 5.137003183732156, "grad_norm": 0.0374215804040432, "learning_rate": 0.01, "loss": 1.9748, "step": 50019 }, { "epoch": 5.137311286843997, "grad_norm": 0.06169632822275162, "learning_rate": 0.01, "loss": 1.9924, "step": 50022 }, { "epoch": 5.137619389955838, "grad_norm": 0.08089695870876312, "learning_rate": 0.01, "loss": 1.9784, "step": 50025 }, { "epoch": 5.13792749306768, "grad_norm": 0.06311798840761185, "learning_rate": 0.01, "loss": 2.0028, "step": 50028 }, { "epoch": 5.138235596179522, "grad_norm": 0.0869307816028595, "learning_rate": 0.01, "loss": 1.9934, "step": 50031 }, { "epoch": 5.138543699291363, "grad_norm": 0.07645547389984131, "learning_rate": 0.01, "loss": 1.9852, "step": 50034 }, { "epoch": 5.138851802403204, "grad_norm": 0.09578394144773483, "learning_rate": 0.01, "loss": 2.0077, "step": 50037 }, { "epoch": 5.139159905515045, "grad_norm": 0.09977228194475174, "learning_rate": 0.01, "loss": 1.9937, "step": 50040 }, { "epoch": 5.139468008626887, "grad_norm": 0.05820827558636665, "learning_rate": 0.01, "loss": 2.0138, "step": 50043 }, { "epoch": 5.139776111738729, "grad_norm": 0.03890974819660187, "learning_rate": 0.01, "loss": 2.0006, "step": 50046 }, { "epoch": 5.14008421485057, "grad_norm": 0.10406100749969482, "learning_rate": 0.01, "loss": 1.9962, "step": 50049 }, { "epoch": 5.140392317962411, "grad_norm": 0.057227544486522675, "learning_rate": 0.01, "loss": 2.0076, "step": 50052 }, { "epoch": 5.140700421074253, "grad_norm": 0.07142063230276108, "learning_rate": 0.01, "loss": 2.0143, "step": 50055 }, { "epoch": 5.141008524186094, "grad_norm": 0.047450605779886246, "learning_rate": 0.01, "loss": 2.0203, "step": 50058 }, { "epoch": 5.141316627297936, "grad_norm": 0.07935485243797302, "learning_rate": 0.01, "loss": 1.9998, "step": 50061 }, { "epoch": 5.141624730409777, "grad_norm": 0.14235766232013702, "learning_rate": 0.01, "loss": 2.0007, "step": 50064 }, { "epoch": 5.141932833521619, "grad_norm": 0.054345373064279556, "learning_rate": 0.01, "loss": 2.0062, "step": 50067 }, { "epoch": 5.14224093663346, "grad_norm": 0.03830769658088684, "learning_rate": 0.01, "loss": 2.0103, "step": 50070 }, { "epoch": 5.142549039745301, "grad_norm": 0.034983497112989426, "learning_rate": 0.01, "loss": 2.005, "step": 50073 }, { "epoch": 5.142857142857143, "grad_norm": 0.05580296739935875, "learning_rate": 0.01, "loss": 2.0175, "step": 50076 }, { "epoch": 5.1431652459689845, "grad_norm": 0.11556407809257507, "learning_rate": 0.01, "loss": 2.0065, "step": 50079 }, { "epoch": 5.143473349080826, "grad_norm": 0.05813178792595863, "learning_rate": 0.01, "loss": 2.0039, "step": 50082 }, { "epoch": 5.143781452192667, "grad_norm": 0.08391161262989044, "learning_rate": 0.01, "loss": 2.008, "step": 50085 }, { "epoch": 5.144089555304508, "grad_norm": 0.1391163021326065, "learning_rate": 0.01, "loss": 1.9844, "step": 50088 }, { "epoch": 5.14439765841635, "grad_norm": 0.038378529250621796, "learning_rate": 0.01, "loss": 2.0127, "step": 50091 }, { "epoch": 5.144705761528192, "grad_norm": 0.03499170020222664, "learning_rate": 0.01, "loss": 2.0067, "step": 50094 }, { "epoch": 5.145013864640033, "grad_norm": 0.038385018706321716, "learning_rate": 0.01, "loss": 2.0021, "step": 50097 }, { "epoch": 5.145321967751874, "grad_norm": 0.04664738476276398, "learning_rate": 0.01, "loss": 1.9992, "step": 50100 }, { "epoch": 5.1456300708637155, "grad_norm": 0.05988583341240883, "learning_rate": 0.01, "loss": 2.0116, "step": 50103 }, { "epoch": 5.145938173975557, "grad_norm": 0.05822984501719475, "learning_rate": 0.01, "loss": 1.9832, "step": 50106 }, { "epoch": 5.146246277087399, "grad_norm": 0.1176968440413475, "learning_rate": 0.01, "loss": 2.0017, "step": 50109 }, { "epoch": 5.14655438019924, "grad_norm": 0.04409262165427208, "learning_rate": 0.01, "loss": 1.9751, "step": 50112 }, { "epoch": 5.146862483311081, "grad_norm": 0.18404054641723633, "learning_rate": 0.01, "loss": 2.0388, "step": 50115 }, { "epoch": 5.147170586422923, "grad_norm": 0.07370147854089737, "learning_rate": 0.01, "loss": 2.0207, "step": 50118 }, { "epoch": 5.147478689534764, "grad_norm": 0.05898935720324516, "learning_rate": 0.01, "loss": 1.9861, "step": 50121 }, { "epoch": 5.147786792646606, "grad_norm": 0.04993924871087074, "learning_rate": 0.01, "loss": 1.9986, "step": 50124 }, { "epoch": 5.148094895758447, "grad_norm": 0.07191821932792664, "learning_rate": 0.01, "loss": 1.9945, "step": 50127 }, { "epoch": 5.148402998870289, "grad_norm": 0.06233205646276474, "learning_rate": 0.01, "loss": 1.9957, "step": 50130 }, { "epoch": 5.14871110198213, "grad_norm": 0.10599711537361145, "learning_rate": 0.01, "loss": 1.9935, "step": 50133 }, { "epoch": 5.149019205093971, "grad_norm": 0.05540407449007034, "learning_rate": 0.01, "loss": 2.0058, "step": 50136 }, { "epoch": 5.149327308205813, "grad_norm": 0.0512741319835186, "learning_rate": 0.01, "loss": 1.9917, "step": 50139 }, { "epoch": 5.149635411317655, "grad_norm": 0.060490820556879044, "learning_rate": 0.01, "loss": 1.9808, "step": 50142 }, { "epoch": 5.149943514429496, "grad_norm": 0.03850322216749191, "learning_rate": 0.01, "loss": 1.9957, "step": 50145 }, { "epoch": 5.150251617541337, "grad_norm": 0.12944872677326202, "learning_rate": 0.01, "loss": 2.0228, "step": 50148 }, { "epoch": 5.150559720653178, "grad_norm": 0.1282481700181961, "learning_rate": 0.01, "loss": 2.0104, "step": 50151 }, { "epoch": 5.15086782376502, "grad_norm": 0.09446661174297333, "learning_rate": 0.01, "loss": 2.011, "step": 50154 }, { "epoch": 5.151175926876862, "grad_norm": 0.04041890799999237, "learning_rate": 0.01, "loss": 2.0124, "step": 50157 }, { "epoch": 5.151484029988703, "grad_norm": 0.056447647511959076, "learning_rate": 0.01, "loss": 2.008, "step": 50160 }, { "epoch": 5.151792133100544, "grad_norm": 0.0449320413172245, "learning_rate": 0.01, "loss": 2.0123, "step": 50163 }, { "epoch": 5.1521002362123856, "grad_norm": 0.07914337515830994, "learning_rate": 0.01, "loss": 1.9795, "step": 50166 }, { "epoch": 5.152408339324227, "grad_norm": 0.06510215252637863, "learning_rate": 0.01, "loss": 2.0122, "step": 50169 }, { "epoch": 5.152716442436069, "grad_norm": 0.04413073882460594, "learning_rate": 0.01, "loss": 1.9958, "step": 50172 }, { "epoch": 5.15302454554791, "grad_norm": 0.045415036380290985, "learning_rate": 0.01, "loss": 1.9823, "step": 50175 }, { "epoch": 5.1533326486597515, "grad_norm": 0.04759620502591133, "learning_rate": 0.01, "loss": 2.0026, "step": 50178 }, { "epoch": 5.153640751771593, "grad_norm": 0.08537039160728455, "learning_rate": 0.01, "loss": 1.9982, "step": 50181 }, { "epoch": 5.153948854883434, "grad_norm": 0.10971151292324066, "learning_rate": 0.01, "loss": 2.0096, "step": 50184 }, { "epoch": 5.154256957995276, "grad_norm": 0.04773986339569092, "learning_rate": 0.01, "loss": 2.0192, "step": 50187 }, { "epoch": 5.154565061107117, "grad_norm": 0.13717158138751984, "learning_rate": 0.01, "loss": 2.0082, "step": 50190 }, { "epoch": 5.154873164218959, "grad_norm": 0.05396975204348564, "learning_rate": 0.01, "loss": 1.9911, "step": 50193 }, { "epoch": 5.1551812673308, "grad_norm": 0.03611796349287033, "learning_rate": 0.01, "loss": 1.9988, "step": 50196 }, { "epoch": 5.155489370442641, "grad_norm": 0.0544486828148365, "learning_rate": 0.01, "loss": 1.9916, "step": 50199 }, { "epoch": 5.1557974735544825, "grad_norm": 0.1198575496673584, "learning_rate": 0.01, "loss": 2.0187, "step": 50202 }, { "epoch": 5.156105576666325, "grad_norm": 0.04922753944993019, "learning_rate": 0.01, "loss": 2.0136, "step": 50205 }, { "epoch": 5.156413679778166, "grad_norm": 0.07132868468761444, "learning_rate": 0.01, "loss": 1.9788, "step": 50208 }, { "epoch": 5.156721782890007, "grad_norm": 0.06232021376490593, "learning_rate": 0.01, "loss": 1.9926, "step": 50211 }, { "epoch": 5.157029886001848, "grad_norm": 0.0492694191634655, "learning_rate": 0.01, "loss": 2.017, "step": 50214 }, { "epoch": 5.15733798911369, "grad_norm": 0.040781404823064804, "learning_rate": 0.01, "loss": 1.9982, "step": 50217 }, { "epoch": 5.157646092225532, "grad_norm": 0.04086026921868324, "learning_rate": 0.01, "loss": 2.0217, "step": 50220 }, { "epoch": 5.157954195337373, "grad_norm": 0.04154505580663681, "learning_rate": 0.01, "loss": 2.0069, "step": 50223 }, { "epoch": 5.158262298449214, "grad_norm": 0.043042492121458054, "learning_rate": 0.01, "loss": 1.9652, "step": 50226 }, { "epoch": 5.158570401561056, "grad_norm": 0.10211813449859619, "learning_rate": 0.01, "loss": 2.014, "step": 50229 }, { "epoch": 5.158878504672897, "grad_norm": 0.0644875019788742, "learning_rate": 0.01, "loss": 1.9997, "step": 50232 }, { "epoch": 5.159186607784739, "grad_norm": 0.09346074610948563, "learning_rate": 0.01, "loss": 2.0304, "step": 50235 }, { "epoch": 5.15949471089658, "grad_norm": 0.04961559548974037, "learning_rate": 0.01, "loss": 2.0267, "step": 50238 }, { "epoch": 5.159802814008422, "grad_norm": 0.07135666906833649, "learning_rate": 0.01, "loss": 2.0063, "step": 50241 }, { "epoch": 5.160110917120263, "grad_norm": 0.0363953560590744, "learning_rate": 0.01, "loss": 2.0157, "step": 50244 }, { "epoch": 5.160419020232104, "grad_norm": 0.05364762246608734, "learning_rate": 0.01, "loss": 1.9895, "step": 50247 }, { "epoch": 5.160727123343945, "grad_norm": 0.07405871897935867, "learning_rate": 0.01, "loss": 2.0068, "step": 50250 }, { "epoch": 5.1610352264557875, "grad_norm": 0.06045752391219139, "learning_rate": 0.01, "loss": 2.0126, "step": 50253 }, { "epoch": 5.161343329567629, "grad_norm": 0.05497819930315018, "learning_rate": 0.01, "loss": 1.9921, "step": 50256 }, { "epoch": 5.16165143267947, "grad_norm": 0.12490664422512054, "learning_rate": 0.01, "loss": 2.0215, "step": 50259 }, { "epoch": 5.161959535791311, "grad_norm": 0.04628274589776993, "learning_rate": 0.01, "loss": 2.0116, "step": 50262 }, { "epoch": 5.1622676389031525, "grad_norm": 0.06884883344173431, "learning_rate": 0.01, "loss": 2.021, "step": 50265 }, { "epoch": 5.162575742014995, "grad_norm": 0.057228926569223404, "learning_rate": 0.01, "loss": 1.9707, "step": 50268 }, { "epoch": 5.162883845126836, "grad_norm": 0.05467236414551735, "learning_rate": 0.01, "loss": 1.9975, "step": 50271 }, { "epoch": 5.163191948238677, "grad_norm": 0.10743111371994019, "learning_rate": 0.01, "loss": 2.0041, "step": 50274 }, { "epoch": 5.1635000513505185, "grad_norm": 0.06426141411066055, "learning_rate": 0.01, "loss": 2.0137, "step": 50277 }, { "epoch": 5.16380815446236, "grad_norm": 0.09047043323516846, "learning_rate": 0.01, "loss": 1.9975, "step": 50280 }, { "epoch": 5.164116257574202, "grad_norm": 0.032664790749549866, "learning_rate": 0.01, "loss": 2.0101, "step": 50283 }, { "epoch": 5.164424360686043, "grad_norm": 0.05022372305393219, "learning_rate": 0.01, "loss": 1.9948, "step": 50286 }, { "epoch": 5.164732463797884, "grad_norm": 0.06359392404556274, "learning_rate": 0.01, "loss": 1.9953, "step": 50289 }, { "epoch": 5.165040566909726, "grad_norm": 0.0480986088514328, "learning_rate": 0.01, "loss": 1.9781, "step": 50292 }, { "epoch": 5.165348670021567, "grad_norm": 0.04164107143878937, "learning_rate": 0.01, "loss": 1.994, "step": 50295 }, { "epoch": 5.165656773133408, "grad_norm": 0.06567779183387756, "learning_rate": 0.01, "loss": 2.0243, "step": 50298 }, { "epoch": 5.16596487624525, "grad_norm": 0.11087372153997421, "learning_rate": 0.01, "loss": 1.9872, "step": 50301 }, { "epoch": 5.166272979357092, "grad_norm": 0.056734826415777206, "learning_rate": 0.01, "loss": 2.0047, "step": 50304 }, { "epoch": 5.166581082468933, "grad_norm": 0.06245841085910797, "learning_rate": 0.01, "loss": 2.0065, "step": 50307 }, { "epoch": 5.166889185580774, "grad_norm": 0.07361923158168793, "learning_rate": 0.01, "loss": 1.9788, "step": 50310 }, { "epoch": 5.167197288692615, "grad_norm": 0.08085532486438751, "learning_rate": 0.01, "loss": 1.9912, "step": 50313 }, { "epoch": 5.167505391804458, "grad_norm": 0.08500105887651443, "learning_rate": 0.01, "loss": 2.0234, "step": 50316 }, { "epoch": 5.167813494916299, "grad_norm": 0.07736580818891525, "learning_rate": 0.01, "loss": 1.9908, "step": 50319 }, { "epoch": 5.16812159802814, "grad_norm": 0.08762416243553162, "learning_rate": 0.01, "loss": 1.9993, "step": 50322 }, { "epoch": 5.168429701139981, "grad_norm": 0.05399163439869881, "learning_rate": 0.01, "loss": 1.9932, "step": 50325 }, { "epoch": 5.168737804251823, "grad_norm": 0.05431235954165459, "learning_rate": 0.01, "loss": 1.9898, "step": 50328 }, { "epoch": 5.169045907363665, "grad_norm": 0.04496511071920395, "learning_rate": 0.01, "loss": 2.0105, "step": 50331 }, { "epoch": 5.169354010475506, "grad_norm": 0.04711058363318443, "learning_rate": 0.01, "loss": 1.9891, "step": 50334 }, { "epoch": 5.169662113587347, "grad_norm": 0.08989371359348297, "learning_rate": 0.01, "loss": 1.9997, "step": 50337 }, { "epoch": 5.1699702166991885, "grad_norm": 0.048945628106594086, "learning_rate": 0.01, "loss": 1.9945, "step": 50340 }, { "epoch": 5.17027831981103, "grad_norm": 0.09270982444286346, "learning_rate": 0.01, "loss": 1.9826, "step": 50343 }, { "epoch": 5.170586422922872, "grad_norm": 0.05704176053404808, "learning_rate": 0.01, "loss": 1.9958, "step": 50346 }, { "epoch": 5.170894526034713, "grad_norm": 0.03455890342593193, "learning_rate": 0.01, "loss": 2.0412, "step": 50349 }, { "epoch": 5.1712026291465545, "grad_norm": 0.041682351380586624, "learning_rate": 0.01, "loss": 2.0007, "step": 50352 }, { "epoch": 5.171510732258396, "grad_norm": 0.04951677471399307, "learning_rate": 0.01, "loss": 1.9945, "step": 50355 }, { "epoch": 5.171818835370237, "grad_norm": 0.056010644882917404, "learning_rate": 0.01, "loss": 1.9864, "step": 50358 }, { "epoch": 5.172126938482078, "grad_norm": 0.1273152083158493, "learning_rate": 0.01, "loss": 1.9878, "step": 50361 }, { "epoch": 5.17243504159392, "grad_norm": 0.08742735534906387, "learning_rate": 0.01, "loss": 1.9902, "step": 50364 }, { "epoch": 5.172743144705762, "grad_norm": 0.03796623274683952, "learning_rate": 0.01, "loss": 1.9849, "step": 50367 }, { "epoch": 5.173051247817603, "grad_norm": 0.035885389894247055, "learning_rate": 0.01, "loss": 2.0107, "step": 50370 }, { "epoch": 5.173359350929444, "grad_norm": 0.040873028337955475, "learning_rate": 0.01, "loss": 2.0045, "step": 50373 }, { "epoch": 5.1736674540412855, "grad_norm": 0.03645748645067215, "learning_rate": 0.01, "loss": 2.0087, "step": 50376 }, { "epoch": 5.173975557153128, "grad_norm": 0.08969543129205704, "learning_rate": 0.01, "loss": 2.0174, "step": 50379 }, { "epoch": 5.174283660264969, "grad_norm": 0.05043506622314453, "learning_rate": 0.01, "loss": 1.9939, "step": 50382 }, { "epoch": 5.17459176337681, "grad_norm": 0.053546857088804245, "learning_rate": 0.01, "loss": 1.9953, "step": 50385 }, { "epoch": 5.174899866488651, "grad_norm": 0.03222036734223366, "learning_rate": 0.01, "loss": 2.0002, "step": 50388 }, { "epoch": 5.175207969600493, "grad_norm": 0.07949094474315643, "learning_rate": 0.01, "loss": 1.9913, "step": 50391 }, { "epoch": 5.175516072712334, "grad_norm": 0.0858977660536766, "learning_rate": 0.01, "loss": 2.0061, "step": 50394 }, { "epoch": 5.175824175824176, "grad_norm": 0.10471560060977936, "learning_rate": 0.01, "loss": 2.0044, "step": 50397 }, { "epoch": 5.176132278936017, "grad_norm": 0.10742645710706711, "learning_rate": 0.01, "loss": 1.9701, "step": 50400 }, { "epoch": 5.176440382047859, "grad_norm": 0.035415954887866974, "learning_rate": 0.01, "loss": 2.0005, "step": 50403 }, { "epoch": 5.1767484851597, "grad_norm": 0.043476980179548264, "learning_rate": 0.01, "loss": 1.9984, "step": 50406 }, { "epoch": 5.177056588271541, "grad_norm": 0.05659857392311096, "learning_rate": 0.01, "loss": 2.006, "step": 50409 }, { "epoch": 5.177364691383383, "grad_norm": 0.09809272736310959, "learning_rate": 0.01, "loss": 1.9947, "step": 50412 }, { "epoch": 5.1776727944952246, "grad_norm": 0.0645933598279953, "learning_rate": 0.01, "loss": 1.9848, "step": 50415 }, { "epoch": 5.177980897607066, "grad_norm": 0.11137600988149643, "learning_rate": 0.01, "loss": 1.9903, "step": 50418 }, { "epoch": 5.178289000718907, "grad_norm": 0.03614633530378342, "learning_rate": 0.01, "loss": 2.0087, "step": 50421 }, { "epoch": 5.178597103830748, "grad_norm": 0.09580224752426147, "learning_rate": 0.01, "loss": 1.9932, "step": 50424 }, { "epoch": 5.1789052069425905, "grad_norm": 0.0874679684638977, "learning_rate": 0.01, "loss": 1.9941, "step": 50427 }, { "epoch": 5.179213310054432, "grad_norm": 0.06637660413980484, "learning_rate": 0.01, "loss": 2.0219, "step": 50430 }, { "epoch": 5.179521413166273, "grad_norm": 0.049818407744169235, "learning_rate": 0.01, "loss": 1.9953, "step": 50433 }, { "epoch": 5.179829516278114, "grad_norm": 0.035875897854566574, "learning_rate": 0.01, "loss": 1.9689, "step": 50436 }, { "epoch": 5.1801376193899555, "grad_norm": 0.12956075370311737, "learning_rate": 0.01, "loss": 1.9805, "step": 50439 }, { "epoch": 5.180445722501798, "grad_norm": 0.0568457767367363, "learning_rate": 0.01, "loss": 1.9922, "step": 50442 }, { "epoch": 5.180753825613639, "grad_norm": 0.06275864690542221, "learning_rate": 0.01, "loss": 1.9874, "step": 50445 }, { "epoch": 5.18106192872548, "grad_norm": 0.07609914243221283, "learning_rate": 0.01, "loss": 1.9822, "step": 50448 }, { "epoch": 5.1813700318373215, "grad_norm": 0.0721612498164177, "learning_rate": 0.01, "loss": 2.0329, "step": 50451 }, { "epoch": 5.181678134949163, "grad_norm": 0.08733054995536804, "learning_rate": 0.01, "loss": 1.9605, "step": 50454 }, { "epoch": 5.181986238061004, "grad_norm": 0.10601712018251419, "learning_rate": 0.01, "loss": 1.9896, "step": 50457 }, { "epoch": 5.182294341172846, "grad_norm": 0.17671173810958862, "learning_rate": 0.01, "loss": 1.9903, "step": 50460 }, { "epoch": 5.182602444284687, "grad_norm": 0.08600035309791565, "learning_rate": 0.01, "loss": 1.9823, "step": 50463 }, { "epoch": 5.182910547396529, "grad_norm": 0.05488591641187668, "learning_rate": 0.01, "loss": 2.0033, "step": 50466 }, { "epoch": 5.18321865050837, "grad_norm": 0.08030954003334045, "learning_rate": 0.01, "loss": 1.9899, "step": 50469 }, { "epoch": 5.183526753620211, "grad_norm": 0.05973450466990471, "learning_rate": 0.01, "loss": 2.0063, "step": 50472 }, { "epoch": 5.183834856732053, "grad_norm": 0.07898557931184769, "learning_rate": 0.01, "loss": 2.0174, "step": 50475 }, { "epoch": 5.184142959843895, "grad_norm": 0.10037455707788467, "learning_rate": 0.01, "loss": 2.0057, "step": 50478 }, { "epoch": 5.184451062955736, "grad_norm": 0.08369861543178558, "learning_rate": 0.01, "loss": 1.9906, "step": 50481 }, { "epoch": 5.184759166067577, "grad_norm": 0.06350437551736832, "learning_rate": 0.01, "loss": 1.9976, "step": 50484 }, { "epoch": 5.185067269179418, "grad_norm": 0.05332079157233238, "learning_rate": 0.01, "loss": 1.9818, "step": 50487 }, { "epoch": 5.1853753722912606, "grad_norm": 0.09419017285108566, "learning_rate": 0.01, "loss": 1.9884, "step": 50490 }, { "epoch": 5.185683475403102, "grad_norm": 0.05123918130993843, "learning_rate": 0.01, "loss": 1.9896, "step": 50493 }, { "epoch": 5.185991578514943, "grad_norm": 0.08905530720949173, "learning_rate": 0.01, "loss": 2.0142, "step": 50496 }, { "epoch": 5.186299681626784, "grad_norm": 0.04132084175944328, "learning_rate": 0.01, "loss": 1.9858, "step": 50499 }, { "epoch": 5.186607784738626, "grad_norm": 0.04010608792304993, "learning_rate": 0.01, "loss": 1.9863, "step": 50502 }, { "epoch": 5.186915887850467, "grad_norm": 0.035268135368824005, "learning_rate": 0.01, "loss": 2.0181, "step": 50505 }, { "epoch": 5.187223990962309, "grad_norm": 0.07214079797267914, "learning_rate": 0.01, "loss": 1.9995, "step": 50508 }, { "epoch": 5.18753209407415, "grad_norm": 0.0641101822257042, "learning_rate": 0.01, "loss": 2.0056, "step": 50511 }, { "epoch": 5.1878401971859915, "grad_norm": 0.05329526588320732, "learning_rate": 0.01, "loss": 1.9798, "step": 50514 }, { "epoch": 5.188148300297833, "grad_norm": 0.10318466275930405, "learning_rate": 0.01, "loss": 1.9931, "step": 50517 }, { "epoch": 5.188456403409674, "grad_norm": 0.04416116699576378, "learning_rate": 0.01, "loss": 2.0333, "step": 50520 }, { "epoch": 5.188764506521516, "grad_norm": 0.03857633098959923, "learning_rate": 0.01, "loss": 1.9833, "step": 50523 }, { "epoch": 5.1890726096333575, "grad_norm": 0.054197683930397034, "learning_rate": 0.01, "loss": 2.0029, "step": 50526 }, { "epoch": 5.189380712745199, "grad_norm": 0.045404430478811264, "learning_rate": 0.01, "loss": 2.0048, "step": 50529 }, { "epoch": 5.18968881585704, "grad_norm": 0.10853458195924759, "learning_rate": 0.01, "loss": 1.9961, "step": 50532 }, { "epoch": 5.189996918968881, "grad_norm": 0.043089404702186584, "learning_rate": 0.01, "loss": 1.9954, "step": 50535 }, { "epoch": 5.190305022080723, "grad_norm": 0.05289897695183754, "learning_rate": 0.01, "loss": 1.9959, "step": 50538 }, { "epoch": 5.190613125192565, "grad_norm": 0.06284762918949127, "learning_rate": 0.01, "loss": 1.9905, "step": 50541 }, { "epoch": 5.190921228304406, "grad_norm": 0.0615723617374897, "learning_rate": 0.01, "loss": 2.0115, "step": 50544 }, { "epoch": 5.191229331416247, "grad_norm": 0.10425807535648346, "learning_rate": 0.01, "loss": 2.003, "step": 50547 }, { "epoch": 5.1915374345280885, "grad_norm": 0.09661048650741577, "learning_rate": 0.01, "loss": 2.004, "step": 50550 }, { "epoch": 5.19184553763993, "grad_norm": 0.059544507414102554, "learning_rate": 0.01, "loss": 2.0324, "step": 50553 }, { "epoch": 5.192153640751772, "grad_norm": 0.08903210610151291, "learning_rate": 0.01, "loss": 1.9994, "step": 50556 }, { "epoch": 5.192461743863613, "grad_norm": 0.04461904987692833, "learning_rate": 0.01, "loss": 2.0073, "step": 50559 }, { "epoch": 5.192769846975454, "grad_norm": 0.07844073325395584, "learning_rate": 0.01, "loss": 2.0141, "step": 50562 }, { "epoch": 5.193077950087296, "grad_norm": 0.08520335704088211, "learning_rate": 0.01, "loss": 1.9663, "step": 50565 }, { "epoch": 5.193386053199137, "grad_norm": 0.13090673089027405, "learning_rate": 0.01, "loss": 1.9887, "step": 50568 }, { "epoch": 5.193694156310979, "grad_norm": 0.08202611654996872, "learning_rate": 0.01, "loss": 2.0021, "step": 50571 }, { "epoch": 5.19400225942282, "grad_norm": 0.05638153478503227, "learning_rate": 0.01, "loss": 1.9983, "step": 50574 }, { "epoch": 5.194310362534662, "grad_norm": 0.03813661262392998, "learning_rate": 0.01, "loss": 2.0002, "step": 50577 }, { "epoch": 5.194618465646503, "grad_norm": 0.13044418394565582, "learning_rate": 0.01, "loss": 2.0132, "step": 50580 }, { "epoch": 5.194926568758344, "grad_norm": 0.04875580221414566, "learning_rate": 0.01, "loss": 2.0023, "step": 50583 }, { "epoch": 5.195234671870186, "grad_norm": 0.03675463795661926, "learning_rate": 0.01, "loss": 1.9981, "step": 50586 }, { "epoch": 5.1955427749820275, "grad_norm": 0.050565268844366074, "learning_rate": 0.01, "loss": 1.9943, "step": 50589 }, { "epoch": 5.195850878093869, "grad_norm": 0.08294785022735596, "learning_rate": 0.01, "loss": 2.0057, "step": 50592 }, { "epoch": 5.19615898120571, "grad_norm": 0.04059421271085739, "learning_rate": 0.01, "loss": 1.9631, "step": 50595 }, { "epoch": 5.196467084317551, "grad_norm": 0.044956889003515244, "learning_rate": 0.01, "loss": 1.9804, "step": 50598 }, { "epoch": 5.1967751874293935, "grad_norm": 0.11942701786756516, "learning_rate": 0.01, "loss": 2.0148, "step": 50601 }, { "epoch": 5.197083290541235, "grad_norm": 0.04948386549949646, "learning_rate": 0.01, "loss": 2.0043, "step": 50604 }, { "epoch": 5.197391393653076, "grad_norm": 0.08146567642688751, "learning_rate": 0.01, "loss": 2.0315, "step": 50607 }, { "epoch": 5.197699496764917, "grad_norm": 0.0451958030462265, "learning_rate": 0.01, "loss": 2.0138, "step": 50610 }, { "epoch": 5.1980075998767585, "grad_norm": 0.036100223660469055, "learning_rate": 0.01, "loss": 1.9876, "step": 50613 }, { "epoch": 5.1983157029886, "grad_norm": 0.035324469208717346, "learning_rate": 0.01, "loss": 1.9929, "step": 50616 }, { "epoch": 5.198623806100442, "grad_norm": 0.05005744844675064, "learning_rate": 0.01, "loss": 2.0062, "step": 50619 }, { "epoch": 5.198931909212283, "grad_norm": 0.08080115169286728, "learning_rate": 0.01, "loss": 1.9893, "step": 50622 }, { "epoch": 5.1992400123241245, "grad_norm": 0.07723166048526764, "learning_rate": 0.01, "loss": 1.9991, "step": 50625 }, { "epoch": 5.199548115435966, "grad_norm": 0.11351467669010162, "learning_rate": 0.01, "loss": 1.9948, "step": 50628 }, { "epoch": 5.199856218547807, "grad_norm": 0.03940621018409729, "learning_rate": 0.01, "loss": 1.9724, "step": 50631 }, { "epoch": 5.200164321659649, "grad_norm": 0.06582506000995636, "learning_rate": 0.01, "loss": 1.9937, "step": 50634 }, { "epoch": 5.20047242477149, "grad_norm": 0.030670402571558952, "learning_rate": 0.01, "loss": 1.979, "step": 50637 }, { "epoch": 5.200780527883332, "grad_norm": 0.06636448204517365, "learning_rate": 0.01, "loss": 1.9824, "step": 50640 }, { "epoch": 5.201088630995173, "grad_norm": 0.06079031527042389, "learning_rate": 0.01, "loss": 2.0074, "step": 50643 }, { "epoch": 5.201396734107014, "grad_norm": 0.1049150750041008, "learning_rate": 0.01, "loss": 2.0114, "step": 50646 }, { "epoch": 5.2017048372188555, "grad_norm": 0.08547158539295197, "learning_rate": 0.01, "loss": 1.9965, "step": 50649 }, { "epoch": 5.202012940330698, "grad_norm": 0.06914935261011124, "learning_rate": 0.01, "loss": 2.0037, "step": 50652 }, { "epoch": 5.202321043442539, "grad_norm": 0.05997157096862793, "learning_rate": 0.01, "loss": 1.9891, "step": 50655 }, { "epoch": 5.20262914655438, "grad_norm": 0.052482035011053085, "learning_rate": 0.01, "loss": 1.9916, "step": 50658 }, { "epoch": 5.202937249666221, "grad_norm": 0.12078742682933807, "learning_rate": 0.01, "loss": 2.0135, "step": 50661 }, { "epoch": 5.203245352778063, "grad_norm": 0.08411680161952972, "learning_rate": 0.01, "loss": 2.0155, "step": 50664 }, { "epoch": 5.203553455889905, "grad_norm": 0.06186600774526596, "learning_rate": 0.01, "loss": 2.022, "step": 50667 }, { "epoch": 5.203861559001746, "grad_norm": 0.09473706781864166, "learning_rate": 0.01, "loss": 1.9982, "step": 50670 }, { "epoch": 5.204169662113587, "grad_norm": 0.05376753211021423, "learning_rate": 0.01, "loss": 1.9948, "step": 50673 }, { "epoch": 5.204477765225429, "grad_norm": 0.08271234482526779, "learning_rate": 0.01, "loss": 2.0173, "step": 50676 }, { "epoch": 5.20478586833727, "grad_norm": 0.07704611122608185, "learning_rate": 0.01, "loss": 1.9843, "step": 50679 }, { "epoch": 5.205093971449112, "grad_norm": 0.0716482475399971, "learning_rate": 0.01, "loss": 1.9891, "step": 50682 }, { "epoch": 5.205402074560953, "grad_norm": 0.07560107111930847, "learning_rate": 0.01, "loss": 2.0152, "step": 50685 }, { "epoch": 5.2057101776727945, "grad_norm": 0.07398194819688797, "learning_rate": 0.01, "loss": 2.0095, "step": 50688 }, { "epoch": 5.206018280784636, "grad_norm": 0.09211590141057968, "learning_rate": 0.01, "loss": 1.9672, "step": 50691 }, { "epoch": 5.206326383896477, "grad_norm": 0.0511045902967453, "learning_rate": 0.01, "loss": 1.9978, "step": 50694 }, { "epoch": 5.206634487008319, "grad_norm": 0.046343546360731125, "learning_rate": 0.01, "loss": 1.989, "step": 50697 }, { "epoch": 5.2069425901201605, "grad_norm": 0.0472230389714241, "learning_rate": 0.01, "loss": 1.9947, "step": 50700 }, { "epoch": 5.207250693232002, "grad_norm": 0.09529436379671097, "learning_rate": 0.01, "loss": 2.0117, "step": 50703 }, { "epoch": 5.207558796343843, "grad_norm": 0.15323568880558014, "learning_rate": 0.01, "loss": 2.0082, "step": 50706 }, { "epoch": 5.207866899455684, "grad_norm": 0.1495465189218521, "learning_rate": 0.01, "loss": 2.0021, "step": 50709 }, { "epoch": 5.2081750025675255, "grad_norm": 0.09416273981332779, "learning_rate": 0.01, "loss": 1.9982, "step": 50712 }, { "epoch": 5.208483105679368, "grad_norm": 0.05051533132791519, "learning_rate": 0.01, "loss": 1.9977, "step": 50715 }, { "epoch": 5.208791208791209, "grad_norm": 0.03743917867541313, "learning_rate": 0.01, "loss": 1.9872, "step": 50718 }, { "epoch": 5.20909931190305, "grad_norm": 0.04133860394358635, "learning_rate": 0.01, "loss": 1.9981, "step": 50721 }, { "epoch": 5.2094074150148915, "grad_norm": 0.05624905601143837, "learning_rate": 0.01, "loss": 2.0213, "step": 50724 }, { "epoch": 5.209715518126733, "grad_norm": 0.04526040330529213, "learning_rate": 0.01, "loss": 2.0013, "step": 50727 }, { "epoch": 5.210023621238575, "grad_norm": 0.06159577891230583, "learning_rate": 0.01, "loss": 1.9786, "step": 50730 }, { "epoch": 5.210331724350416, "grad_norm": 0.10257083177566528, "learning_rate": 0.01, "loss": 1.9977, "step": 50733 }, { "epoch": 5.210639827462257, "grad_norm": 0.0896536186337471, "learning_rate": 0.01, "loss": 1.9878, "step": 50736 }, { "epoch": 5.210947930574099, "grad_norm": 0.0499761626124382, "learning_rate": 0.01, "loss": 1.9854, "step": 50739 }, { "epoch": 5.21125603368594, "grad_norm": 0.044219888746738434, "learning_rate": 0.01, "loss": 2.0012, "step": 50742 }, { "epoch": 5.211564136797782, "grad_norm": 0.056992996484041214, "learning_rate": 0.01, "loss": 2.0009, "step": 50745 }, { "epoch": 5.211872239909623, "grad_norm": 0.10208120197057724, "learning_rate": 0.01, "loss": 2.014, "step": 50748 }, { "epoch": 5.212180343021465, "grad_norm": 0.07243528962135315, "learning_rate": 0.01, "loss": 2.0177, "step": 50751 }, { "epoch": 5.212488446133306, "grad_norm": 0.04413043335080147, "learning_rate": 0.01, "loss": 1.996, "step": 50754 }, { "epoch": 5.212796549245147, "grad_norm": 0.09615755081176758, "learning_rate": 0.01, "loss": 1.9998, "step": 50757 }, { "epoch": 5.213104652356988, "grad_norm": 0.04845009744167328, "learning_rate": 0.01, "loss": 1.9976, "step": 50760 }, { "epoch": 5.2134127554688305, "grad_norm": 0.12175197154283524, "learning_rate": 0.01, "loss": 2.0204, "step": 50763 }, { "epoch": 5.213720858580672, "grad_norm": 0.045076884329319, "learning_rate": 0.01, "loss": 1.9739, "step": 50766 }, { "epoch": 5.214028961692513, "grad_norm": 0.07600415498018265, "learning_rate": 0.01, "loss": 2.0119, "step": 50769 }, { "epoch": 5.214337064804354, "grad_norm": 0.06227222830057144, "learning_rate": 0.01, "loss": 1.9922, "step": 50772 }, { "epoch": 5.214645167916196, "grad_norm": 0.10495235025882721, "learning_rate": 0.01, "loss": 2.0132, "step": 50775 }, { "epoch": 5.214953271028038, "grad_norm": 0.08388978242874146, "learning_rate": 0.01, "loss": 2.018, "step": 50778 }, { "epoch": 5.215261374139879, "grad_norm": 0.07591544091701508, "learning_rate": 0.01, "loss": 2.0234, "step": 50781 }, { "epoch": 5.21556947725172, "grad_norm": 0.07411694526672363, "learning_rate": 0.01, "loss": 2.0082, "step": 50784 }, { "epoch": 5.2158775803635615, "grad_norm": 0.07496411353349686, "learning_rate": 0.01, "loss": 2.0198, "step": 50787 }, { "epoch": 5.216185683475403, "grad_norm": 0.07005130499601364, "learning_rate": 0.01, "loss": 1.9985, "step": 50790 }, { "epoch": 5.216493786587245, "grad_norm": 0.05165226384997368, "learning_rate": 0.01, "loss": 2.0114, "step": 50793 }, { "epoch": 5.216801889699086, "grad_norm": 0.09063982218503952, "learning_rate": 0.01, "loss": 1.9989, "step": 50796 }, { "epoch": 5.2171099928109275, "grad_norm": 0.03189365193247795, "learning_rate": 0.01, "loss": 1.9828, "step": 50799 }, { "epoch": 5.217418095922769, "grad_norm": 0.07051954418420792, "learning_rate": 0.01, "loss": 2.0027, "step": 50802 }, { "epoch": 5.21772619903461, "grad_norm": 0.045339349657297134, "learning_rate": 0.01, "loss": 1.9932, "step": 50805 }, { "epoch": 5.218034302146451, "grad_norm": 0.041392091661691666, "learning_rate": 0.01, "loss": 2.0054, "step": 50808 }, { "epoch": 5.218342405258293, "grad_norm": 0.05115804076194763, "learning_rate": 0.01, "loss": 2.0046, "step": 50811 }, { "epoch": 5.218650508370135, "grad_norm": 0.0681786835193634, "learning_rate": 0.01, "loss": 2.0167, "step": 50814 }, { "epoch": 5.218958611481976, "grad_norm": 0.057782597839832306, "learning_rate": 0.01, "loss": 1.9909, "step": 50817 }, { "epoch": 5.219266714593817, "grad_norm": 0.04151969403028488, "learning_rate": 0.01, "loss": 1.9884, "step": 50820 }, { "epoch": 5.2195748177056585, "grad_norm": 0.06815563142299652, "learning_rate": 0.01, "loss": 2.0215, "step": 50823 }, { "epoch": 5.219882920817501, "grad_norm": 0.1696784943342209, "learning_rate": 0.01, "loss": 2.0014, "step": 50826 }, { "epoch": 5.220191023929342, "grad_norm": 0.04126200079917908, "learning_rate": 0.01, "loss": 1.9838, "step": 50829 }, { "epoch": 5.220499127041183, "grad_norm": 0.03278988599777222, "learning_rate": 0.01, "loss": 2.0102, "step": 50832 }, { "epoch": 5.220807230153024, "grad_norm": 0.0632409155368805, "learning_rate": 0.01, "loss": 2.0014, "step": 50835 }, { "epoch": 5.221115333264866, "grad_norm": 0.06449749320745468, "learning_rate": 0.01, "loss": 2.0045, "step": 50838 }, { "epoch": 5.221423436376708, "grad_norm": 0.04530004784464836, "learning_rate": 0.01, "loss": 1.9978, "step": 50841 }, { "epoch": 5.221731539488549, "grad_norm": 0.06646733731031418, "learning_rate": 0.01, "loss": 1.9972, "step": 50844 }, { "epoch": 5.22203964260039, "grad_norm": 0.040766507387161255, "learning_rate": 0.01, "loss": 1.9718, "step": 50847 }, { "epoch": 5.222347745712232, "grad_norm": 0.04910712689161301, "learning_rate": 0.01, "loss": 1.9944, "step": 50850 }, { "epoch": 5.222655848824073, "grad_norm": 0.03790167346596718, "learning_rate": 0.01, "loss": 1.9806, "step": 50853 }, { "epoch": 5.222963951935915, "grad_norm": 0.04654322564601898, "learning_rate": 0.01, "loss": 1.9981, "step": 50856 }, { "epoch": 5.223272055047756, "grad_norm": 0.05322917550802231, "learning_rate": 0.01, "loss": 1.9919, "step": 50859 }, { "epoch": 5.2235801581595975, "grad_norm": 0.10331053286790848, "learning_rate": 0.01, "loss": 2.0289, "step": 50862 }, { "epoch": 5.223888261271439, "grad_norm": 0.14793960750102997, "learning_rate": 0.01, "loss": 1.9954, "step": 50865 }, { "epoch": 5.22419636438328, "grad_norm": 0.06507808715105057, "learning_rate": 0.01, "loss": 2.0158, "step": 50868 }, { "epoch": 5.224504467495121, "grad_norm": 0.06138547509908676, "learning_rate": 0.01, "loss": 2.0176, "step": 50871 }, { "epoch": 5.2248125706069635, "grad_norm": 0.04522119089961052, "learning_rate": 0.01, "loss": 1.9838, "step": 50874 }, { "epoch": 5.225120673718805, "grad_norm": 0.044236768037080765, "learning_rate": 0.01, "loss": 2.0052, "step": 50877 }, { "epoch": 5.225428776830646, "grad_norm": 0.05313803628087044, "learning_rate": 0.01, "loss": 2.0163, "step": 50880 }, { "epoch": 5.225736879942487, "grad_norm": 0.06283990293741226, "learning_rate": 0.01, "loss": 2.0061, "step": 50883 }, { "epoch": 5.2260449830543285, "grad_norm": 0.05976197123527527, "learning_rate": 0.01, "loss": 1.9953, "step": 50886 }, { "epoch": 5.226353086166171, "grad_norm": 0.10755223780870438, "learning_rate": 0.01, "loss": 1.9934, "step": 50889 }, { "epoch": 5.226661189278012, "grad_norm": 0.040908023715019226, "learning_rate": 0.01, "loss": 1.9898, "step": 50892 }, { "epoch": 5.226969292389853, "grad_norm": 0.08019308000802994, "learning_rate": 0.01, "loss": 1.9709, "step": 50895 }, { "epoch": 5.2272773955016945, "grad_norm": 0.04088309779763222, "learning_rate": 0.01, "loss": 2.0215, "step": 50898 }, { "epoch": 5.227585498613536, "grad_norm": 0.06042852997779846, "learning_rate": 0.01, "loss": 1.9801, "step": 50901 }, { "epoch": 5.227893601725377, "grad_norm": 0.06619790196418762, "learning_rate": 0.01, "loss": 2.0253, "step": 50904 }, { "epoch": 5.228201704837219, "grad_norm": 0.0903916209936142, "learning_rate": 0.01, "loss": 2.0068, "step": 50907 }, { "epoch": 5.22850980794906, "grad_norm": 0.06611988693475723, "learning_rate": 0.01, "loss": 1.9967, "step": 50910 }, { "epoch": 5.228817911060902, "grad_norm": 0.07255574315786362, "learning_rate": 0.01, "loss": 1.9713, "step": 50913 }, { "epoch": 5.229126014172743, "grad_norm": 0.07492070645093918, "learning_rate": 0.01, "loss": 1.9643, "step": 50916 }, { "epoch": 5.229434117284584, "grad_norm": 0.07850334793329239, "learning_rate": 0.01, "loss": 2.0012, "step": 50919 }, { "epoch": 5.229742220396426, "grad_norm": 0.0485299751162529, "learning_rate": 0.01, "loss": 1.999, "step": 50922 }, { "epoch": 5.230050323508268, "grad_norm": 0.09580907970666885, "learning_rate": 0.01, "loss": 2.0075, "step": 50925 }, { "epoch": 5.230358426620109, "grad_norm": 0.10689478367567062, "learning_rate": 0.01, "loss": 2.0018, "step": 50928 }, { "epoch": 5.23066652973195, "grad_norm": 0.08192954957485199, "learning_rate": 0.01, "loss": 2.0266, "step": 50931 }, { "epoch": 5.230974632843791, "grad_norm": 0.06330616772174835, "learning_rate": 0.01, "loss": 1.9731, "step": 50934 }, { "epoch": 5.2312827359556335, "grad_norm": 0.041825070977211, "learning_rate": 0.01, "loss": 2.0054, "step": 50937 }, { "epoch": 5.231590839067475, "grad_norm": 0.04370388388633728, "learning_rate": 0.01, "loss": 1.9912, "step": 50940 }, { "epoch": 5.231898942179316, "grad_norm": 0.039774972945451736, "learning_rate": 0.01, "loss": 1.9827, "step": 50943 }, { "epoch": 5.232207045291157, "grad_norm": 0.06065038964152336, "learning_rate": 0.01, "loss": 1.9772, "step": 50946 }, { "epoch": 5.232515148402999, "grad_norm": 0.1340397149324417, "learning_rate": 0.01, "loss": 1.9971, "step": 50949 }, { "epoch": 5.232823251514841, "grad_norm": 0.05149194970726967, "learning_rate": 0.01, "loss": 2.0259, "step": 50952 }, { "epoch": 5.233131354626682, "grad_norm": 0.043973732739686966, "learning_rate": 0.01, "loss": 1.9863, "step": 50955 }, { "epoch": 5.233439457738523, "grad_norm": 0.034024860709905624, "learning_rate": 0.01, "loss": 1.9867, "step": 50958 }, { "epoch": 5.2337475608503645, "grad_norm": 0.03558748587965965, "learning_rate": 0.01, "loss": 2.0271, "step": 50961 }, { "epoch": 5.234055663962206, "grad_norm": 0.03661567345261574, "learning_rate": 0.01, "loss": 2.0093, "step": 50964 }, { "epoch": 5.234363767074047, "grad_norm": 0.11346925795078278, "learning_rate": 0.01, "loss": 1.9861, "step": 50967 }, { "epoch": 5.234671870185889, "grad_norm": 0.05934759974479675, "learning_rate": 0.01, "loss": 2.0058, "step": 50970 }, { "epoch": 5.2349799732977305, "grad_norm": 0.08264946192502975, "learning_rate": 0.01, "loss": 1.9998, "step": 50973 }, { "epoch": 5.235288076409572, "grad_norm": 0.0470314584672451, "learning_rate": 0.01, "loss": 2.001, "step": 50976 }, { "epoch": 5.235596179521413, "grad_norm": 0.04239676892757416, "learning_rate": 0.01, "loss": 2.0114, "step": 50979 }, { "epoch": 5.235904282633254, "grad_norm": 0.09247265011072159, "learning_rate": 0.01, "loss": 2.0097, "step": 50982 }, { "epoch": 5.236212385745096, "grad_norm": 0.05270032957196236, "learning_rate": 0.01, "loss": 1.9795, "step": 50985 }, { "epoch": 5.236520488856938, "grad_norm": 0.0351402647793293, "learning_rate": 0.01, "loss": 2.0236, "step": 50988 }, { "epoch": 5.236828591968779, "grad_norm": 0.04401613771915436, "learning_rate": 0.01, "loss": 1.9836, "step": 50991 }, { "epoch": 5.23713669508062, "grad_norm": 0.0353761650621891, "learning_rate": 0.01, "loss": 2.0021, "step": 50994 }, { "epoch": 5.2374447981924614, "grad_norm": 0.13582314550876617, "learning_rate": 0.01, "loss": 2.0199, "step": 50997 }, { "epoch": 5.237752901304304, "grad_norm": 0.06481800973415375, "learning_rate": 0.01, "loss": 2.0023, "step": 51000 }, { "epoch": 5.238061004416145, "grad_norm": 0.06499173492193222, "learning_rate": 0.01, "loss": 2.0251, "step": 51003 }, { "epoch": 5.238369107527986, "grad_norm": 0.059027299284935, "learning_rate": 0.01, "loss": 1.9977, "step": 51006 }, { "epoch": 5.238677210639827, "grad_norm": 0.11211716383695602, "learning_rate": 0.01, "loss": 2.0132, "step": 51009 }, { "epoch": 5.238985313751669, "grad_norm": 0.041850294917821884, "learning_rate": 0.01, "loss": 1.9843, "step": 51012 }, { "epoch": 5.23929341686351, "grad_norm": 0.11037513613700867, "learning_rate": 0.01, "loss": 2.0258, "step": 51015 }, { "epoch": 5.239601519975352, "grad_norm": 0.06720243394374847, "learning_rate": 0.01, "loss": 1.9741, "step": 51018 }, { "epoch": 5.239909623087193, "grad_norm": 0.05992870405316353, "learning_rate": 0.01, "loss": 2.0079, "step": 51021 }, { "epoch": 5.240217726199035, "grad_norm": 0.046941906213760376, "learning_rate": 0.01, "loss": 1.9906, "step": 51024 }, { "epoch": 5.240525829310876, "grad_norm": 0.0415794812142849, "learning_rate": 0.01, "loss": 1.9986, "step": 51027 }, { "epoch": 5.240833932422717, "grad_norm": 0.046740952879190445, "learning_rate": 0.01, "loss": 1.9767, "step": 51030 }, { "epoch": 5.241142035534559, "grad_norm": 0.04328366369009018, "learning_rate": 0.01, "loss": 2.0068, "step": 51033 }, { "epoch": 5.2414501386464005, "grad_norm": 0.10686604678630829, "learning_rate": 0.01, "loss": 2.0091, "step": 51036 }, { "epoch": 5.241758241758242, "grad_norm": 0.0445748046040535, "learning_rate": 0.01, "loss": 1.9851, "step": 51039 }, { "epoch": 5.242066344870083, "grad_norm": 0.04299461841583252, "learning_rate": 0.01, "loss": 1.9822, "step": 51042 }, { "epoch": 5.242374447981924, "grad_norm": 0.06391250342130661, "learning_rate": 0.01, "loss": 2.0105, "step": 51045 }, { "epoch": 5.2426825510937665, "grad_norm": 0.03528610244393349, "learning_rate": 0.01, "loss": 2.0047, "step": 51048 }, { "epoch": 5.242990654205608, "grad_norm": 0.030670564621686935, "learning_rate": 0.01, "loss": 1.9809, "step": 51051 }, { "epoch": 5.243298757317449, "grad_norm": 0.11291905492544174, "learning_rate": 0.01, "loss": 2.0204, "step": 51054 }, { "epoch": 5.24360686042929, "grad_norm": 0.08508855104446411, "learning_rate": 0.01, "loss": 1.9977, "step": 51057 }, { "epoch": 5.2439149635411315, "grad_norm": 0.1319185048341751, "learning_rate": 0.01, "loss": 2.0088, "step": 51060 }, { "epoch": 5.244223066652973, "grad_norm": 0.08218291401863098, "learning_rate": 0.01, "loss": 2.0002, "step": 51063 }, { "epoch": 5.244531169764815, "grad_norm": 0.06311830878257751, "learning_rate": 0.01, "loss": 2.0104, "step": 51066 }, { "epoch": 5.244839272876656, "grad_norm": 0.03945956006646156, "learning_rate": 0.01, "loss": 1.9672, "step": 51069 }, { "epoch": 5.2451473759884975, "grad_norm": 0.045635320246219635, "learning_rate": 0.01, "loss": 2.0018, "step": 51072 }, { "epoch": 5.245455479100339, "grad_norm": 0.047721873968839645, "learning_rate": 0.01, "loss": 1.9753, "step": 51075 }, { "epoch": 5.24576358221218, "grad_norm": 0.04936512932181358, "learning_rate": 0.01, "loss": 1.9911, "step": 51078 }, { "epoch": 5.246071685324022, "grad_norm": 0.05856813117861748, "learning_rate": 0.01, "loss": 2.0055, "step": 51081 }, { "epoch": 5.246379788435863, "grad_norm": 0.05068356543779373, "learning_rate": 0.01, "loss": 1.9823, "step": 51084 }, { "epoch": 5.246687891547705, "grad_norm": 0.05900732800364494, "learning_rate": 0.01, "loss": 1.992, "step": 51087 }, { "epoch": 5.246995994659546, "grad_norm": 0.12454086542129517, "learning_rate": 0.01, "loss": 2.02, "step": 51090 }, { "epoch": 5.247304097771387, "grad_norm": 0.037892140448093414, "learning_rate": 0.01, "loss": 2.0078, "step": 51093 }, { "epoch": 5.247612200883229, "grad_norm": 0.07839351892471313, "learning_rate": 0.01, "loss": 1.9993, "step": 51096 }, { "epoch": 5.247920303995071, "grad_norm": 0.04203120246529579, "learning_rate": 0.01, "loss": 1.9974, "step": 51099 }, { "epoch": 5.248228407106912, "grad_norm": 0.07562438398599625, "learning_rate": 0.01, "loss": 1.9896, "step": 51102 }, { "epoch": 5.248536510218753, "grad_norm": 0.044604718685150146, "learning_rate": 0.01, "loss": 1.9879, "step": 51105 }, { "epoch": 5.248844613330594, "grad_norm": 0.09438357502222061, "learning_rate": 0.01, "loss": 2.0028, "step": 51108 }, { "epoch": 5.249152716442436, "grad_norm": 0.04773309826850891, "learning_rate": 0.01, "loss": 1.9631, "step": 51111 }, { "epoch": 5.249460819554278, "grad_norm": 0.033143848180770874, "learning_rate": 0.01, "loss": 1.9766, "step": 51114 }, { "epoch": 5.249768922666119, "grad_norm": 0.0772877037525177, "learning_rate": 0.01, "loss": 1.9921, "step": 51117 }, { "epoch": 5.25007702577796, "grad_norm": 0.054028406739234924, "learning_rate": 0.01, "loss": 2.0047, "step": 51120 }, { "epoch": 5.250385128889802, "grad_norm": 0.07667548954486847, "learning_rate": 0.01, "loss": 2.0012, "step": 51123 }, { "epoch": 5.250693232001643, "grad_norm": 0.06274887174367905, "learning_rate": 0.01, "loss": 1.9972, "step": 51126 }, { "epoch": 5.251001335113485, "grad_norm": 0.09405583888292313, "learning_rate": 0.01, "loss": 1.9902, "step": 51129 }, { "epoch": 5.251309438225326, "grad_norm": 0.06297732889652252, "learning_rate": 0.01, "loss": 1.9995, "step": 51132 }, { "epoch": 5.2516175413371675, "grad_norm": 0.08258040994405746, "learning_rate": 0.01, "loss": 1.9871, "step": 51135 }, { "epoch": 5.251925644449009, "grad_norm": 0.11019886285066605, "learning_rate": 0.01, "loss": 1.9902, "step": 51138 }, { "epoch": 5.25223374756085, "grad_norm": 0.06206972151994705, "learning_rate": 0.01, "loss": 2.0086, "step": 51141 }, { "epoch": 5.252541850672692, "grad_norm": 0.045681048184633255, "learning_rate": 0.01, "loss": 1.9874, "step": 51144 }, { "epoch": 5.2528499537845335, "grad_norm": 0.041622232645750046, "learning_rate": 0.01, "loss": 2.0125, "step": 51147 }, { "epoch": 5.253158056896375, "grad_norm": 0.05589982122182846, "learning_rate": 0.01, "loss": 2.0137, "step": 51150 }, { "epoch": 5.253466160008216, "grad_norm": 0.06394509226083755, "learning_rate": 0.01, "loss": 1.9974, "step": 51153 }, { "epoch": 5.253774263120057, "grad_norm": 0.05354580655694008, "learning_rate": 0.01, "loss": 1.9937, "step": 51156 }, { "epoch": 5.2540823662318985, "grad_norm": 0.06941652297973633, "learning_rate": 0.01, "loss": 2.0012, "step": 51159 }, { "epoch": 5.254390469343741, "grad_norm": 0.0771641731262207, "learning_rate": 0.01, "loss": 1.9945, "step": 51162 }, { "epoch": 5.254698572455582, "grad_norm": 0.08898761868476868, "learning_rate": 0.01, "loss": 2.0024, "step": 51165 }, { "epoch": 5.255006675567423, "grad_norm": 0.03489289805293083, "learning_rate": 0.01, "loss": 1.992, "step": 51168 }, { "epoch": 5.255314778679264, "grad_norm": 0.0783892497420311, "learning_rate": 0.01, "loss": 1.9831, "step": 51171 }, { "epoch": 5.255622881791106, "grad_norm": 0.08177445828914642, "learning_rate": 0.01, "loss": 2.01, "step": 51174 }, { "epoch": 5.255930984902948, "grad_norm": 0.04709051921963692, "learning_rate": 0.01, "loss": 1.9794, "step": 51177 }, { "epoch": 5.256239088014789, "grad_norm": 0.13084626197814941, "learning_rate": 0.01, "loss": 2.0024, "step": 51180 }, { "epoch": 5.25654719112663, "grad_norm": 0.04432738944888115, "learning_rate": 0.01, "loss": 1.9756, "step": 51183 }, { "epoch": 5.256855294238472, "grad_norm": 0.053161200135946274, "learning_rate": 0.01, "loss": 1.9859, "step": 51186 }, { "epoch": 5.257163397350313, "grad_norm": 0.039357706904411316, "learning_rate": 0.01, "loss": 1.9972, "step": 51189 }, { "epoch": 5.257471500462155, "grad_norm": 0.052901457995176315, "learning_rate": 0.01, "loss": 1.9953, "step": 51192 }, { "epoch": 5.257779603573996, "grad_norm": 0.11097205430269241, "learning_rate": 0.01, "loss": 2.0029, "step": 51195 }, { "epoch": 5.258087706685838, "grad_norm": 0.07301553338766098, "learning_rate": 0.01, "loss": 2.0066, "step": 51198 }, { "epoch": 5.258395809797679, "grad_norm": 0.17759805917739868, "learning_rate": 0.01, "loss": 2.0266, "step": 51201 }, { "epoch": 5.25870391290952, "grad_norm": 0.07599959522485733, "learning_rate": 0.01, "loss": 1.982, "step": 51204 }, { "epoch": 5.259012016021362, "grad_norm": 0.13010208308696747, "learning_rate": 0.01, "loss": 2.0013, "step": 51207 }, { "epoch": 5.2593201191332035, "grad_norm": 0.07105369120836258, "learning_rate": 0.01, "loss": 2.0059, "step": 51210 }, { "epoch": 5.259628222245045, "grad_norm": 0.16369540989398956, "learning_rate": 0.01, "loss": 2.0047, "step": 51213 }, { "epoch": 5.259936325356886, "grad_norm": 0.11588908731937408, "learning_rate": 0.01, "loss": 1.9934, "step": 51216 }, { "epoch": 5.260244428468727, "grad_norm": 0.045902661979198456, "learning_rate": 0.01, "loss": 1.9742, "step": 51219 }, { "epoch": 5.260552531580569, "grad_norm": 0.04714805632829666, "learning_rate": 0.01, "loss": 2.0054, "step": 51222 }, { "epoch": 5.260860634692411, "grad_norm": 0.05099105089902878, "learning_rate": 0.01, "loss": 1.9777, "step": 51225 }, { "epoch": 5.261168737804252, "grad_norm": 0.050430044531822205, "learning_rate": 0.01, "loss": 1.9919, "step": 51228 }, { "epoch": 5.261476840916093, "grad_norm": 0.03998453542590141, "learning_rate": 0.01, "loss": 2.0175, "step": 51231 }, { "epoch": 5.2617849440279345, "grad_norm": 0.05709048733115196, "learning_rate": 0.01, "loss": 1.9877, "step": 51234 }, { "epoch": 5.262093047139776, "grad_norm": 0.04667646437883377, "learning_rate": 0.01, "loss": 1.9854, "step": 51237 }, { "epoch": 5.262401150251618, "grad_norm": 0.0413067601621151, "learning_rate": 0.01, "loss": 2.0118, "step": 51240 }, { "epoch": 5.262709253363459, "grad_norm": 0.037300482392311096, "learning_rate": 0.01, "loss": 2.0181, "step": 51243 }, { "epoch": 5.2630173564753004, "grad_norm": 0.10188855230808258, "learning_rate": 0.01, "loss": 1.9705, "step": 51246 }, { "epoch": 5.263325459587142, "grad_norm": 0.060019396245479584, "learning_rate": 0.01, "loss": 2.0153, "step": 51249 }, { "epoch": 5.263633562698983, "grad_norm": 0.07587499171495438, "learning_rate": 0.01, "loss": 2.0006, "step": 51252 }, { "epoch": 5.263941665810824, "grad_norm": 0.17544515430927277, "learning_rate": 0.01, "loss": 1.9913, "step": 51255 }, { "epoch": 5.264249768922666, "grad_norm": 0.046111736446619034, "learning_rate": 0.01, "loss": 2.0078, "step": 51258 }, { "epoch": 5.264557872034508, "grad_norm": 0.03697605058550835, "learning_rate": 0.01, "loss": 2.0069, "step": 51261 }, { "epoch": 5.264865975146349, "grad_norm": 0.04376983270049095, "learning_rate": 0.01, "loss": 1.9866, "step": 51264 }, { "epoch": 5.26517407825819, "grad_norm": 0.06879153102636337, "learning_rate": 0.01, "loss": 1.9983, "step": 51267 }, { "epoch": 5.265482181370031, "grad_norm": 0.05067580193281174, "learning_rate": 0.01, "loss": 1.987, "step": 51270 }, { "epoch": 5.265790284481874, "grad_norm": 0.04876921325922012, "learning_rate": 0.01, "loss": 2.0042, "step": 51273 }, { "epoch": 5.266098387593715, "grad_norm": 0.08174460381269455, "learning_rate": 0.01, "loss": 1.9959, "step": 51276 }, { "epoch": 5.266406490705556, "grad_norm": 0.08350025117397308, "learning_rate": 0.01, "loss": 1.9784, "step": 51279 }, { "epoch": 5.266714593817397, "grad_norm": 0.10765060782432556, "learning_rate": 0.01, "loss": 1.9853, "step": 51282 }, { "epoch": 5.267022696929239, "grad_norm": 0.09596231579780579, "learning_rate": 0.01, "loss": 1.9896, "step": 51285 }, { "epoch": 5.267330800041081, "grad_norm": 0.03738878667354584, "learning_rate": 0.01, "loss": 1.9804, "step": 51288 }, { "epoch": 5.267638903152922, "grad_norm": 0.03415136784315109, "learning_rate": 0.01, "loss": 1.9909, "step": 51291 }, { "epoch": 5.267947006264763, "grad_norm": 0.04650856927037239, "learning_rate": 0.01, "loss": 2.0169, "step": 51294 }, { "epoch": 5.268255109376605, "grad_norm": 0.06548796594142914, "learning_rate": 0.01, "loss": 1.9862, "step": 51297 }, { "epoch": 5.268563212488446, "grad_norm": 0.05420374125242233, "learning_rate": 0.01, "loss": 1.9837, "step": 51300 }, { "epoch": 5.268871315600288, "grad_norm": 0.10747869312763214, "learning_rate": 0.01, "loss": 2.0013, "step": 51303 }, { "epoch": 5.269179418712129, "grad_norm": 0.03331341966986656, "learning_rate": 0.01, "loss": 1.9997, "step": 51306 }, { "epoch": 5.2694875218239705, "grad_norm": 0.10144448280334473, "learning_rate": 0.01, "loss": 2.0065, "step": 51309 }, { "epoch": 5.269795624935812, "grad_norm": 0.08695587515830994, "learning_rate": 0.01, "loss": 2.0069, "step": 51312 }, { "epoch": 5.270103728047653, "grad_norm": 0.053817540407180786, "learning_rate": 0.01, "loss": 2.026, "step": 51315 }, { "epoch": 5.270411831159494, "grad_norm": 0.04269054904580116, "learning_rate": 0.01, "loss": 2.0195, "step": 51318 }, { "epoch": 5.2707199342713364, "grad_norm": 0.048786550760269165, "learning_rate": 0.01, "loss": 1.9877, "step": 51321 }, { "epoch": 5.271028037383178, "grad_norm": 0.0634552389383316, "learning_rate": 0.01, "loss": 2.0173, "step": 51324 }, { "epoch": 5.271336140495019, "grad_norm": 0.14295388758182526, "learning_rate": 0.01, "loss": 2.0062, "step": 51327 }, { "epoch": 5.27164424360686, "grad_norm": 0.05080576241016388, "learning_rate": 0.01, "loss": 1.9895, "step": 51330 }, { "epoch": 5.2719523467187015, "grad_norm": 0.06417543441057205, "learning_rate": 0.01, "loss": 1.9843, "step": 51333 }, { "epoch": 5.272260449830544, "grad_norm": 0.06565088778734207, "learning_rate": 0.01, "loss": 1.9925, "step": 51336 }, { "epoch": 5.272568552942385, "grad_norm": 0.028631992638111115, "learning_rate": 0.01, "loss": 1.9949, "step": 51339 }, { "epoch": 5.272876656054226, "grad_norm": 0.10242610424757004, "learning_rate": 0.01, "loss": 1.9824, "step": 51342 }, { "epoch": 5.273184759166067, "grad_norm": 0.11979396641254425, "learning_rate": 0.01, "loss": 2.0258, "step": 51345 }, { "epoch": 5.273492862277909, "grad_norm": 0.07476961612701416, "learning_rate": 0.01, "loss": 2.0282, "step": 51348 }, { "epoch": 5.273800965389751, "grad_norm": 0.05603867769241333, "learning_rate": 0.01, "loss": 1.991, "step": 51351 }, { "epoch": 5.274109068501592, "grad_norm": 0.04897085204720497, "learning_rate": 0.01, "loss": 2.0187, "step": 51354 }, { "epoch": 5.274417171613433, "grad_norm": 0.03439117968082428, "learning_rate": 0.01, "loss": 1.9989, "step": 51357 }, { "epoch": 5.274725274725275, "grad_norm": 0.08314791321754456, "learning_rate": 0.01, "loss": 2.0117, "step": 51360 }, { "epoch": 5.275033377837116, "grad_norm": 0.0685926079750061, "learning_rate": 0.01, "loss": 1.9787, "step": 51363 }, { "epoch": 5.275341480948958, "grad_norm": 0.088614821434021, "learning_rate": 0.01, "loss": 1.9949, "step": 51366 }, { "epoch": 5.275649584060799, "grad_norm": 0.03873438388109207, "learning_rate": 0.01, "loss": 2.001, "step": 51369 }, { "epoch": 5.275957687172641, "grad_norm": 0.11569857597351074, "learning_rate": 0.01, "loss": 2.0017, "step": 51372 }, { "epoch": 5.276265790284482, "grad_norm": 0.11536852270364761, "learning_rate": 0.01, "loss": 2.024, "step": 51375 }, { "epoch": 5.276573893396323, "grad_norm": 0.040931250900030136, "learning_rate": 0.01, "loss": 2.0085, "step": 51378 }, { "epoch": 5.276881996508164, "grad_norm": 0.13397638499736786, "learning_rate": 0.01, "loss": 2.0248, "step": 51381 }, { "epoch": 5.2771900996200065, "grad_norm": 0.045118771493434906, "learning_rate": 0.01, "loss": 1.9991, "step": 51384 }, { "epoch": 5.277498202731848, "grad_norm": 0.04626978933811188, "learning_rate": 0.01, "loss": 2.0161, "step": 51387 }, { "epoch": 5.277806305843689, "grad_norm": 0.03898259624838829, "learning_rate": 0.01, "loss": 1.9965, "step": 51390 }, { "epoch": 5.27811440895553, "grad_norm": 0.14309214055538177, "learning_rate": 0.01, "loss": 2.0201, "step": 51393 }, { "epoch": 5.278422512067372, "grad_norm": 0.11283177882432938, "learning_rate": 0.01, "loss": 1.986, "step": 51396 }, { "epoch": 5.278730615179214, "grad_norm": 0.07250397652387619, "learning_rate": 0.01, "loss": 2.0112, "step": 51399 }, { "epoch": 5.279038718291055, "grad_norm": 0.04467407613992691, "learning_rate": 0.01, "loss": 1.9964, "step": 51402 }, { "epoch": 5.279346821402896, "grad_norm": 0.03758449852466583, "learning_rate": 0.01, "loss": 2.0086, "step": 51405 }, { "epoch": 5.2796549245147375, "grad_norm": 0.04290134087204933, "learning_rate": 0.01, "loss": 1.9711, "step": 51408 }, { "epoch": 5.279963027626579, "grad_norm": 0.21099735796451569, "learning_rate": 0.01, "loss": 2.0002, "step": 51411 }, { "epoch": 5.28027113073842, "grad_norm": 0.10676965117454529, "learning_rate": 0.01, "loss": 2.006, "step": 51414 }, { "epoch": 5.280579233850262, "grad_norm": 0.11726178228855133, "learning_rate": 0.01, "loss": 2.0066, "step": 51417 }, { "epoch": 5.280887336962103, "grad_norm": 0.06197341904044151, "learning_rate": 0.01, "loss": 2.0134, "step": 51420 }, { "epoch": 5.281195440073945, "grad_norm": 0.04718432575464249, "learning_rate": 0.01, "loss": 2.0004, "step": 51423 }, { "epoch": 5.281503543185786, "grad_norm": 0.049280427396297455, "learning_rate": 0.01, "loss": 1.9822, "step": 51426 }, { "epoch": 5.281811646297627, "grad_norm": 0.036890413612127304, "learning_rate": 0.01, "loss": 1.9858, "step": 51429 }, { "epoch": 5.282119749409469, "grad_norm": 0.03962325677275658, "learning_rate": 0.01, "loss": 1.9882, "step": 51432 }, { "epoch": 5.282427852521311, "grad_norm": 0.03786443546414375, "learning_rate": 0.01, "loss": 1.9874, "step": 51435 }, { "epoch": 5.282735955633152, "grad_norm": 0.06800255179405212, "learning_rate": 0.01, "loss": 1.965, "step": 51438 }, { "epoch": 5.283044058744993, "grad_norm": 0.1254369616508484, "learning_rate": 0.01, "loss": 1.9755, "step": 51441 }, { "epoch": 5.283352161856834, "grad_norm": 0.10241379588842392, "learning_rate": 0.01, "loss": 1.9908, "step": 51444 }, { "epoch": 5.283660264968677, "grad_norm": 0.07036352902650833, "learning_rate": 0.01, "loss": 1.986, "step": 51447 }, { "epoch": 5.283968368080518, "grad_norm": 0.04033416137099266, "learning_rate": 0.01, "loss": 1.9962, "step": 51450 }, { "epoch": 5.284276471192359, "grad_norm": 0.03396594151854515, "learning_rate": 0.01, "loss": 1.9944, "step": 51453 }, { "epoch": 5.2845845743042, "grad_norm": 0.038950271904468536, "learning_rate": 0.01, "loss": 1.99, "step": 51456 }, { "epoch": 5.284892677416042, "grad_norm": 0.12410634011030197, "learning_rate": 0.01, "loss": 1.9932, "step": 51459 }, { "epoch": 5.285200780527884, "grad_norm": 0.08871324360370636, "learning_rate": 0.01, "loss": 2.0303, "step": 51462 }, { "epoch": 5.285508883639725, "grad_norm": 0.06051253527402878, "learning_rate": 0.01, "loss": 2.0056, "step": 51465 }, { "epoch": 5.285816986751566, "grad_norm": 0.08062247186899185, "learning_rate": 0.01, "loss": 2.0108, "step": 51468 }, { "epoch": 5.286125089863408, "grad_norm": 0.09288395196199417, "learning_rate": 0.01, "loss": 2.0187, "step": 51471 }, { "epoch": 5.286433192975249, "grad_norm": 0.11983957886695862, "learning_rate": 0.01, "loss": 1.9799, "step": 51474 }, { "epoch": 5.28674129608709, "grad_norm": 0.12828132510185242, "learning_rate": 0.01, "loss": 2.0113, "step": 51477 }, { "epoch": 5.287049399198932, "grad_norm": 0.09051026403903961, "learning_rate": 0.01, "loss": 2.0068, "step": 51480 }, { "epoch": 5.2873575023107735, "grad_norm": 0.08622095733880997, "learning_rate": 0.01, "loss": 1.9865, "step": 51483 }, { "epoch": 5.287665605422615, "grad_norm": 0.0727437287569046, "learning_rate": 0.01, "loss": 1.9938, "step": 51486 }, { "epoch": 5.287973708534456, "grad_norm": 0.052949920296669006, "learning_rate": 0.01, "loss": 1.9779, "step": 51489 }, { "epoch": 5.288281811646297, "grad_norm": 0.04807107150554657, "learning_rate": 0.01, "loss": 2.0071, "step": 51492 }, { "epoch": 5.288589914758139, "grad_norm": 0.12251218408346176, "learning_rate": 0.01, "loss": 1.9911, "step": 51495 }, { "epoch": 5.288898017869981, "grad_norm": 0.07985778152942657, "learning_rate": 0.01, "loss": 2.0055, "step": 51498 }, { "epoch": 5.289206120981822, "grad_norm": 0.0659778043627739, "learning_rate": 0.01, "loss": 1.9511, "step": 51501 }, { "epoch": 5.289514224093663, "grad_norm": 0.0646664947271347, "learning_rate": 0.01, "loss": 1.9754, "step": 51504 }, { "epoch": 5.2898223272055045, "grad_norm": 0.05297090485692024, "learning_rate": 0.01, "loss": 2.0091, "step": 51507 }, { "epoch": 5.290130430317346, "grad_norm": 0.039955638349056244, "learning_rate": 0.01, "loss": 2.0142, "step": 51510 }, { "epoch": 5.290438533429188, "grad_norm": 0.03601599857211113, "learning_rate": 0.01, "loss": 2.0147, "step": 51513 }, { "epoch": 5.290746636541029, "grad_norm": 0.03554227575659752, "learning_rate": 0.01, "loss": 1.9824, "step": 51516 }, { "epoch": 5.29105473965287, "grad_norm": 0.0429302453994751, "learning_rate": 0.01, "loss": 1.996, "step": 51519 }, { "epoch": 5.291362842764712, "grad_norm": 0.07092410326004028, "learning_rate": 0.01, "loss": 1.9969, "step": 51522 }, { "epoch": 5.291670945876553, "grad_norm": 0.09126897156238556, "learning_rate": 0.01, "loss": 2.01, "step": 51525 }, { "epoch": 5.291979048988395, "grad_norm": 0.08033829182386398, "learning_rate": 0.01, "loss": 2.0035, "step": 51528 }, { "epoch": 5.292287152100236, "grad_norm": 0.0636729821562767, "learning_rate": 0.01, "loss": 1.985, "step": 51531 }, { "epoch": 5.292595255212078, "grad_norm": 0.05228522792458534, "learning_rate": 0.01, "loss": 1.9959, "step": 51534 }, { "epoch": 5.292903358323919, "grad_norm": 0.04770840331912041, "learning_rate": 0.01, "loss": 1.9626, "step": 51537 }, { "epoch": 5.29321146143576, "grad_norm": 0.06592493504285812, "learning_rate": 0.01, "loss": 2.0068, "step": 51540 }, { "epoch": 5.293519564547602, "grad_norm": 0.043730318546295166, "learning_rate": 0.01, "loss": 2.0128, "step": 51543 }, { "epoch": 5.293827667659444, "grad_norm": 0.11756035685539246, "learning_rate": 0.01, "loss": 2.015, "step": 51546 }, { "epoch": 5.294135770771285, "grad_norm": 0.05374009907245636, "learning_rate": 0.01, "loss": 1.978, "step": 51549 }, { "epoch": 5.294443873883126, "grad_norm": 0.03395754098892212, "learning_rate": 0.01, "loss": 1.992, "step": 51552 }, { "epoch": 5.294751976994967, "grad_norm": 0.04565683752298355, "learning_rate": 0.01, "loss": 1.9933, "step": 51555 }, { "epoch": 5.2950600801068095, "grad_norm": 0.06022532284259796, "learning_rate": 0.01, "loss": 2.0165, "step": 51558 }, { "epoch": 5.295368183218651, "grad_norm": 0.03321307152509689, "learning_rate": 0.01, "loss": 1.9962, "step": 51561 }, { "epoch": 5.295676286330492, "grad_norm": 0.09450728446245193, "learning_rate": 0.01, "loss": 2.0153, "step": 51564 }, { "epoch": 5.295984389442333, "grad_norm": 0.07322457432746887, "learning_rate": 0.01, "loss": 2.0003, "step": 51567 }, { "epoch": 5.296292492554175, "grad_norm": 0.07152074575424194, "learning_rate": 0.01, "loss": 1.9916, "step": 51570 }, { "epoch": 5.296600595666016, "grad_norm": 0.09634838998317719, "learning_rate": 0.01, "loss": 1.9888, "step": 51573 }, { "epoch": 5.296908698777858, "grad_norm": 0.055339280515909195, "learning_rate": 0.01, "loss": 1.9801, "step": 51576 }, { "epoch": 5.297216801889699, "grad_norm": 0.03655093163251877, "learning_rate": 0.01, "loss": 1.9844, "step": 51579 }, { "epoch": 5.2975249050015405, "grad_norm": 0.04170520231127739, "learning_rate": 0.01, "loss": 1.989, "step": 51582 }, { "epoch": 5.297833008113382, "grad_norm": 0.04089641198515892, "learning_rate": 0.01, "loss": 1.9996, "step": 51585 }, { "epoch": 5.298141111225223, "grad_norm": 0.03209071606397629, "learning_rate": 0.01, "loss": 1.9937, "step": 51588 }, { "epoch": 5.298449214337065, "grad_norm": 0.040264058858156204, "learning_rate": 0.01, "loss": 1.9813, "step": 51591 }, { "epoch": 5.298757317448906, "grad_norm": 0.040750179439783096, "learning_rate": 0.01, "loss": 1.9884, "step": 51594 }, { "epoch": 5.299065420560748, "grad_norm": 0.09608472883701324, "learning_rate": 0.01, "loss": 2.025, "step": 51597 }, { "epoch": 5.299373523672589, "grad_norm": 0.0965270921587944, "learning_rate": 0.01, "loss": 2.0181, "step": 51600 }, { "epoch": 5.29968162678443, "grad_norm": 0.07110472768545151, "learning_rate": 0.01, "loss": 1.9925, "step": 51603 }, { "epoch": 5.2999897298962715, "grad_norm": 0.08404947817325592, "learning_rate": 0.01, "loss": 2.0145, "step": 51606 }, { "epoch": 5.300297833008114, "grad_norm": 0.04731042683124542, "learning_rate": 0.01, "loss": 1.9832, "step": 51609 }, { "epoch": 5.300605936119955, "grad_norm": 0.045290857553482056, "learning_rate": 0.01, "loss": 2.0133, "step": 51612 }, { "epoch": 5.300914039231796, "grad_norm": 0.04302288219332695, "learning_rate": 0.01, "loss": 2.016, "step": 51615 }, { "epoch": 5.301222142343637, "grad_norm": 0.04258449375629425, "learning_rate": 0.01, "loss": 2.0119, "step": 51618 }, { "epoch": 5.301530245455479, "grad_norm": 0.04786079004406929, "learning_rate": 0.01, "loss": 2.0067, "step": 51621 }, { "epoch": 5.301838348567321, "grad_norm": 0.14586381614208221, "learning_rate": 0.01, "loss": 1.994, "step": 51624 }, { "epoch": 5.302146451679162, "grad_norm": 0.06271131336688995, "learning_rate": 0.01, "loss": 1.9983, "step": 51627 }, { "epoch": 5.302454554791003, "grad_norm": 0.05278899893164635, "learning_rate": 0.01, "loss": 1.9956, "step": 51630 }, { "epoch": 5.302762657902845, "grad_norm": 0.04671400785446167, "learning_rate": 0.01, "loss": 1.9911, "step": 51633 }, { "epoch": 5.303070761014686, "grad_norm": 0.07106892019510269, "learning_rate": 0.01, "loss": 2.0101, "step": 51636 }, { "epoch": 5.303378864126528, "grad_norm": 0.031581420451402664, "learning_rate": 0.01, "loss": 2.0088, "step": 51639 }, { "epoch": 5.303686967238369, "grad_norm": 0.04924008622765541, "learning_rate": 0.01, "loss": 1.9775, "step": 51642 }, { "epoch": 5.303995070350211, "grad_norm": 0.11483835428953171, "learning_rate": 0.01, "loss": 1.9781, "step": 51645 }, { "epoch": 5.304303173462052, "grad_norm": 0.07446176558732986, "learning_rate": 0.01, "loss": 2.0171, "step": 51648 }, { "epoch": 5.304611276573893, "grad_norm": 0.051111143082380295, "learning_rate": 0.01, "loss": 2.0008, "step": 51651 }, { "epoch": 5.304919379685735, "grad_norm": 0.040741320699453354, "learning_rate": 0.01, "loss": 1.9935, "step": 51654 }, { "epoch": 5.3052274827975765, "grad_norm": 0.06887371838092804, "learning_rate": 0.01, "loss": 2.0131, "step": 51657 }, { "epoch": 5.305535585909418, "grad_norm": 0.09351345151662827, "learning_rate": 0.01, "loss": 2.0056, "step": 51660 }, { "epoch": 5.305843689021259, "grad_norm": 0.0568513348698616, "learning_rate": 0.01, "loss": 1.9897, "step": 51663 }, { "epoch": 5.3061517921331, "grad_norm": 0.05117219313979149, "learning_rate": 0.01, "loss": 2.0016, "step": 51666 }, { "epoch": 5.3064598952449415, "grad_norm": 0.04374157264828682, "learning_rate": 0.01, "loss": 1.9708, "step": 51669 }, { "epoch": 5.306767998356784, "grad_norm": 0.04442832991480827, "learning_rate": 0.01, "loss": 2.0164, "step": 51672 }, { "epoch": 5.307076101468625, "grad_norm": 0.0560653991997242, "learning_rate": 0.01, "loss": 2.0034, "step": 51675 }, { "epoch": 5.307384204580466, "grad_norm": 0.11248762905597687, "learning_rate": 0.01, "loss": 2.0026, "step": 51678 }, { "epoch": 5.3076923076923075, "grad_norm": 0.05383045971393585, "learning_rate": 0.01, "loss": 2.0145, "step": 51681 }, { "epoch": 5.308000410804149, "grad_norm": 0.11476562172174454, "learning_rate": 0.01, "loss": 1.9891, "step": 51684 }, { "epoch": 5.308308513915991, "grad_norm": 0.04139787703752518, "learning_rate": 0.01, "loss": 2.0037, "step": 51687 }, { "epoch": 5.308616617027832, "grad_norm": 0.03532332554459572, "learning_rate": 0.01, "loss": 1.9938, "step": 51690 }, { "epoch": 5.308924720139673, "grad_norm": 0.04809233918786049, "learning_rate": 0.01, "loss": 1.9894, "step": 51693 }, { "epoch": 5.309232823251515, "grad_norm": 0.0827130451798439, "learning_rate": 0.01, "loss": 2.0009, "step": 51696 }, { "epoch": 5.309540926363356, "grad_norm": 0.05010407045483589, "learning_rate": 0.01, "loss": 2.0109, "step": 51699 }, { "epoch": 5.309849029475198, "grad_norm": 0.04733329638838768, "learning_rate": 0.01, "loss": 2.0, "step": 51702 }, { "epoch": 5.310157132587039, "grad_norm": 0.03515147417783737, "learning_rate": 0.01, "loss": 2.0103, "step": 51705 }, { "epoch": 5.310465235698881, "grad_norm": 0.12705236673355103, "learning_rate": 0.01, "loss": 2.0031, "step": 51708 }, { "epoch": 5.310773338810722, "grad_norm": 0.04811834171414375, "learning_rate": 0.01, "loss": 1.9934, "step": 51711 }, { "epoch": 5.311081441922563, "grad_norm": 0.04282277077436447, "learning_rate": 0.01, "loss": 2.012, "step": 51714 }, { "epoch": 5.311389545034405, "grad_norm": 0.039072856307029724, "learning_rate": 0.01, "loss": 1.9754, "step": 51717 }, { "epoch": 5.311697648146247, "grad_norm": 0.06571515649557114, "learning_rate": 0.01, "loss": 2.0007, "step": 51720 }, { "epoch": 5.312005751258088, "grad_norm": 0.10408081859350204, "learning_rate": 0.01, "loss": 2.0091, "step": 51723 }, { "epoch": 5.312313854369929, "grad_norm": 0.12248563021421432, "learning_rate": 0.01, "loss": 1.9859, "step": 51726 }, { "epoch": 5.31262195748177, "grad_norm": 0.1178579330444336, "learning_rate": 0.01, "loss": 2.0148, "step": 51729 }, { "epoch": 5.312930060593612, "grad_norm": 0.042379625141620636, "learning_rate": 0.01, "loss": 1.9786, "step": 51732 }, { "epoch": 5.313238163705454, "grad_norm": 0.05351175367832184, "learning_rate": 0.01, "loss": 1.9822, "step": 51735 }, { "epoch": 5.313546266817295, "grad_norm": 0.03492636978626251, "learning_rate": 0.01, "loss": 1.9868, "step": 51738 }, { "epoch": 5.313854369929136, "grad_norm": 0.037354279309511185, "learning_rate": 0.01, "loss": 2.0128, "step": 51741 }, { "epoch": 5.3141624730409776, "grad_norm": 0.0453021377325058, "learning_rate": 0.01, "loss": 1.9709, "step": 51744 }, { "epoch": 5.314470576152819, "grad_norm": 0.040230412036180496, "learning_rate": 0.01, "loss": 2.007, "step": 51747 }, { "epoch": 5.314778679264661, "grad_norm": 0.1004524752497673, "learning_rate": 0.01, "loss": 2.0159, "step": 51750 }, { "epoch": 5.315086782376502, "grad_norm": 0.08435871452093124, "learning_rate": 0.01, "loss": 1.9936, "step": 51753 }, { "epoch": 5.3153948854883435, "grad_norm": 0.04728049412369728, "learning_rate": 0.01, "loss": 1.9695, "step": 51756 }, { "epoch": 5.315702988600185, "grad_norm": 0.043835487216711044, "learning_rate": 0.01, "loss": 1.9875, "step": 51759 }, { "epoch": 5.316011091712026, "grad_norm": 0.03761078417301178, "learning_rate": 0.01, "loss": 1.9879, "step": 51762 }, { "epoch": 5.316319194823867, "grad_norm": 0.11318635195493698, "learning_rate": 0.01, "loss": 2.0072, "step": 51765 }, { "epoch": 5.316627297935709, "grad_norm": 0.06980354338884354, "learning_rate": 0.01, "loss": 1.9965, "step": 51768 }, { "epoch": 5.316935401047551, "grad_norm": 0.09948752075433731, "learning_rate": 0.01, "loss": 1.9912, "step": 51771 }, { "epoch": 5.317243504159392, "grad_norm": 0.07391460239887238, "learning_rate": 0.01, "loss": 2.0158, "step": 51774 }, { "epoch": 5.317551607271233, "grad_norm": 0.06508208811283112, "learning_rate": 0.01, "loss": 2.0109, "step": 51777 }, { "epoch": 5.3178597103830745, "grad_norm": 0.0692305937409401, "learning_rate": 0.01, "loss": 1.9963, "step": 51780 }, { "epoch": 5.318167813494917, "grad_norm": 0.07129498571157455, "learning_rate": 0.01, "loss": 2.0188, "step": 51783 }, { "epoch": 5.318475916606758, "grad_norm": 0.05130002647638321, "learning_rate": 0.01, "loss": 2.0042, "step": 51786 }, { "epoch": 5.318784019718599, "grad_norm": 0.07163992524147034, "learning_rate": 0.01, "loss": 1.9742, "step": 51789 }, { "epoch": 5.31909212283044, "grad_norm": 0.09687676280736923, "learning_rate": 0.01, "loss": 2.0073, "step": 51792 }, { "epoch": 5.319400225942282, "grad_norm": 0.08195064216852188, "learning_rate": 0.01, "loss": 1.9981, "step": 51795 }, { "epoch": 5.319708329054124, "grad_norm": 0.05641806125640869, "learning_rate": 0.01, "loss": 2.0018, "step": 51798 }, { "epoch": 5.320016432165965, "grad_norm": 0.06832972913980484, "learning_rate": 0.01, "loss": 1.9688, "step": 51801 }, { "epoch": 5.320324535277806, "grad_norm": 0.06402765214443207, "learning_rate": 0.01, "loss": 1.9935, "step": 51804 }, { "epoch": 5.320632638389648, "grad_norm": 0.045856647193431854, "learning_rate": 0.01, "loss": 1.9857, "step": 51807 }, { "epoch": 5.320940741501489, "grad_norm": 0.10627341270446777, "learning_rate": 0.01, "loss": 2.0, "step": 51810 }, { "epoch": 5.321248844613331, "grad_norm": 0.07530563324689865, "learning_rate": 0.01, "loss": 1.9766, "step": 51813 }, { "epoch": 5.321556947725172, "grad_norm": 0.06049705296754837, "learning_rate": 0.01, "loss": 2.0227, "step": 51816 }, { "epoch": 5.3218650508370136, "grad_norm": 0.05264494568109512, "learning_rate": 0.01, "loss": 1.9785, "step": 51819 }, { "epoch": 5.322173153948855, "grad_norm": 0.03524477034807205, "learning_rate": 0.01, "loss": 2.0069, "step": 51822 }, { "epoch": 5.322481257060696, "grad_norm": 0.03644990921020508, "learning_rate": 0.01, "loss": 2.0254, "step": 51825 }, { "epoch": 5.322789360172537, "grad_norm": 0.10562828928232193, "learning_rate": 0.01, "loss": 1.9894, "step": 51828 }, { "epoch": 5.3230974632843795, "grad_norm": 0.06203228980302811, "learning_rate": 0.01, "loss": 1.9915, "step": 51831 }, { "epoch": 5.323405566396221, "grad_norm": 0.06565918028354645, "learning_rate": 0.01, "loss": 1.9969, "step": 51834 }, { "epoch": 5.323713669508062, "grad_norm": 0.03533428907394409, "learning_rate": 0.01, "loss": 1.9765, "step": 51837 }, { "epoch": 5.324021772619903, "grad_norm": 0.054743893444538116, "learning_rate": 0.01, "loss": 1.9843, "step": 51840 }, { "epoch": 5.3243298757317445, "grad_norm": 0.04253426194190979, "learning_rate": 0.01, "loss": 2.0179, "step": 51843 }, { "epoch": 5.324637978843587, "grad_norm": 0.05230151116847992, "learning_rate": 0.01, "loss": 2.0335, "step": 51846 }, { "epoch": 5.324946081955428, "grad_norm": 0.08154330402612686, "learning_rate": 0.01, "loss": 1.972, "step": 51849 }, { "epoch": 5.325254185067269, "grad_norm": 0.0635039433836937, "learning_rate": 0.01, "loss": 2.0136, "step": 51852 }, { "epoch": 5.3255622881791105, "grad_norm": 0.07905185222625732, "learning_rate": 0.01, "loss": 1.9801, "step": 51855 }, { "epoch": 5.325870391290952, "grad_norm": 0.04034522920846939, "learning_rate": 0.01, "loss": 1.9908, "step": 51858 }, { "epoch": 5.326178494402793, "grad_norm": 0.045876212418079376, "learning_rate": 0.01, "loss": 2.0189, "step": 51861 }, { "epoch": 5.326486597514635, "grad_norm": 0.03529779985547066, "learning_rate": 0.01, "loss": 2.0107, "step": 51864 }, { "epoch": 5.326794700626476, "grad_norm": 0.04534045606851578, "learning_rate": 0.01, "loss": 1.9734, "step": 51867 }, { "epoch": 5.327102803738318, "grad_norm": 0.126200869679451, "learning_rate": 0.01, "loss": 1.9621, "step": 51870 }, { "epoch": 5.327410906850159, "grad_norm": 0.052106015384197235, "learning_rate": 0.01, "loss": 1.9906, "step": 51873 }, { "epoch": 5.327719009962, "grad_norm": 0.048572082072496414, "learning_rate": 0.01, "loss": 2.0044, "step": 51876 }, { "epoch": 5.328027113073842, "grad_norm": 0.057247593998909, "learning_rate": 0.01, "loss": 1.9794, "step": 51879 }, { "epoch": 5.328335216185684, "grad_norm": 0.05806770548224449, "learning_rate": 0.01, "loss": 1.989, "step": 51882 }, { "epoch": 5.328643319297525, "grad_norm": 0.13235436379909515, "learning_rate": 0.01, "loss": 2.0009, "step": 51885 }, { "epoch": 5.328951422409366, "grad_norm": 0.05209680646657944, "learning_rate": 0.01, "loss": 2.0027, "step": 51888 }, { "epoch": 5.329259525521207, "grad_norm": 0.04446623846888542, "learning_rate": 0.01, "loss": 2.0015, "step": 51891 }, { "epoch": 5.32956762863305, "grad_norm": 0.0674331933259964, "learning_rate": 0.01, "loss": 1.99, "step": 51894 }, { "epoch": 5.329875731744891, "grad_norm": 0.11401736736297607, "learning_rate": 0.01, "loss": 1.9766, "step": 51897 }, { "epoch": 5.330183834856732, "grad_norm": 0.08108768612146378, "learning_rate": 0.01, "loss": 1.9956, "step": 51900 }, { "epoch": 5.330491937968573, "grad_norm": 0.06601007282733917, "learning_rate": 0.01, "loss": 1.9955, "step": 51903 }, { "epoch": 5.330800041080415, "grad_norm": 0.05304531753063202, "learning_rate": 0.01, "loss": 1.9967, "step": 51906 }, { "epoch": 5.331108144192257, "grad_norm": 0.04129822179675102, "learning_rate": 0.01, "loss": 2.0316, "step": 51909 }, { "epoch": 5.331416247304098, "grad_norm": 0.06261187046766281, "learning_rate": 0.01, "loss": 1.9957, "step": 51912 }, { "epoch": 5.331724350415939, "grad_norm": 0.06908193230628967, "learning_rate": 0.01, "loss": 1.9841, "step": 51915 }, { "epoch": 5.3320324535277805, "grad_norm": 0.10063374042510986, "learning_rate": 0.01, "loss": 2.0091, "step": 51918 }, { "epoch": 5.332340556639622, "grad_norm": 0.11441531032323837, "learning_rate": 0.01, "loss": 1.9971, "step": 51921 }, { "epoch": 5.332648659751463, "grad_norm": 0.058185406029224396, "learning_rate": 0.01, "loss": 2.0023, "step": 51924 }, { "epoch": 5.332956762863305, "grad_norm": 0.041803937405347824, "learning_rate": 0.01, "loss": 2.0083, "step": 51927 }, { "epoch": 5.3332648659751465, "grad_norm": 0.12652553617954254, "learning_rate": 0.01, "loss": 2.0096, "step": 51930 }, { "epoch": 5.333572969086988, "grad_norm": 0.04237103462219238, "learning_rate": 0.01, "loss": 1.9789, "step": 51933 }, { "epoch": 5.333881072198829, "grad_norm": 0.052531465888023376, "learning_rate": 0.01, "loss": 2.0056, "step": 51936 }, { "epoch": 5.33418917531067, "grad_norm": 0.0361291877925396, "learning_rate": 0.01, "loss": 2.0085, "step": 51939 }, { "epoch": 5.334497278422512, "grad_norm": 0.17653311789035797, "learning_rate": 0.01, "loss": 2.0144, "step": 51942 }, { "epoch": 5.334805381534354, "grad_norm": 0.04208230972290039, "learning_rate": 0.01, "loss": 1.9929, "step": 51945 }, { "epoch": 5.335113484646195, "grad_norm": 0.05036168918013573, "learning_rate": 0.01, "loss": 1.9819, "step": 51948 }, { "epoch": 5.335421587758036, "grad_norm": 0.032406702637672424, "learning_rate": 0.01, "loss": 1.9909, "step": 51951 }, { "epoch": 5.3357296908698775, "grad_norm": 0.03707049414515495, "learning_rate": 0.01, "loss": 1.9886, "step": 51954 }, { "epoch": 5.33603779398172, "grad_norm": 0.10224796086549759, "learning_rate": 0.01, "loss": 2.0212, "step": 51957 }, { "epoch": 5.336345897093561, "grad_norm": 0.07522287964820862, "learning_rate": 0.01, "loss": 2.0236, "step": 51960 }, { "epoch": 5.336654000205402, "grad_norm": 0.08081628382205963, "learning_rate": 0.01, "loss": 2.0055, "step": 51963 }, { "epoch": 5.336962103317243, "grad_norm": 0.03779719024896622, "learning_rate": 0.01, "loss": 2.0006, "step": 51966 }, { "epoch": 5.337270206429085, "grad_norm": 0.05938563495874405, "learning_rate": 0.01, "loss": 1.9933, "step": 51969 }, { "epoch": 5.337578309540927, "grad_norm": 0.058824703097343445, "learning_rate": 0.01, "loss": 1.9977, "step": 51972 }, { "epoch": 5.337886412652768, "grad_norm": 0.09814310073852539, "learning_rate": 0.01, "loss": 2.004, "step": 51975 }, { "epoch": 5.338194515764609, "grad_norm": 0.038881540298461914, "learning_rate": 0.01, "loss": 1.9776, "step": 51978 }, { "epoch": 5.338502618876451, "grad_norm": 0.06858659535646439, "learning_rate": 0.01, "loss": 1.9769, "step": 51981 }, { "epoch": 5.338810721988292, "grad_norm": 0.05999214947223663, "learning_rate": 0.01, "loss": 2.0051, "step": 51984 }, { "epoch": 5.339118825100133, "grad_norm": 0.13660067319869995, "learning_rate": 0.01, "loss": 2.0003, "step": 51987 }, { "epoch": 5.339426928211975, "grad_norm": 0.039648644626140594, "learning_rate": 0.01, "loss": 2.0287, "step": 51990 }, { "epoch": 5.3397350313238165, "grad_norm": 0.04098828509449959, "learning_rate": 0.01, "loss": 2.0028, "step": 51993 }, { "epoch": 5.340043134435658, "grad_norm": 0.07291056960821152, "learning_rate": 0.01, "loss": 1.9797, "step": 51996 }, { "epoch": 5.340351237547499, "grad_norm": 0.07260385155677795, "learning_rate": 0.01, "loss": 2.0081, "step": 51999 }, { "epoch": 5.34065934065934, "grad_norm": 0.08830993622541428, "learning_rate": 0.01, "loss": 1.9863, "step": 52002 }, { "epoch": 5.3409674437711825, "grad_norm": 0.053973227739334106, "learning_rate": 0.01, "loss": 1.9924, "step": 52005 }, { "epoch": 5.341275546883024, "grad_norm": 0.06421860307455063, "learning_rate": 0.01, "loss": 1.9937, "step": 52008 }, { "epoch": 5.341583649994865, "grad_norm": 0.06905265897512436, "learning_rate": 0.01, "loss": 2.0258, "step": 52011 }, { "epoch": 5.341891753106706, "grad_norm": 0.1201087012887001, "learning_rate": 0.01, "loss": 2.0152, "step": 52014 }, { "epoch": 5.3421998562185475, "grad_norm": 0.0491718128323555, "learning_rate": 0.01, "loss": 1.9998, "step": 52017 }, { "epoch": 5.342507959330389, "grad_norm": 0.04123944044113159, "learning_rate": 0.01, "loss": 1.9924, "step": 52020 }, { "epoch": 5.342816062442231, "grad_norm": 0.059747952967882156, "learning_rate": 0.01, "loss": 1.9803, "step": 52023 }, { "epoch": 5.343124165554072, "grad_norm": 0.07639419287443161, "learning_rate": 0.01, "loss": 2.0135, "step": 52026 }, { "epoch": 5.3434322686659135, "grad_norm": 0.04553646221756935, "learning_rate": 0.01, "loss": 1.9915, "step": 52029 }, { "epoch": 5.343740371777755, "grad_norm": 0.05324438959360123, "learning_rate": 0.01, "loss": 2.011, "step": 52032 }, { "epoch": 5.344048474889596, "grad_norm": 0.07225726544857025, "learning_rate": 0.01, "loss": 1.988, "step": 52035 }, { "epoch": 5.344356578001438, "grad_norm": 0.10456754267215729, "learning_rate": 0.01, "loss": 2.0171, "step": 52038 }, { "epoch": 5.344664681113279, "grad_norm": 0.06349442899227142, "learning_rate": 0.01, "loss": 2.0006, "step": 52041 }, { "epoch": 5.344972784225121, "grad_norm": 0.08539248257875443, "learning_rate": 0.01, "loss": 1.9894, "step": 52044 }, { "epoch": 5.345280887336962, "grad_norm": 0.04877861216664314, "learning_rate": 0.01, "loss": 1.9805, "step": 52047 }, { "epoch": 5.345588990448803, "grad_norm": 0.0842355415225029, "learning_rate": 0.01, "loss": 2.0135, "step": 52050 }, { "epoch": 5.345897093560645, "grad_norm": 0.03320346772670746, "learning_rate": 0.01, "loss": 2.0038, "step": 52053 }, { "epoch": 5.346205196672487, "grad_norm": 0.09604234993457794, "learning_rate": 0.01, "loss": 2.0152, "step": 52056 }, { "epoch": 5.346513299784328, "grad_norm": 0.04435954615473747, "learning_rate": 0.01, "loss": 1.9884, "step": 52059 }, { "epoch": 5.346821402896169, "grad_norm": 0.07951738685369492, "learning_rate": 0.01, "loss": 1.9918, "step": 52062 }, { "epoch": 5.34712950600801, "grad_norm": 0.05883636698126793, "learning_rate": 0.01, "loss": 2.0099, "step": 52065 }, { "epoch": 5.3474376091198526, "grad_norm": 0.08984318375587463, "learning_rate": 0.01, "loss": 1.98, "step": 52068 }, { "epoch": 5.347745712231694, "grad_norm": 0.11542551219463348, "learning_rate": 0.01, "loss": 1.9835, "step": 52071 }, { "epoch": 5.348053815343535, "grad_norm": 0.07631759345531464, "learning_rate": 0.01, "loss": 2.0099, "step": 52074 }, { "epoch": 5.348361918455376, "grad_norm": 0.05458938702940941, "learning_rate": 0.01, "loss": 1.9866, "step": 52077 }, { "epoch": 5.348670021567218, "grad_norm": 0.03696439042687416, "learning_rate": 0.01, "loss": 2.0131, "step": 52080 }, { "epoch": 5.348978124679059, "grad_norm": 0.05871397256851196, "learning_rate": 0.01, "loss": 1.974, "step": 52083 }, { "epoch": 5.349286227790901, "grad_norm": 0.046202853322029114, "learning_rate": 0.01, "loss": 2.0081, "step": 52086 }, { "epoch": 5.349594330902742, "grad_norm": 0.035825759172439575, "learning_rate": 0.01, "loss": 1.9752, "step": 52089 }, { "epoch": 5.3499024340145835, "grad_norm": 0.12834282219409943, "learning_rate": 0.01, "loss": 1.9967, "step": 52092 }, { "epoch": 5.350210537126425, "grad_norm": 0.05964813381433487, "learning_rate": 0.01, "loss": 2.0028, "step": 52095 }, { "epoch": 5.350518640238266, "grad_norm": 0.0718245729804039, "learning_rate": 0.01, "loss": 2.0141, "step": 52098 }, { "epoch": 5.350826743350108, "grad_norm": 0.043250247836112976, "learning_rate": 0.01, "loss": 1.9815, "step": 52101 }, { "epoch": 5.3511348464619495, "grad_norm": 0.0491093210875988, "learning_rate": 0.01, "loss": 2.0078, "step": 52104 }, { "epoch": 5.351442949573791, "grad_norm": 0.06597597897052765, "learning_rate": 0.01, "loss": 1.9824, "step": 52107 }, { "epoch": 5.351751052685632, "grad_norm": 0.1260969340801239, "learning_rate": 0.01, "loss": 1.9889, "step": 52110 }, { "epoch": 5.352059155797473, "grad_norm": 0.04004066437482834, "learning_rate": 0.01, "loss": 2.0006, "step": 52113 }, { "epoch": 5.3523672589093145, "grad_norm": 0.03897303342819214, "learning_rate": 0.01, "loss": 1.9727, "step": 52116 }, { "epoch": 5.352675362021157, "grad_norm": 0.05720753222703934, "learning_rate": 0.01, "loss": 1.9941, "step": 52119 }, { "epoch": 5.352983465132998, "grad_norm": 0.09778839349746704, "learning_rate": 0.01, "loss": 1.9913, "step": 52122 }, { "epoch": 5.353291568244839, "grad_norm": 0.10825862735509872, "learning_rate": 0.01, "loss": 1.986, "step": 52125 }, { "epoch": 5.3535996713566805, "grad_norm": 0.06417248398065567, "learning_rate": 0.01, "loss": 1.9936, "step": 52128 }, { "epoch": 5.353907774468522, "grad_norm": 0.03844156488776207, "learning_rate": 0.01, "loss": 1.9876, "step": 52131 }, { "epoch": 5.354215877580364, "grad_norm": 0.05346846207976341, "learning_rate": 0.01, "loss": 2.0165, "step": 52134 }, { "epoch": 5.354523980692205, "grad_norm": 0.07128198444843292, "learning_rate": 0.01, "loss": 2.0159, "step": 52137 }, { "epoch": 5.354832083804046, "grad_norm": 0.03174986317753792, "learning_rate": 0.01, "loss": 1.9946, "step": 52140 }, { "epoch": 5.355140186915888, "grad_norm": 0.044908635318279266, "learning_rate": 0.01, "loss": 1.9926, "step": 52143 }, { "epoch": 5.355448290027729, "grad_norm": 0.14737063646316528, "learning_rate": 0.01, "loss": 2.0202, "step": 52146 }, { "epoch": 5.355756393139571, "grad_norm": 0.08280967175960541, "learning_rate": 0.01, "loss": 2.0071, "step": 52149 }, { "epoch": 5.356064496251412, "grad_norm": 0.037819184362888336, "learning_rate": 0.01, "loss": 1.9795, "step": 52152 }, { "epoch": 5.356372599363254, "grad_norm": 0.03838292881846428, "learning_rate": 0.01, "loss": 1.9886, "step": 52155 }, { "epoch": 5.356680702475095, "grad_norm": 0.04593195393681526, "learning_rate": 0.01, "loss": 1.9731, "step": 52158 }, { "epoch": 5.356988805586936, "grad_norm": 0.12287592142820358, "learning_rate": 0.01, "loss": 1.9913, "step": 52161 }, { "epoch": 5.357296908698778, "grad_norm": 0.05081895366311073, "learning_rate": 0.01, "loss": 1.9746, "step": 52164 }, { "epoch": 5.3576050118106195, "grad_norm": 0.15700340270996094, "learning_rate": 0.01, "loss": 2.0194, "step": 52167 }, { "epoch": 5.357913114922461, "grad_norm": 0.049683380872011185, "learning_rate": 0.01, "loss": 2.003, "step": 52170 }, { "epoch": 5.358221218034302, "grad_norm": 0.03398337587714195, "learning_rate": 0.01, "loss": 2.0134, "step": 52173 }, { "epoch": 5.358529321146143, "grad_norm": 0.04002157971262932, "learning_rate": 0.01, "loss": 1.9572, "step": 52176 }, { "epoch": 5.358837424257985, "grad_norm": 0.037759970873594284, "learning_rate": 0.01, "loss": 2.0035, "step": 52179 }, { "epoch": 5.359145527369827, "grad_norm": 0.04797700420022011, "learning_rate": 0.01, "loss": 1.9837, "step": 52182 }, { "epoch": 5.359453630481668, "grad_norm": 0.06852797418832779, "learning_rate": 0.01, "loss": 2.0288, "step": 52185 }, { "epoch": 5.359761733593509, "grad_norm": 0.04743816703557968, "learning_rate": 0.01, "loss": 1.9938, "step": 52188 }, { "epoch": 5.3600698367053505, "grad_norm": 0.05316634103655815, "learning_rate": 0.01, "loss": 2.0035, "step": 52191 }, { "epoch": 5.360377939817192, "grad_norm": 0.07262757420539856, "learning_rate": 0.01, "loss": 1.992, "step": 52194 }, { "epoch": 5.360686042929034, "grad_norm": 0.13863062858581543, "learning_rate": 0.01, "loss": 2.0122, "step": 52197 }, { "epoch": 5.360994146040875, "grad_norm": 0.0368843711912632, "learning_rate": 0.01, "loss": 1.9797, "step": 52200 }, { "epoch": 5.3613022491527165, "grad_norm": 0.05429408326745033, "learning_rate": 0.01, "loss": 1.9711, "step": 52203 }, { "epoch": 5.361610352264558, "grad_norm": 0.04265204817056656, "learning_rate": 0.01, "loss": 2.0074, "step": 52206 }, { "epoch": 5.361918455376399, "grad_norm": 0.04122883826494217, "learning_rate": 0.01, "loss": 1.9974, "step": 52209 }, { "epoch": 5.362226558488241, "grad_norm": 0.04695272073149681, "learning_rate": 0.01, "loss": 1.9856, "step": 52212 }, { "epoch": 5.362534661600082, "grad_norm": 0.04153517633676529, "learning_rate": 0.01, "loss": 2.0172, "step": 52215 }, { "epoch": 5.362842764711924, "grad_norm": 0.09016060084104538, "learning_rate": 0.01, "loss": 1.9984, "step": 52218 }, { "epoch": 5.363150867823765, "grad_norm": 0.06228369474411011, "learning_rate": 0.01, "loss": 1.9792, "step": 52221 }, { "epoch": 5.363458970935606, "grad_norm": 0.09304657578468323, "learning_rate": 0.01, "loss": 2.0058, "step": 52224 }, { "epoch": 5.363767074047448, "grad_norm": 0.07483693957328796, "learning_rate": 0.01, "loss": 1.9941, "step": 52227 }, { "epoch": 5.36407517715929, "grad_norm": 0.08632933348417282, "learning_rate": 0.01, "loss": 2.0113, "step": 52230 }, { "epoch": 5.364383280271131, "grad_norm": 0.08443045616149902, "learning_rate": 0.01, "loss": 2.0161, "step": 52233 }, { "epoch": 5.364691383382972, "grad_norm": 0.05415327101945877, "learning_rate": 0.01, "loss": 2.0251, "step": 52236 }, { "epoch": 5.364999486494813, "grad_norm": 0.048056915402412415, "learning_rate": 0.01, "loss": 1.9952, "step": 52239 }, { "epoch": 5.365307589606655, "grad_norm": 0.03430894389748573, "learning_rate": 0.01, "loss": 1.9957, "step": 52242 }, { "epoch": 5.365615692718497, "grad_norm": 0.10147974640130997, "learning_rate": 0.01, "loss": 2.0247, "step": 52245 }, { "epoch": 5.365923795830338, "grad_norm": 0.09672293066978455, "learning_rate": 0.01, "loss": 1.9989, "step": 52248 }, { "epoch": 5.366231898942179, "grad_norm": 0.08904405683279037, "learning_rate": 0.01, "loss": 2.0167, "step": 52251 }, { "epoch": 5.366540002054021, "grad_norm": 0.05163189023733139, "learning_rate": 0.01, "loss": 1.9928, "step": 52254 }, { "epoch": 5.366848105165862, "grad_norm": 0.03694668784737587, "learning_rate": 0.01, "loss": 1.9909, "step": 52257 }, { "epoch": 5.367156208277704, "grad_norm": 0.036716192960739136, "learning_rate": 0.01, "loss": 2.004, "step": 52260 }, { "epoch": 5.367464311389545, "grad_norm": 0.046369098126888275, "learning_rate": 0.01, "loss": 2.0128, "step": 52263 }, { "epoch": 5.3677724145013865, "grad_norm": 0.06647578626871109, "learning_rate": 0.01, "loss": 1.9564, "step": 52266 }, { "epoch": 5.368080517613228, "grad_norm": 0.11546402424573898, "learning_rate": 0.01, "loss": 2.0125, "step": 52269 }, { "epoch": 5.368388620725069, "grad_norm": 0.08381645381450653, "learning_rate": 0.01, "loss": 1.9736, "step": 52272 }, { "epoch": 5.36869672383691, "grad_norm": 0.09374178946018219, "learning_rate": 0.01, "loss": 2.0108, "step": 52275 }, { "epoch": 5.3690048269487525, "grad_norm": 0.13528801500797272, "learning_rate": 0.01, "loss": 1.9889, "step": 52278 }, { "epoch": 5.369312930060594, "grad_norm": 0.09537555277347565, "learning_rate": 0.01, "loss": 1.9928, "step": 52281 }, { "epoch": 5.369621033172435, "grad_norm": 0.06615101546049118, "learning_rate": 0.01, "loss": 2.0193, "step": 52284 }, { "epoch": 5.369929136284276, "grad_norm": 0.03951498121023178, "learning_rate": 0.01, "loss": 1.9919, "step": 52287 }, { "epoch": 5.3702372393961175, "grad_norm": 0.04514235630631447, "learning_rate": 0.01, "loss": 2.0002, "step": 52290 }, { "epoch": 5.37054534250796, "grad_norm": 0.05101289227604866, "learning_rate": 0.01, "loss": 1.9996, "step": 52293 }, { "epoch": 5.370853445619801, "grad_norm": 0.10146338492631912, "learning_rate": 0.01, "loss": 1.9955, "step": 52296 }, { "epoch": 5.371161548731642, "grad_norm": 0.06167877838015556, "learning_rate": 0.01, "loss": 1.9842, "step": 52299 }, { "epoch": 5.3714696518434835, "grad_norm": 0.10302039980888367, "learning_rate": 0.01, "loss": 1.9917, "step": 52302 }, { "epoch": 5.371777754955325, "grad_norm": 0.07716668397188187, "learning_rate": 0.01, "loss": 2.0046, "step": 52305 }, { "epoch": 5.372085858067167, "grad_norm": 0.08410248160362244, "learning_rate": 0.01, "loss": 1.9643, "step": 52308 }, { "epoch": 5.372393961179008, "grad_norm": 0.05326802283525467, "learning_rate": 0.01, "loss": 2.0048, "step": 52311 }, { "epoch": 5.372702064290849, "grad_norm": 0.08413115888834, "learning_rate": 0.01, "loss": 1.9889, "step": 52314 }, { "epoch": 5.373010167402691, "grad_norm": 0.053719133138656616, "learning_rate": 0.01, "loss": 1.9995, "step": 52317 }, { "epoch": 5.373318270514532, "grad_norm": 0.07752490788698196, "learning_rate": 0.01, "loss": 2.0065, "step": 52320 }, { "epoch": 5.373626373626374, "grad_norm": 0.0719638466835022, "learning_rate": 0.01, "loss": 2.0005, "step": 52323 }, { "epoch": 5.373934476738215, "grad_norm": 0.0800856202840805, "learning_rate": 0.01, "loss": 1.9823, "step": 52326 }, { "epoch": 5.374242579850057, "grad_norm": 0.0889526829123497, "learning_rate": 0.01, "loss": 1.9985, "step": 52329 }, { "epoch": 5.374550682961898, "grad_norm": 0.08525931090116501, "learning_rate": 0.01, "loss": 1.9964, "step": 52332 }, { "epoch": 5.374858786073739, "grad_norm": 0.038573917001485825, "learning_rate": 0.01, "loss": 2.0186, "step": 52335 }, { "epoch": 5.37516688918558, "grad_norm": 0.03686724230647087, "learning_rate": 0.01, "loss": 1.9865, "step": 52338 }, { "epoch": 5.3754749922974225, "grad_norm": 0.042029861360788345, "learning_rate": 0.01, "loss": 1.9885, "step": 52341 }, { "epoch": 5.375783095409264, "grad_norm": 0.08990278095006943, "learning_rate": 0.01, "loss": 2.0018, "step": 52344 }, { "epoch": 5.376091198521105, "grad_norm": 0.09406303614377975, "learning_rate": 0.01, "loss": 1.9758, "step": 52347 }, { "epoch": 5.376399301632946, "grad_norm": 0.1285853236913681, "learning_rate": 0.01, "loss": 1.9883, "step": 52350 }, { "epoch": 5.376707404744788, "grad_norm": 0.06205267086625099, "learning_rate": 0.01, "loss": 2.01, "step": 52353 }, { "epoch": 5.37701550785663, "grad_norm": 0.09469316154718399, "learning_rate": 0.01, "loss": 1.998, "step": 52356 }, { "epoch": 5.377323610968471, "grad_norm": 0.08087198436260223, "learning_rate": 0.01, "loss": 1.9878, "step": 52359 }, { "epoch": 5.377631714080312, "grad_norm": 0.10511235147714615, "learning_rate": 0.01, "loss": 2.0175, "step": 52362 }, { "epoch": 5.3779398171921535, "grad_norm": 0.07102752476930618, "learning_rate": 0.01, "loss": 2.0019, "step": 52365 }, { "epoch": 5.378247920303995, "grad_norm": 0.04023940861225128, "learning_rate": 0.01, "loss": 1.9922, "step": 52368 }, { "epoch": 5.378556023415836, "grad_norm": 0.06366323679685593, "learning_rate": 0.01, "loss": 1.9837, "step": 52371 }, { "epoch": 5.378864126527678, "grad_norm": 0.045444704592227936, "learning_rate": 0.01, "loss": 1.9989, "step": 52374 }, { "epoch": 5.3791722296395195, "grad_norm": 0.07034427672624588, "learning_rate": 0.01, "loss": 1.9917, "step": 52377 }, { "epoch": 5.379480332751361, "grad_norm": 0.04844088852405548, "learning_rate": 0.01, "loss": 1.9746, "step": 52380 }, { "epoch": 5.379788435863202, "grad_norm": 0.05628515034914017, "learning_rate": 0.01, "loss": 1.9995, "step": 52383 }, { "epoch": 5.380096538975043, "grad_norm": 0.10569090396165848, "learning_rate": 0.01, "loss": 1.9886, "step": 52386 }, { "epoch": 5.380404642086885, "grad_norm": 0.14752137660980225, "learning_rate": 0.01, "loss": 1.9925, "step": 52389 }, { "epoch": 5.380712745198727, "grad_norm": 0.09316124767065048, "learning_rate": 0.01, "loss": 1.978, "step": 52392 }, { "epoch": 5.381020848310568, "grad_norm": 0.0789172351360321, "learning_rate": 0.01, "loss": 2.0041, "step": 52395 }, { "epoch": 5.381328951422409, "grad_norm": 0.04636065661907196, "learning_rate": 0.01, "loss": 1.9603, "step": 52398 }, { "epoch": 5.3816370545342505, "grad_norm": 0.051946815103292465, "learning_rate": 0.01, "loss": 1.9709, "step": 52401 }, { "epoch": 5.381945157646093, "grad_norm": 0.0423317514359951, "learning_rate": 0.01, "loss": 2.0133, "step": 52404 }, { "epoch": 5.382253260757934, "grad_norm": 0.03624933212995529, "learning_rate": 0.01, "loss": 1.9734, "step": 52407 }, { "epoch": 5.382561363869775, "grad_norm": 0.0415869876742363, "learning_rate": 0.01, "loss": 1.9978, "step": 52410 }, { "epoch": 5.382869466981616, "grad_norm": 0.1304771602153778, "learning_rate": 0.01, "loss": 2.0065, "step": 52413 }, { "epoch": 5.383177570093458, "grad_norm": 0.05917195603251457, "learning_rate": 0.01, "loss": 2.0216, "step": 52416 }, { "epoch": 5.3834856732053, "grad_norm": 0.04846251383423805, "learning_rate": 0.01, "loss": 2.0213, "step": 52419 }, { "epoch": 5.383793776317141, "grad_norm": 0.03447849303483963, "learning_rate": 0.01, "loss": 1.9879, "step": 52422 }, { "epoch": 5.384101879428982, "grad_norm": 0.042299896478652954, "learning_rate": 0.01, "loss": 1.9572, "step": 52425 }, { "epoch": 5.384409982540824, "grad_norm": 0.053316473960876465, "learning_rate": 0.01, "loss": 2.0188, "step": 52428 }, { "epoch": 5.384718085652665, "grad_norm": 0.05238528177142143, "learning_rate": 0.01, "loss": 2.0125, "step": 52431 }, { "epoch": 5.385026188764506, "grad_norm": 0.05088873207569122, "learning_rate": 0.01, "loss": 2.0015, "step": 52434 }, { "epoch": 5.385334291876348, "grad_norm": 0.03723147138953209, "learning_rate": 0.01, "loss": 2.0062, "step": 52437 }, { "epoch": 5.3856423949881895, "grad_norm": 0.04240123927593231, "learning_rate": 0.01, "loss": 1.9994, "step": 52440 }, { "epoch": 5.385950498100031, "grad_norm": 0.04458059370517731, "learning_rate": 0.01, "loss": 1.9832, "step": 52443 }, { "epoch": 5.386258601211872, "grad_norm": 0.20143818855285645, "learning_rate": 0.01, "loss": 1.9968, "step": 52446 }, { "epoch": 5.386566704323713, "grad_norm": 0.05507873743772507, "learning_rate": 0.01, "loss": 1.9985, "step": 52449 }, { "epoch": 5.3868748074355555, "grad_norm": 0.040351588279008865, "learning_rate": 0.01, "loss": 1.9684, "step": 52452 }, { "epoch": 5.387182910547397, "grad_norm": 0.044757384806871414, "learning_rate": 0.01, "loss": 1.9724, "step": 52455 }, { "epoch": 5.387491013659238, "grad_norm": 0.05201313644647598, "learning_rate": 0.01, "loss": 2.0204, "step": 52458 }, { "epoch": 5.387799116771079, "grad_norm": 0.053258076310157776, "learning_rate": 0.01, "loss": 2.0027, "step": 52461 }, { "epoch": 5.3881072198829205, "grad_norm": 0.08738038688898087, "learning_rate": 0.01, "loss": 2.0029, "step": 52464 }, { "epoch": 5.388415322994762, "grad_norm": 0.06154690682888031, "learning_rate": 0.01, "loss": 1.9737, "step": 52467 }, { "epoch": 5.388723426106604, "grad_norm": 0.045472290366888046, "learning_rate": 0.01, "loss": 1.9912, "step": 52470 }, { "epoch": 5.389031529218445, "grad_norm": 0.05027802661061287, "learning_rate": 0.01, "loss": 1.99, "step": 52473 }, { "epoch": 5.3893396323302865, "grad_norm": 0.06531531363725662, "learning_rate": 0.01, "loss": 2.0118, "step": 52476 }, { "epoch": 5.389647735442128, "grad_norm": 0.0642043873667717, "learning_rate": 0.01, "loss": 1.9922, "step": 52479 }, { "epoch": 5.389955838553969, "grad_norm": 0.03432301804423332, "learning_rate": 0.01, "loss": 1.995, "step": 52482 }, { "epoch": 5.390263941665811, "grad_norm": 0.14385442435741425, "learning_rate": 0.01, "loss": 1.9903, "step": 52485 }, { "epoch": 5.390572044777652, "grad_norm": 0.05068526789546013, "learning_rate": 0.01, "loss": 2.0122, "step": 52488 }, { "epoch": 5.390880147889494, "grad_norm": 0.0718868151307106, "learning_rate": 0.01, "loss": 2.0141, "step": 52491 }, { "epoch": 5.391188251001335, "grad_norm": 0.06522323191165924, "learning_rate": 0.01, "loss": 2.0136, "step": 52494 }, { "epoch": 5.391496354113176, "grad_norm": 0.03200114890933037, "learning_rate": 0.01, "loss": 1.9779, "step": 52497 }, { "epoch": 5.391804457225018, "grad_norm": 0.11630173027515411, "learning_rate": 0.01, "loss": 2.0095, "step": 52500 }, { "epoch": 5.39211256033686, "grad_norm": 0.07612688839435577, "learning_rate": 0.01, "loss": 2.0146, "step": 52503 }, { "epoch": 5.392420663448701, "grad_norm": 0.09313171356916428, "learning_rate": 0.01, "loss": 2.0002, "step": 52506 }, { "epoch": 5.392728766560542, "grad_norm": 0.07034334540367126, "learning_rate": 0.01, "loss": 1.9957, "step": 52509 }, { "epoch": 5.393036869672383, "grad_norm": 0.07270947843790054, "learning_rate": 0.01, "loss": 2.0186, "step": 52512 }, { "epoch": 5.3933449727842255, "grad_norm": 0.051841702312231064, "learning_rate": 0.01, "loss": 2.0039, "step": 52515 }, { "epoch": 5.393653075896067, "grad_norm": 0.04291679337620735, "learning_rate": 0.01, "loss": 1.9774, "step": 52518 }, { "epoch": 5.393961179007908, "grad_norm": 0.03652627766132355, "learning_rate": 0.01, "loss": 1.9567, "step": 52521 }, { "epoch": 5.394269282119749, "grad_norm": 0.03804773837327957, "learning_rate": 0.01, "loss": 1.9994, "step": 52524 }, { "epoch": 5.394577385231591, "grad_norm": 0.05173708498477936, "learning_rate": 0.01, "loss": 2.0167, "step": 52527 }, { "epoch": 5.394885488343432, "grad_norm": 0.07177083939313889, "learning_rate": 0.01, "loss": 1.994, "step": 52530 }, { "epoch": 5.395193591455274, "grad_norm": 0.08862923830747604, "learning_rate": 0.01, "loss": 2.008, "step": 52533 }, { "epoch": 5.395501694567115, "grad_norm": 0.09389739483594894, "learning_rate": 0.01, "loss": 1.9873, "step": 52536 }, { "epoch": 5.3958097976789565, "grad_norm": 0.03884744644165039, "learning_rate": 0.01, "loss": 1.997, "step": 52539 }, { "epoch": 5.396117900790798, "grad_norm": 0.04176723584532738, "learning_rate": 0.01, "loss": 1.9845, "step": 52542 }, { "epoch": 5.396426003902639, "grad_norm": 0.04131205379962921, "learning_rate": 0.01, "loss": 1.9914, "step": 52545 }, { "epoch": 5.396734107014481, "grad_norm": 0.03634188696742058, "learning_rate": 0.01, "loss": 1.999, "step": 52548 }, { "epoch": 5.3970422101263225, "grad_norm": 0.09134528040885925, "learning_rate": 0.01, "loss": 2.0024, "step": 52551 }, { "epoch": 5.397350313238164, "grad_norm": 0.07570212334394455, "learning_rate": 0.01, "loss": 2.003, "step": 52554 }, { "epoch": 5.397658416350005, "grad_norm": 0.11560734361410141, "learning_rate": 0.01, "loss": 2.0023, "step": 52557 }, { "epoch": 5.397966519461846, "grad_norm": 0.03760908916592598, "learning_rate": 0.01, "loss": 1.9684, "step": 52560 }, { "epoch": 5.398274622573688, "grad_norm": 0.05205194652080536, "learning_rate": 0.01, "loss": 2.0133, "step": 52563 }, { "epoch": 5.39858272568553, "grad_norm": 0.07257603853940964, "learning_rate": 0.01, "loss": 1.9981, "step": 52566 }, { "epoch": 5.398890828797371, "grad_norm": 0.07266250997781754, "learning_rate": 0.01, "loss": 2.003, "step": 52569 }, { "epoch": 5.399198931909212, "grad_norm": 0.0367962047457695, "learning_rate": 0.01, "loss": 1.9909, "step": 52572 }, { "epoch": 5.3995070350210534, "grad_norm": 0.05613946169614792, "learning_rate": 0.01, "loss": 2.0038, "step": 52575 }, { "epoch": 5.399815138132896, "grad_norm": 0.10363534092903137, "learning_rate": 0.01, "loss": 1.99, "step": 52578 }, { "epoch": 5.400123241244737, "grad_norm": 0.09217333793640137, "learning_rate": 0.01, "loss": 1.9849, "step": 52581 }, { "epoch": 5.400431344356578, "grad_norm": 0.0713183805346489, "learning_rate": 0.01, "loss": 2.0056, "step": 52584 }, { "epoch": 5.400739447468419, "grad_norm": 0.0670512244105339, "learning_rate": 0.01, "loss": 1.9996, "step": 52587 }, { "epoch": 5.401047550580261, "grad_norm": 0.10012122988700867, "learning_rate": 0.01, "loss": 1.9857, "step": 52590 }, { "epoch": 5.401355653692102, "grad_norm": 0.047828782349824905, "learning_rate": 0.01, "loss": 1.99, "step": 52593 }, { "epoch": 5.401663756803944, "grad_norm": 0.09758254140615463, "learning_rate": 0.01, "loss": 2.0082, "step": 52596 }, { "epoch": 5.401971859915785, "grad_norm": 0.044294700026512146, "learning_rate": 0.01, "loss": 1.9965, "step": 52599 }, { "epoch": 5.402279963027627, "grad_norm": 0.04579133540391922, "learning_rate": 0.01, "loss": 2.0004, "step": 52602 }, { "epoch": 5.402588066139468, "grad_norm": 0.04279174283146858, "learning_rate": 0.01, "loss": 1.9895, "step": 52605 }, { "epoch": 5.402896169251309, "grad_norm": 0.08676068484783173, "learning_rate": 0.01, "loss": 1.9876, "step": 52608 }, { "epoch": 5.403204272363151, "grad_norm": 0.09505254775285721, "learning_rate": 0.01, "loss": 2.0037, "step": 52611 }, { "epoch": 5.4035123754749925, "grad_norm": 0.06927873194217682, "learning_rate": 0.01, "loss": 2.0129, "step": 52614 }, { "epoch": 5.403820478586834, "grad_norm": 0.09732890874147415, "learning_rate": 0.01, "loss": 1.9787, "step": 52617 }, { "epoch": 5.404128581698675, "grad_norm": 0.03702055662870407, "learning_rate": 0.01, "loss": 1.9943, "step": 52620 }, { "epoch": 5.404436684810516, "grad_norm": 0.08636049926280975, "learning_rate": 0.01, "loss": 2.0194, "step": 52623 }, { "epoch": 5.404744787922358, "grad_norm": 0.07209763675928116, "learning_rate": 0.01, "loss": 1.9983, "step": 52626 }, { "epoch": 5.4050528910342, "grad_norm": 0.05202876031398773, "learning_rate": 0.01, "loss": 1.9788, "step": 52629 }, { "epoch": 5.405360994146041, "grad_norm": 0.0709710493683815, "learning_rate": 0.01, "loss": 2.0176, "step": 52632 }, { "epoch": 5.405669097257882, "grad_norm": 0.03506077453494072, "learning_rate": 0.01, "loss": 2.0064, "step": 52635 }, { "epoch": 5.4059772003697235, "grad_norm": 0.04027107357978821, "learning_rate": 0.01, "loss": 2.0006, "step": 52638 }, { "epoch": 5.406285303481565, "grad_norm": 0.07768195122480392, "learning_rate": 0.01, "loss": 1.9824, "step": 52641 }, { "epoch": 5.406593406593407, "grad_norm": 0.13171711564064026, "learning_rate": 0.01, "loss": 1.9865, "step": 52644 }, { "epoch": 5.406901509705248, "grad_norm": 0.13514290750026703, "learning_rate": 0.01, "loss": 1.9987, "step": 52647 }, { "epoch": 5.4072096128170895, "grad_norm": 0.05603281781077385, "learning_rate": 0.01, "loss": 1.9865, "step": 52650 }, { "epoch": 5.407517715928931, "grad_norm": 0.07667485624551773, "learning_rate": 0.01, "loss": 2.0062, "step": 52653 }, { "epoch": 5.407825819040772, "grad_norm": 0.06463679671287537, "learning_rate": 0.01, "loss": 1.9842, "step": 52656 }, { "epoch": 5.408133922152614, "grad_norm": 0.06655814498662949, "learning_rate": 0.01, "loss": 1.9808, "step": 52659 }, { "epoch": 5.408442025264455, "grad_norm": 0.1041889637708664, "learning_rate": 0.01, "loss": 2.0085, "step": 52662 }, { "epoch": 5.408750128376297, "grad_norm": 0.06591400504112244, "learning_rate": 0.01, "loss": 1.9947, "step": 52665 }, { "epoch": 5.409058231488138, "grad_norm": 0.033431414514780045, "learning_rate": 0.01, "loss": 1.9908, "step": 52668 }, { "epoch": 5.409366334599979, "grad_norm": 0.04565748572349548, "learning_rate": 0.01, "loss": 1.9918, "step": 52671 }, { "epoch": 5.409674437711821, "grad_norm": 0.04200183227658272, "learning_rate": 0.01, "loss": 1.9882, "step": 52674 }, { "epoch": 5.409982540823663, "grad_norm": 0.03807186335325241, "learning_rate": 0.01, "loss": 2.0075, "step": 52677 }, { "epoch": 5.410290643935504, "grad_norm": 0.09112047404050827, "learning_rate": 0.01, "loss": 1.9751, "step": 52680 }, { "epoch": 5.410598747047345, "grad_norm": 0.10211756825447083, "learning_rate": 0.01, "loss": 1.9996, "step": 52683 }, { "epoch": 5.410906850159186, "grad_norm": 0.1345423012971878, "learning_rate": 0.01, "loss": 1.9803, "step": 52686 }, { "epoch": 5.411214953271028, "grad_norm": 0.07363910228013992, "learning_rate": 0.01, "loss": 2.0172, "step": 52689 }, { "epoch": 5.41152305638287, "grad_norm": 0.08829092979431152, "learning_rate": 0.01, "loss": 1.9759, "step": 52692 }, { "epoch": 5.411831159494711, "grad_norm": 0.06006823107600212, "learning_rate": 0.01, "loss": 1.9683, "step": 52695 }, { "epoch": 5.412139262606552, "grad_norm": 0.06111016124486923, "learning_rate": 0.01, "loss": 1.9713, "step": 52698 }, { "epoch": 5.412447365718394, "grad_norm": 0.057088688015937805, "learning_rate": 0.01, "loss": 1.9623, "step": 52701 }, { "epoch": 5.412755468830235, "grad_norm": 0.04059537127614021, "learning_rate": 0.01, "loss": 1.989, "step": 52704 }, { "epoch": 5.413063571942077, "grad_norm": 0.2159087210893631, "learning_rate": 0.01, "loss": 1.9993, "step": 52707 }, { "epoch": 5.413371675053918, "grad_norm": 0.07151810079813004, "learning_rate": 0.01, "loss": 1.9845, "step": 52710 }, { "epoch": 5.4136797781657595, "grad_norm": 0.09309381246566772, "learning_rate": 0.01, "loss": 2.0023, "step": 52713 }, { "epoch": 5.413987881277601, "grad_norm": 0.039323315024375916, "learning_rate": 0.01, "loss": 1.972, "step": 52716 }, { "epoch": 5.414295984389442, "grad_norm": 0.09738846868276596, "learning_rate": 0.01, "loss": 2.0016, "step": 52719 }, { "epoch": 5.414604087501283, "grad_norm": 0.07482850551605225, "learning_rate": 0.01, "loss": 2.0146, "step": 52722 }, { "epoch": 5.4149121906131255, "grad_norm": 0.05960662290453911, "learning_rate": 0.01, "loss": 1.9899, "step": 52725 }, { "epoch": 5.415220293724967, "grad_norm": 0.061568450182676315, "learning_rate": 0.01, "loss": 2.0127, "step": 52728 }, { "epoch": 5.415528396836808, "grad_norm": 0.05249075964093208, "learning_rate": 0.01, "loss": 2.0298, "step": 52731 }, { "epoch": 5.415836499948649, "grad_norm": 0.04260426387190819, "learning_rate": 0.01, "loss": 2.0029, "step": 52734 }, { "epoch": 5.4161446030604905, "grad_norm": 0.035422783344984055, "learning_rate": 0.01, "loss": 1.9778, "step": 52737 }, { "epoch": 5.416452706172333, "grad_norm": 0.03902021422982216, "learning_rate": 0.01, "loss": 1.9983, "step": 52740 }, { "epoch": 5.416760809284174, "grad_norm": 0.04080606997013092, "learning_rate": 0.01, "loss": 2.0131, "step": 52743 }, { "epoch": 5.417068912396015, "grad_norm": 0.04044007509946823, "learning_rate": 0.01, "loss": 1.9852, "step": 52746 }, { "epoch": 5.417377015507856, "grad_norm": 0.04506811872124672, "learning_rate": 0.01, "loss": 2.0027, "step": 52749 }, { "epoch": 5.417685118619698, "grad_norm": 0.053388748317956924, "learning_rate": 0.01, "loss": 1.9848, "step": 52752 }, { "epoch": 5.41799322173154, "grad_norm": 0.09019593149423599, "learning_rate": 0.01, "loss": 1.9914, "step": 52755 }, { "epoch": 5.418301324843381, "grad_norm": 0.12063855677843094, "learning_rate": 0.01, "loss": 2.0137, "step": 52758 }, { "epoch": 5.418609427955222, "grad_norm": 0.07498898357152939, "learning_rate": 0.01, "loss": 1.9965, "step": 52761 }, { "epoch": 5.418917531067064, "grad_norm": 0.07565269619226456, "learning_rate": 0.01, "loss": 2.0146, "step": 52764 }, { "epoch": 5.419225634178905, "grad_norm": 0.037328194826841354, "learning_rate": 0.01, "loss": 1.9905, "step": 52767 }, { "epoch": 5.419533737290747, "grad_norm": 0.059995539486408234, "learning_rate": 0.01, "loss": 2.0149, "step": 52770 }, { "epoch": 5.419841840402588, "grad_norm": 0.18053491413593292, "learning_rate": 0.01, "loss": 2.0116, "step": 52773 }, { "epoch": 5.42014994351443, "grad_norm": 0.1632491648197174, "learning_rate": 0.01, "loss": 1.9898, "step": 52776 }, { "epoch": 5.420458046626271, "grad_norm": 0.06863465160131454, "learning_rate": 0.01, "loss": 1.9998, "step": 52779 }, { "epoch": 5.420766149738112, "grad_norm": 0.05202522501349449, "learning_rate": 0.01, "loss": 1.9974, "step": 52782 }, { "epoch": 5.421074252849953, "grad_norm": 0.039888009428977966, "learning_rate": 0.01, "loss": 1.9838, "step": 52785 }, { "epoch": 5.4213823559617955, "grad_norm": 0.04122290015220642, "learning_rate": 0.01, "loss": 2.004, "step": 52788 }, { "epoch": 5.421690459073637, "grad_norm": 0.03863810375332832, "learning_rate": 0.01, "loss": 1.9912, "step": 52791 }, { "epoch": 5.421998562185478, "grad_norm": 0.04215069115161896, "learning_rate": 0.01, "loss": 1.9936, "step": 52794 }, { "epoch": 5.422306665297319, "grad_norm": 0.09779973328113556, "learning_rate": 0.01, "loss": 2.0071, "step": 52797 }, { "epoch": 5.422614768409161, "grad_norm": 0.0620935894548893, "learning_rate": 0.01, "loss": 1.9928, "step": 52800 }, { "epoch": 5.422922871521003, "grad_norm": 0.05332612618803978, "learning_rate": 0.01, "loss": 1.9945, "step": 52803 }, { "epoch": 5.423230974632844, "grad_norm": 0.04671257734298706, "learning_rate": 0.01, "loss": 1.9965, "step": 52806 }, { "epoch": 5.423539077744685, "grad_norm": 0.03246668353676796, "learning_rate": 0.01, "loss": 2.0076, "step": 52809 }, { "epoch": 5.4238471808565265, "grad_norm": 0.04667939245700836, "learning_rate": 0.01, "loss": 2.0136, "step": 52812 }, { "epoch": 5.424155283968368, "grad_norm": 0.08633271604776382, "learning_rate": 0.01, "loss": 2.0249, "step": 52815 }, { "epoch": 5.42446338708021, "grad_norm": 0.07084902375936508, "learning_rate": 0.01, "loss": 2.0185, "step": 52818 }, { "epoch": 5.424771490192051, "grad_norm": 0.05388319492340088, "learning_rate": 0.01, "loss": 1.9986, "step": 52821 }, { "epoch": 5.4250795933038924, "grad_norm": 0.05652158334851265, "learning_rate": 0.01, "loss": 1.9961, "step": 52824 }, { "epoch": 5.425387696415734, "grad_norm": 0.09929019957780838, "learning_rate": 0.01, "loss": 1.9703, "step": 52827 }, { "epoch": 5.425695799527575, "grad_norm": 0.1725517362356186, "learning_rate": 0.01, "loss": 2.0257, "step": 52830 }, { "epoch": 5.426003902639417, "grad_norm": 0.06725597381591797, "learning_rate": 0.01, "loss": 1.9997, "step": 52833 }, { "epoch": 5.426312005751258, "grad_norm": 0.03839759901165962, "learning_rate": 0.01, "loss": 1.9665, "step": 52836 }, { "epoch": 5.4266201088631, "grad_norm": 0.03678586333990097, "learning_rate": 0.01, "loss": 1.9787, "step": 52839 }, { "epoch": 5.426928211974941, "grad_norm": 0.04450935125350952, "learning_rate": 0.01, "loss": 2.0098, "step": 52842 }, { "epoch": 5.427236315086782, "grad_norm": 0.05059666931629181, "learning_rate": 0.01, "loss": 2.0007, "step": 52845 }, { "epoch": 5.427544418198623, "grad_norm": 0.05548638850450516, "learning_rate": 0.01, "loss": 1.9928, "step": 52848 }, { "epoch": 5.427852521310466, "grad_norm": 0.03727211058139801, "learning_rate": 0.01, "loss": 2.0055, "step": 52851 }, { "epoch": 5.428160624422307, "grad_norm": 0.1027202233672142, "learning_rate": 0.01, "loss": 2.0112, "step": 52854 }, { "epoch": 5.428468727534148, "grad_norm": 0.07482005655765533, "learning_rate": 0.01, "loss": 1.9833, "step": 52857 }, { "epoch": 5.428776830645989, "grad_norm": 0.06325170397758484, "learning_rate": 0.01, "loss": 2.0173, "step": 52860 }, { "epoch": 5.429084933757831, "grad_norm": 0.11125578731298447, "learning_rate": 0.01, "loss": 1.9847, "step": 52863 }, { "epoch": 5.429393036869673, "grad_norm": 0.07175584882497787, "learning_rate": 0.01, "loss": 2.0186, "step": 52866 }, { "epoch": 5.429701139981514, "grad_norm": 0.03667625039815903, "learning_rate": 0.01, "loss": 1.9743, "step": 52869 }, { "epoch": 5.430009243093355, "grad_norm": 0.0515107661485672, "learning_rate": 0.01, "loss": 1.9745, "step": 52872 }, { "epoch": 5.430317346205197, "grad_norm": 0.09832940995693207, "learning_rate": 0.01, "loss": 1.991, "step": 52875 }, { "epoch": 5.430625449317038, "grad_norm": 0.06182995066046715, "learning_rate": 0.01, "loss": 1.9875, "step": 52878 }, { "epoch": 5.430933552428879, "grad_norm": 0.1432095468044281, "learning_rate": 0.01, "loss": 1.9853, "step": 52881 }, { "epoch": 5.431241655540721, "grad_norm": 0.11832471191883087, "learning_rate": 0.01, "loss": 1.9924, "step": 52884 }, { "epoch": 5.4315497586525625, "grad_norm": 0.07653117924928665, "learning_rate": 0.01, "loss": 1.9937, "step": 52887 }, { "epoch": 5.431857861764404, "grad_norm": 0.06316263228654861, "learning_rate": 0.01, "loss": 1.9854, "step": 52890 }, { "epoch": 5.432165964876245, "grad_norm": 0.058272287249565125, "learning_rate": 0.01, "loss": 1.9918, "step": 52893 }, { "epoch": 5.432474067988086, "grad_norm": 0.06886950880289078, "learning_rate": 0.01, "loss": 1.987, "step": 52896 }, { "epoch": 5.4327821710999284, "grad_norm": 0.053393036127090454, "learning_rate": 0.01, "loss": 2.0044, "step": 52899 }, { "epoch": 5.43309027421177, "grad_norm": 0.06651714444160461, "learning_rate": 0.01, "loss": 2.0033, "step": 52902 }, { "epoch": 5.433398377323611, "grad_norm": 0.04366718977689743, "learning_rate": 0.01, "loss": 1.9734, "step": 52905 }, { "epoch": 5.433706480435452, "grad_norm": 0.040242016315460205, "learning_rate": 0.01, "loss": 2.0101, "step": 52908 }, { "epoch": 5.4340145835472935, "grad_norm": 0.0598304383456707, "learning_rate": 0.01, "loss": 1.9785, "step": 52911 }, { "epoch": 5.434322686659136, "grad_norm": 0.17316143214702606, "learning_rate": 0.01, "loss": 1.9767, "step": 52914 }, { "epoch": 5.434630789770977, "grad_norm": 0.13133604824543, "learning_rate": 0.01, "loss": 1.9819, "step": 52917 }, { "epoch": 5.434938892882818, "grad_norm": 0.0751945972442627, "learning_rate": 0.01, "loss": 1.9745, "step": 52920 }, { "epoch": 5.435246995994659, "grad_norm": 0.06649640202522278, "learning_rate": 0.01, "loss": 1.9884, "step": 52923 }, { "epoch": 5.435555099106501, "grad_norm": 0.13051864504814148, "learning_rate": 0.01, "loss": 1.9994, "step": 52926 }, { "epoch": 5.435863202218343, "grad_norm": 0.06402359157800674, "learning_rate": 0.01, "loss": 1.9606, "step": 52929 }, { "epoch": 5.436171305330184, "grad_norm": 0.06441336125135422, "learning_rate": 0.01, "loss": 1.9815, "step": 52932 }, { "epoch": 5.436479408442025, "grad_norm": 0.09019657224416733, "learning_rate": 0.01, "loss": 1.9954, "step": 52935 }, { "epoch": 5.436787511553867, "grad_norm": 0.04703563451766968, "learning_rate": 0.01, "loss": 1.9938, "step": 52938 }, { "epoch": 5.437095614665708, "grad_norm": 0.03909967467188835, "learning_rate": 0.01, "loss": 1.9879, "step": 52941 }, { "epoch": 5.437403717777549, "grad_norm": 0.05746940150856972, "learning_rate": 0.01, "loss": 1.9935, "step": 52944 }, { "epoch": 5.437711820889391, "grad_norm": 0.11890576034784317, "learning_rate": 0.01, "loss": 2.0062, "step": 52947 }, { "epoch": 5.438019924001233, "grad_norm": 0.07725408673286438, "learning_rate": 0.01, "loss": 1.9977, "step": 52950 }, { "epoch": 5.438328027113074, "grad_norm": 0.05112382769584656, "learning_rate": 0.01, "loss": 1.9891, "step": 52953 }, { "epoch": 5.438636130224915, "grad_norm": 0.08786381781101227, "learning_rate": 0.01, "loss": 1.9671, "step": 52956 }, { "epoch": 5.438944233336756, "grad_norm": 0.07547960430383682, "learning_rate": 0.01, "loss": 1.9953, "step": 52959 }, { "epoch": 5.4392523364485985, "grad_norm": 0.06332848221063614, "learning_rate": 0.01, "loss": 1.9917, "step": 52962 }, { "epoch": 5.43956043956044, "grad_norm": 0.09106019139289856, "learning_rate": 0.01, "loss": 2.0042, "step": 52965 }, { "epoch": 5.439868542672281, "grad_norm": 0.1387149691581726, "learning_rate": 0.01, "loss": 2.005, "step": 52968 }, { "epoch": 5.440176645784122, "grad_norm": 0.07359499484300613, "learning_rate": 0.01, "loss": 1.986, "step": 52971 }, { "epoch": 5.440484748895964, "grad_norm": 0.06554149091243744, "learning_rate": 0.01, "loss": 2.0092, "step": 52974 }, { "epoch": 5.440792852007805, "grad_norm": 0.08161085098981857, "learning_rate": 0.01, "loss": 1.9857, "step": 52977 }, { "epoch": 5.441100955119647, "grad_norm": 0.07675690948963165, "learning_rate": 0.01, "loss": 2.0121, "step": 52980 }, { "epoch": 5.441409058231488, "grad_norm": 0.04832153394818306, "learning_rate": 0.01, "loss": 1.9818, "step": 52983 }, { "epoch": 5.4417171613433295, "grad_norm": 0.03355338051915169, "learning_rate": 0.01, "loss": 1.9913, "step": 52986 }, { "epoch": 5.442025264455171, "grad_norm": 0.03371288999915123, "learning_rate": 0.01, "loss": 1.9981, "step": 52989 }, { "epoch": 5.442333367567012, "grad_norm": 0.11185171455144882, "learning_rate": 0.01, "loss": 1.9831, "step": 52992 }, { "epoch": 5.442641470678854, "grad_norm": 0.059647444635629654, "learning_rate": 0.01, "loss": 1.9779, "step": 52995 }, { "epoch": 5.442949573790695, "grad_norm": 0.03346579894423485, "learning_rate": 0.01, "loss": 1.9663, "step": 52998 }, { "epoch": 5.443257676902537, "grad_norm": 0.0569952167570591, "learning_rate": 0.01, "loss": 1.9747, "step": 53001 }, { "epoch": 5.443565780014378, "grad_norm": 0.07441236078739166, "learning_rate": 0.01, "loss": 1.991, "step": 53004 }, { "epoch": 5.443873883126219, "grad_norm": 0.11744049191474915, "learning_rate": 0.01, "loss": 1.9986, "step": 53007 }, { "epoch": 5.444181986238061, "grad_norm": 0.04918253421783447, "learning_rate": 0.01, "loss": 2.0148, "step": 53010 }, { "epoch": 5.444490089349903, "grad_norm": 0.03746315464377403, "learning_rate": 0.01, "loss": 2.0005, "step": 53013 }, { "epoch": 5.444798192461744, "grad_norm": 0.055012766271829605, "learning_rate": 0.01, "loss": 1.998, "step": 53016 }, { "epoch": 5.445106295573585, "grad_norm": 0.06424736231565475, "learning_rate": 0.01, "loss": 1.998, "step": 53019 }, { "epoch": 5.445414398685426, "grad_norm": 0.09382134675979614, "learning_rate": 0.01, "loss": 2.0071, "step": 53022 }, { "epoch": 5.445722501797269, "grad_norm": 0.048184242099523544, "learning_rate": 0.01, "loss": 1.985, "step": 53025 }, { "epoch": 5.44603060490911, "grad_norm": 0.05549463629722595, "learning_rate": 0.01, "loss": 2.0039, "step": 53028 }, { "epoch": 5.446338708020951, "grad_norm": 0.10756690055131912, "learning_rate": 0.01, "loss": 2.0116, "step": 53031 }, { "epoch": 5.446646811132792, "grad_norm": 0.03808826580643654, "learning_rate": 0.01, "loss": 1.9782, "step": 53034 }, { "epoch": 5.446954914244634, "grad_norm": 0.050678886473178864, "learning_rate": 0.01, "loss": 1.9925, "step": 53037 }, { "epoch": 5.447263017356475, "grad_norm": 0.03573575243353844, "learning_rate": 0.01, "loss": 2.0233, "step": 53040 }, { "epoch": 5.447571120468317, "grad_norm": 0.05302232876420021, "learning_rate": 0.01, "loss": 1.9904, "step": 53043 }, { "epoch": 5.447879223580158, "grad_norm": 0.03617763891816139, "learning_rate": 0.01, "loss": 1.9862, "step": 53046 }, { "epoch": 5.448187326692, "grad_norm": 0.039550527930259705, "learning_rate": 0.01, "loss": 2.0038, "step": 53049 }, { "epoch": 5.448495429803841, "grad_norm": 0.041170112788677216, "learning_rate": 0.01, "loss": 1.9428, "step": 53052 }, { "epoch": 5.448803532915682, "grad_norm": 0.04932108521461487, "learning_rate": 0.01, "loss": 2.0008, "step": 53055 }, { "epoch": 5.449111636027524, "grad_norm": 0.03305281326174736, "learning_rate": 0.01, "loss": 2.0141, "step": 53058 }, { "epoch": 5.4494197391393655, "grad_norm": 0.04628564044833183, "learning_rate": 0.01, "loss": 2.009, "step": 53061 }, { "epoch": 5.449727842251207, "grad_norm": 0.04561088606715202, "learning_rate": 0.01, "loss": 1.9911, "step": 53064 }, { "epoch": 5.450035945363048, "grad_norm": 0.11011772602796555, "learning_rate": 0.01, "loss": 1.9988, "step": 53067 }, { "epoch": 5.450344048474889, "grad_norm": 0.06649364531040192, "learning_rate": 0.01, "loss": 2.0095, "step": 53070 }, { "epoch": 5.450652151586731, "grad_norm": 0.1190907210111618, "learning_rate": 0.01, "loss": 1.9776, "step": 53073 }, { "epoch": 5.450960254698573, "grad_norm": 0.03680254891514778, "learning_rate": 0.01, "loss": 2.0242, "step": 53076 }, { "epoch": 5.451268357810414, "grad_norm": 0.04634016007184982, "learning_rate": 0.01, "loss": 2.0011, "step": 53079 }, { "epoch": 5.451576460922255, "grad_norm": 0.04958662390708923, "learning_rate": 0.01, "loss": 2.0121, "step": 53082 }, { "epoch": 5.4518845640340965, "grad_norm": 0.04827871546149254, "learning_rate": 0.01, "loss": 2.0159, "step": 53085 }, { "epoch": 5.452192667145939, "grad_norm": 0.10182766616344452, "learning_rate": 0.01, "loss": 2.0107, "step": 53088 }, { "epoch": 5.45250077025778, "grad_norm": 0.08067356050014496, "learning_rate": 0.01, "loss": 1.9764, "step": 53091 }, { "epoch": 5.452808873369621, "grad_norm": 0.06457395106554031, "learning_rate": 0.01, "loss": 2.0003, "step": 53094 }, { "epoch": 5.453116976481462, "grad_norm": 0.09059132635593414, "learning_rate": 0.01, "loss": 1.9763, "step": 53097 }, { "epoch": 5.453425079593304, "grad_norm": 0.05704977363348007, "learning_rate": 0.01, "loss": 1.9937, "step": 53100 }, { "epoch": 5.453733182705145, "grad_norm": 0.09887251257896423, "learning_rate": 0.01, "loss": 1.9857, "step": 53103 }, { "epoch": 5.454041285816987, "grad_norm": 0.03840193152427673, "learning_rate": 0.01, "loss": 1.9847, "step": 53106 }, { "epoch": 5.454349388928828, "grad_norm": 0.08743079751729965, "learning_rate": 0.01, "loss": 1.9904, "step": 53109 }, { "epoch": 5.45465749204067, "grad_norm": 0.054274316877126694, "learning_rate": 0.01, "loss": 2.0035, "step": 53112 }, { "epoch": 5.454965595152511, "grad_norm": 0.033519454300403595, "learning_rate": 0.01, "loss": 1.9915, "step": 53115 }, { "epoch": 5.455273698264352, "grad_norm": 0.1241462305188179, "learning_rate": 0.01, "loss": 2.003, "step": 53118 }, { "epoch": 5.455581801376194, "grad_norm": 0.11901737749576569, "learning_rate": 0.01, "loss": 2.0022, "step": 53121 }, { "epoch": 5.455889904488036, "grad_norm": 0.06983425468206406, "learning_rate": 0.01, "loss": 1.9779, "step": 53124 }, { "epoch": 5.456198007599877, "grad_norm": 0.07202869653701782, "learning_rate": 0.01, "loss": 1.9962, "step": 53127 }, { "epoch": 5.456506110711718, "grad_norm": 0.049013327807188034, "learning_rate": 0.01, "loss": 1.9848, "step": 53130 }, { "epoch": 5.456814213823559, "grad_norm": 0.04855305328965187, "learning_rate": 0.01, "loss": 2.0079, "step": 53133 }, { "epoch": 5.457122316935401, "grad_norm": 0.05479707196354866, "learning_rate": 0.01, "loss": 1.9877, "step": 53136 }, { "epoch": 5.457430420047243, "grad_norm": 0.04120011255145073, "learning_rate": 0.01, "loss": 2.0043, "step": 53139 }, { "epoch": 5.457738523159084, "grad_norm": 0.11232541501522064, "learning_rate": 0.01, "loss": 1.9851, "step": 53142 }, { "epoch": 5.458046626270925, "grad_norm": 0.0826067104935646, "learning_rate": 0.01, "loss": 2.0187, "step": 53145 }, { "epoch": 5.458354729382767, "grad_norm": 0.0358428992331028, "learning_rate": 0.01, "loss": 1.9936, "step": 53148 }, { "epoch": 5.458662832494608, "grad_norm": 0.06724508851766586, "learning_rate": 0.01, "loss": 2.0097, "step": 53151 }, { "epoch": 5.45897093560645, "grad_norm": 0.05402735620737076, "learning_rate": 0.01, "loss": 1.9626, "step": 53154 }, { "epoch": 5.459279038718291, "grad_norm": 0.10397180914878845, "learning_rate": 0.01, "loss": 2.0058, "step": 53157 }, { "epoch": 5.4595871418301325, "grad_norm": 0.16697724163532257, "learning_rate": 0.01, "loss": 2.0045, "step": 53160 }, { "epoch": 5.459895244941974, "grad_norm": 0.12541674077510834, "learning_rate": 0.01, "loss": 1.9965, "step": 53163 }, { "epoch": 5.460203348053815, "grad_norm": 0.11177428066730499, "learning_rate": 0.01, "loss": 1.9943, "step": 53166 }, { "epoch": 5.460511451165657, "grad_norm": 0.12203194200992584, "learning_rate": 0.01, "loss": 2.0014, "step": 53169 }, { "epoch": 5.460819554277498, "grad_norm": 0.03482053428888321, "learning_rate": 0.01, "loss": 1.9957, "step": 53172 }, { "epoch": 5.46112765738934, "grad_norm": 0.03225795179605484, "learning_rate": 0.01, "loss": 1.9601, "step": 53175 }, { "epoch": 5.461435760501181, "grad_norm": 0.03760567680001259, "learning_rate": 0.01, "loss": 2.0061, "step": 53178 }, { "epoch": 5.461743863613022, "grad_norm": 0.12543334066867828, "learning_rate": 0.01, "loss": 1.9754, "step": 53181 }, { "epoch": 5.462051966724864, "grad_norm": 0.04533065855503082, "learning_rate": 0.01, "loss": 2.0104, "step": 53184 }, { "epoch": 5.462360069836706, "grad_norm": 0.07519727945327759, "learning_rate": 0.01, "loss": 1.994, "step": 53187 }, { "epoch": 5.462668172948547, "grad_norm": 0.11682818084955215, "learning_rate": 0.01, "loss": 1.9819, "step": 53190 }, { "epoch": 5.462976276060388, "grad_norm": 0.043735865503549576, "learning_rate": 0.01, "loss": 2.0068, "step": 53193 }, { "epoch": 5.463284379172229, "grad_norm": 0.09902381896972656, "learning_rate": 0.01, "loss": 2.0089, "step": 53196 }, { "epoch": 5.463592482284071, "grad_norm": 0.05974416807293892, "learning_rate": 0.01, "loss": 1.9999, "step": 53199 }, { "epoch": 5.463900585395913, "grad_norm": 0.07005894929170609, "learning_rate": 0.01, "loss": 1.9903, "step": 53202 }, { "epoch": 5.464208688507754, "grad_norm": 0.055365189909935, "learning_rate": 0.01, "loss": 1.9798, "step": 53205 }, { "epoch": 5.464516791619595, "grad_norm": 0.07333406060934067, "learning_rate": 0.01, "loss": 1.9972, "step": 53208 }, { "epoch": 5.464824894731437, "grad_norm": 0.03612162545323372, "learning_rate": 0.01, "loss": 1.9946, "step": 53211 }, { "epoch": 5.465132997843278, "grad_norm": 0.036687567830085754, "learning_rate": 0.01, "loss": 1.9966, "step": 53214 }, { "epoch": 5.46544110095512, "grad_norm": 0.0842447429895401, "learning_rate": 0.01, "loss": 2.0063, "step": 53217 }, { "epoch": 5.465749204066961, "grad_norm": 0.06415588408708572, "learning_rate": 0.01, "loss": 1.9863, "step": 53220 }, { "epoch": 5.466057307178803, "grad_norm": 0.0578574612736702, "learning_rate": 0.01, "loss": 2.0036, "step": 53223 }, { "epoch": 5.466365410290644, "grad_norm": 0.04436985403299332, "learning_rate": 0.01, "loss": 1.9813, "step": 53226 }, { "epoch": 5.466673513402485, "grad_norm": 0.1035226359963417, "learning_rate": 0.01, "loss": 1.9639, "step": 53229 }, { "epoch": 5.466981616514326, "grad_norm": 0.04867973551154137, "learning_rate": 0.01, "loss": 2.0105, "step": 53232 }, { "epoch": 5.4672897196261685, "grad_norm": 0.0710483267903328, "learning_rate": 0.01, "loss": 1.9955, "step": 53235 }, { "epoch": 5.46759782273801, "grad_norm": 0.052401233464479446, "learning_rate": 0.01, "loss": 2.0043, "step": 53238 }, { "epoch": 5.467905925849851, "grad_norm": 0.036160893738269806, "learning_rate": 0.01, "loss": 1.9953, "step": 53241 }, { "epoch": 5.468214028961692, "grad_norm": 0.04797692224383354, "learning_rate": 0.01, "loss": 1.9814, "step": 53244 }, { "epoch": 5.4685221320735335, "grad_norm": 0.037899672985076904, "learning_rate": 0.01, "loss": 1.9793, "step": 53247 }, { "epoch": 5.468830235185376, "grad_norm": 0.054781220853328705, "learning_rate": 0.01, "loss": 1.9936, "step": 53250 }, { "epoch": 5.469138338297217, "grad_norm": 0.09595558792352676, "learning_rate": 0.01, "loss": 1.9983, "step": 53253 }, { "epoch": 5.469446441409058, "grad_norm": 0.0511094331741333, "learning_rate": 0.01, "loss": 2.0078, "step": 53256 }, { "epoch": 5.4697545445208995, "grad_norm": 0.05115760117769241, "learning_rate": 0.01, "loss": 1.9845, "step": 53259 }, { "epoch": 5.470062647632741, "grad_norm": 0.05593256279826164, "learning_rate": 0.01, "loss": 2.019, "step": 53262 }, { "epoch": 5.470370750744583, "grad_norm": 0.048911042511463165, "learning_rate": 0.01, "loss": 1.9953, "step": 53265 }, { "epoch": 5.470678853856424, "grad_norm": 0.05227271467447281, "learning_rate": 0.01, "loss": 2.0013, "step": 53268 }, { "epoch": 5.470986956968265, "grad_norm": 0.11259990185499191, "learning_rate": 0.01, "loss": 1.987, "step": 53271 }, { "epoch": 5.471295060080107, "grad_norm": 0.04125455394387245, "learning_rate": 0.01, "loss": 2.0069, "step": 53274 }, { "epoch": 5.471603163191948, "grad_norm": 0.11485335975885391, "learning_rate": 0.01, "loss": 1.9832, "step": 53277 }, { "epoch": 5.47191126630379, "grad_norm": 0.07149787247180939, "learning_rate": 0.01, "loss": 1.9752, "step": 53280 }, { "epoch": 5.472219369415631, "grad_norm": 0.05775618925690651, "learning_rate": 0.01, "loss": 2.0204, "step": 53283 }, { "epoch": 5.472527472527473, "grad_norm": 0.04515690356492996, "learning_rate": 0.01, "loss": 1.99, "step": 53286 }, { "epoch": 5.472835575639314, "grad_norm": 0.0433974415063858, "learning_rate": 0.01, "loss": 1.9746, "step": 53289 }, { "epoch": 5.473143678751155, "grad_norm": 0.03472794219851494, "learning_rate": 0.01, "loss": 2.0019, "step": 53292 }, { "epoch": 5.473451781862996, "grad_norm": 0.04648016393184662, "learning_rate": 0.01, "loss": 1.9567, "step": 53295 }, { "epoch": 5.473759884974839, "grad_norm": 0.048154812306165695, "learning_rate": 0.01, "loss": 1.988, "step": 53298 }, { "epoch": 5.47406798808668, "grad_norm": 0.040329910814762115, "learning_rate": 0.01, "loss": 1.9847, "step": 53301 }, { "epoch": 5.474376091198521, "grad_norm": 0.10805533081293106, "learning_rate": 0.01, "loss": 1.9859, "step": 53304 }, { "epoch": 5.474684194310362, "grad_norm": 0.04139288142323494, "learning_rate": 0.01, "loss": 1.9787, "step": 53307 }, { "epoch": 5.474992297422204, "grad_norm": 0.08323890715837479, "learning_rate": 0.01, "loss": 1.9751, "step": 53310 }, { "epoch": 5.475300400534046, "grad_norm": 0.07098772376775742, "learning_rate": 0.01, "loss": 2.001, "step": 53313 }, { "epoch": 5.475608503645887, "grad_norm": 0.09589933604001999, "learning_rate": 0.01, "loss": 2.0163, "step": 53316 }, { "epoch": 5.475916606757728, "grad_norm": 0.09487750381231308, "learning_rate": 0.01, "loss": 1.9941, "step": 53319 }, { "epoch": 5.4762247098695696, "grad_norm": 0.04062903672456741, "learning_rate": 0.01, "loss": 1.9845, "step": 53322 }, { "epoch": 5.476532812981411, "grad_norm": 0.036831121891736984, "learning_rate": 0.01, "loss": 1.9833, "step": 53325 }, { "epoch": 5.476840916093252, "grad_norm": 0.05887876823544502, "learning_rate": 0.01, "loss": 1.9974, "step": 53328 }, { "epoch": 5.477149019205094, "grad_norm": 0.07680708169937134, "learning_rate": 0.01, "loss": 1.9925, "step": 53331 }, { "epoch": 5.4774571223169355, "grad_norm": 0.06624645739793777, "learning_rate": 0.01, "loss": 2.0242, "step": 53334 }, { "epoch": 5.477765225428777, "grad_norm": 0.051530107855796814, "learning_rate": 0.01, "loss": 1.9942, "step": 53337 }, { "epoch": 5.478073328540618, "grad_norm": 0.046162448823451996, "learning_rate": 0.01, "loss": 1.9859, "step": 53340 }, { "epoch": 5.478381431652459, "grad_norm": 0.038336243480443954, "learning_rate": 0.01, "loss": 2.0043, "step": 53343 }, { "epoch": 5.478689534764301, "grad_norm": 0.03402607515454292, "learning_rate": 0.01, "loss": 1.9678, "step": 53346 }, { "epoch": 5.478997637876143, "grad_norm": 0.10580262541770935, "learning_rate": 0.01, "loss": 1.9766, "step": 53349 }, { "epoch": 5.479305740987984, "grad_norm": 0.13053545355796814, "learning_rate": 0.01, "loss": 1.9978, "step": 53352 }, { "epoch": 5.479613844099825, "grad_norm": 0.050016142427921295, "learning_rate": 0.01, "loss": 1.9924, "step": 53355 }, { "epoch": 5.4799219472116665, "grad_norm": 0.03820059821009636, "learning_rate": 0.01, "loss": 1.9905, "step": 53358 }, { "epoch": 5.480230050323509, "grad_norm": 0.04353965446352959, "learning_rate": 0.01, "loss": 2.0286, "step": 53361 }, { "epoch": 5.48053815343535, "grad_norm": 0.04177290201187134, "learning_rate": 0.01, "loss": 2.0074, "step": 53364 }, { "epoch": 5.480846256547191, "grad_norm": 0.03511528670787811, "learning_rate": 0.01, "loss": 1.9861, "step": 53367 }, { "epoch": 5.481154359659032, "grad_norm": 0.10579612106084824, "learning_rate": 0.01, "loss": 1.9883, "step": 53370 }, { "epoch": 5.481462462770874, "grad_norm": 0.08428878337144852, "learning_rate": 0.01, "loss": 2.0179, "step": 53373 }, { "epoch": 5.481770565882716, "grad_norm": 0.045665886253118515, "learning_rate": 0.01, "loss": 1.9834, "step": 53376 }, { "epoch": 5.482078668994557, "grad_norm": 0.10956547409296036, "learning_rate": 0.01, "loss": 1.9911, "step": 53379 }, { "epoch": 5.482386772106398, "grad_norm": 0.07283762842416763, "learning_rate": 0.01, "loss": 2.0061, "step": 53382 }, { "epoch": 5.48269487521824, "grad_norm": 0.11426263302564621, "learning_rate": 0.01, "loss": 2.0013, "step": 53385 }, { "epoch": 5.483002978330081, "grad_norm": 0.06644517928361893, "learning_rate": 0.01, "loss": 1.9973, "step": 53388 }, { "epoch": 5.483311081441922, "grad_norm": 0.06233721226453781, "learning_rate": 0.01, "loss": 2.0111, "step": 53391 }, { "epoch": 5.483619184553764, "grad_norm": 0.06707890331745148, "learning_rate": 0.01, "loss": 2.0027, "step": 53394 }, { "epoch": 5.4839272876656056, "grad_norm": 0.06242217868566513, "learning_rate": 0.01, "loss": 1.9823, "step": 53397 }, { "epoch": 5.484235390777447, "grad_norm": 0.03913586214184761, "learning_rate": 0.01, "loss": 1.9915, "step": 53400 }, { "epoch": 5.484543493889288, "grad_norm": 0.10017285495996475, "learning_rate": 0.01, "loss": 1.9768, "step": 53403 }, { "epoch": 5.484851597001129, "grad_norm": 0.07849404960870743, "learning_rate": 0.01, "loss": 2.017, "step": 53406 }, { "epoch": 5.4851597001129715, "grad_norm": 0.06911784410476685, "learning_rate": 0.01, "loss": 2.0207, "step": 53409 }, { "epoch": 5.485467803224813, "grad_norm": 0.07752948254346848, "learning_rate": 0.01, "loss": 2.0029, "step": 53412 }, { "epoch": 5.485775906336654, "grad_norm": 0.0855301171541214, "learning_rate": 0.01, "loss": 1.9779, "step": 53415 }, { "epoch": 5.486084009448495, "grad_norm": 0.039153728634119034, "learning_rate": 0.01, "loss": 1.9984, "step": 53418 }, { "epoch": 5.4863921125603365, "grad_norm": 0.09871362149715424, "learning_rate": 0.01, "loss": 1.9849, "step": 53421 }, { "epoch": 5.486700215672179, "grad_norm": 0.05920962989330292, "learning_rate": 0.01, "loss": 1.9729, "step": 53424 }, { "epoch": 5.48700831878402, "grad_norm": 0.04103608801960945, "learning_rate": 0.01, "loss": 2.0363, "step": 53427 }, { "epoch": 5.487316421895861, "grad_norm": 0.08447948098182678, "learning_rate": 0.01, "loss": 1.9856, "step": 53430 }, { "epoch": 5.4876245250077025, "grad_norm": 0.055456507951021194, "learning_rate": 0.01, "loss": 1.9746, "step": 53433 }, { "epoch": 5.487932628119544, "grad_norm": 0.05828576907515526, "learning_rate": 0.01, "loss": 1.983, "step": 53436 }, { "epoch": 5.488240731231386, "grad_norm": 0.09117809683084488, "learning_rate": 0.01, "loss": 2.0269, "step": 53439 }, { "epoch": 5.488548834343227, "grad_norm": 0.08137334883213043, "learning_rate": 0.01, "loss": 1.9983, "step": 53442 }, { "epoch": 5.488856937455068, "grad_norm": 0.08830280601978302, "learning_rate": 0.01, "loss": 2.0016, "step": 53445 }, { "epoch": 5.48916504056691, "grad_norm": 0.04386366158723831, "learning_rate": 0.01, "loss": 1.9987, "step": 53448 }, { "epoch": 5.489473143678751, "grad_norm": 0.0500885471701622, "learning_rate": 0.01, "loss": 1.997, "step": 53451 }, { "epoch": 5.489781246790592, "grad_norm": 0.044326718896627426, "learning_rate": 0.01, "loss": 1.9894, "step": 53454 }, { "epoch": 5.490089349902434, "grad_norm": 0.1106194257736206, "learning_rate": 0.01, "loss": 1.9871, "step": 53457 }, { "epoch": 5.490397453014276, "grad_norm": 0.05222434550523758, "learning_rate": 0.01, "loss": 1.9999, "step": 53460 }, { "epoch": 5.490705556126117, "grad_norm": 0.08016736060380936, "learning_rate": 0.01, "loss": 1.9846, "step": 53463 }, { "epoch": 5.491013659237958, "grad_norm": 0.06921573728322983, "learning_rate": 0.01, "loss": 2.0048, "step": 53466 }, { "epoch": 5.491321762349799, "grad_norm": 0.10140072554349899, "learning_rate": 0.01, "loss": 1.9885, "step": 53469 }, { "epoch": 5.491629865461642, "grad_norm": 0.05930350720882416, "learning_rate": 0.01, "loss": 2.0103, "step": 53472 }, { "epoch": 5.491937968573483, "grad_norm": 0.054523758590221405, "learning_rate": 0.01, "loss": 1.9926, "step": 53475 }, { "epoch": 5.492246071685324, "grad_norm": 0.06033066287636757, "learning_rate": 0.01, "loss": 2.0146, "step": 53478 }, { "epoch": 5.492554174797165, "grad_norm": 0.11593367904424667, "learning_rate": 0.01, "loss": 1.9988, "step": 53481 }, { "epoch": 5.492862277909007, "grad_norm": 0.054470766335725784, "learning_rate": 0.01, "loss": 2.0093, "step": 53484 }, { "epoch": 5.493170381020848, "grad_norm": 0.05710428208112717, "learning_rate": 0.01, "loss": 1.9988, "step": 53487 }, { "epoch": 5.49347848413269, "grad_norm": 0.08135402947664261, "learning_rate": 0.01, "loss": 1.9895, "step": 53490 }, { "epoch": 5.493786587244531, "grad_norm": 0.13751734793186188, "learning_rate": 0.01, "loss": 2.0027, "step": 53493 }, { "epoch": 5.4940946903563725, "grad_norm": 0.07379119843244553, "learning_rate": 0.01, "loss": 1.9757, "step": 53496 }, { "epoch": 5.494402793468214, "grad_norm": 0.0595991387963295, "learning_rate": 0.01, "loss": 1.9961, "step": 53499 }, { "epoch": 5.494710896580055, "grad_norm": 0.05081510916352272, "learning_rate": 0.01, "loss": 1.9804, "step": 53502 }, { "epoch": 5.495018999691897, "grad_norm": 0.040366947650909424, "learning_rate": 0.01, "loss": 1.9703, "step": 53505 }, { "epoch": 5.4953271028037385, "grad_norm": 0.10154601186513901, "learning_rate": 0.01, "loss": 1.9979, "step": 53508 }, { "epoch": 5.49563520591558, "grad_norm": 0.0562141053378582, "learning_rate": 0.01, "loss": 2.0076, "step": 53511 }, { "epoch": 5.495943309027421, "grad_norm": 0.07368075847625732, "learning_rate": 0.01, "loss": 2.0106, "step": 53514 }, { "epoch": 5.496251412139262, "grad_norm": 0.04702366143465042, "learning_rate": 0.01, "loss": 1.9734, "step": 53517 }, { "epoch": 5.496559515251104, "grad_norm": 0.05341365188360214, "learning_rate": 0.01, "loss": 1.9943, "step": 53520 }, { "epoch": 5.496867618362946, "grad_norm": 0.10820145905017853, "learning_rate": 0.01, "loss": 2.0101, "step": 53523 }, { "epoch": 5.497175721474787, "grad_norm": 0.053429488092660904, "learning_rate": 0.01, "loss": 1.9723, "step": 53526 }, { "epoch": 5.497483824586628, "grad_norm": 0.1272108554840088, "learning_rate": 0.01, "loss": 1.9985, "step": 53529 }, { "epoch": 5.4977919276984695, "grad_norm": 0.043057285249233246, "learning_rate": 0.01, "loss": 1.9961, "step": 53532 }, { "epoch": 5.498100030810312, "grad_norm": 0.055451132357120514, "learning_rate": 0.01, "loss": 1.9723, "step": 53535 }, { "epoch": 5.498408133922153, "grad_norm": 0.11000920832157135, "learning_rate": 0.01, "loss": 2.0072, "step": 53538 }, { "epoch": 5.498716237033994, "grad_norm": 0.043610602617263794, "learning_rate": 0.01, "loss": 1.9832, "step": 53541 }, { "epoch": 5.499024340145835, "grad_norm": 0.12951324880123138, "learning_rate": 0.01, "loss": 1.9833, "step": 53544 }, { "epoch": 5.499332443257677, "grad_norm": 0.1155344694852829, "learning_rate": 0.01, "loss": 2.0095, "step": 53547 }, { "epoch": 5.499640546369518, "grad_norm": 0.04202111065387726, "learning_rate": 0.01, "loss": 1.9721, "step": 53550 }, { "epoch": 5.49994864948136, "grad_norm": 0.05797869712114334, "learning_rate": 0.01, "loss": 1.9742, "step": 53553 }, { "epoch": 5.500256752593201, "grad_norm": 0.055514171719551086, "learning_rate": 0.01, "loss": 2.002, "step": 53556 }, { "epoch": 5.500564855705043, "grad_norm": 0.032696306705474854, "learning_rate": 0.01, "loss": 1.9675, "step": 53559 }, { "epoch": 5.500872958816884, "grad_norm": 0.07791826128959656, "learning_rate": 0.01, "loss": 2.0007, "step": 53562 }, { "epoch": 5.501181061928725, "grad_norm": 0.12042553722858429, "learning_rate": 0.01, "loss": 1.9967, "step": 53565 }, { "epoch": 5.501489165040567, "grad_norm": 0.08734118938446045, "learning_rate": 0.01, "loss": 1.9866, "step": 53568 }, { "epoch": 5.5017972681524085, "grad_norm": 0.1427556276321411, "learning_rate": 0.01, "loss": 1.9747, "step": 53571 }, { "epoch": 5.50210537126425, "grad_norm": 0.05691046267747879, "learning_rate": 0.01, "loss": 1.9907, "step": 53574 }, { "epoch": 5.502413474376091, "grad_norm": 0.05428704619407654, "learning_rate": 0.01, "loss": 1.9803, "step": 53577 }, { "epoch": 5.502721577487932, "grad_norm": 0.07390681654214859, "learning_rate": 0.01, "loss": 1.9959, "step": 53580 }, { "epoch": 5.503029680599774, "grad_norm": 0.04650009423494339, "learning_rate": 0.01, "loss": 2.0132, "step": 53583 }, { "epoch": 5.503337783711616, "grad_norm": 0.06623617559671402, "learning_rate": 0.01, "loss": 2.0145, "step": 53586 }, { "epoch": 5.503645886823457, "grad_norm": 0.08254354447126389, "learning_rate": 0.01, "loss": 2.0073, "step": 53589 }, { "epoch": 5.503953989935298, "grad_norm": 0.07407466322183609, "learning_rate": 0.01, "loss": 2.0043, "step": 53592 }, { "epoch": 5.5042620930471395, "grad_norm": 0.05553009733557701, "learning_rate": 0.01, "loss": 1.9923, "step": 53595 }, { "epoch": 5.504570196158982, "grad_norm": 0.068050317466259, "learning_rate": 0.01, "loss": 1.9975, "step": 53598 }, { "epoch": 5.504878299270823, "grad_norm": 0.036538973450660706, "learning_rate": 0.01, "loss": 2.0111, "step": 53601 }, { "epoch": 5.505186402382664, "grad_norm": 0.04083050787448883, "learning_rate": 0.01, "loss": 1.9942, "step": 53604 }, { "epoch": 5.5054945054945055, "grad_norm": 0.03994308039546013, "learning_rate": 0.01, "loss": 1.9752, "step": 53607 }, { "epoch": 5.505802608606347, "grad_norm": 0.039935458451509476, "learning_rate": 0.01, "loss": 2.0083, "step": 53610 }, { "epoch": 5.506110711718188, "grad_norm": 0.09762649983167648, "learning_rate": 0.01, "loss": 1.9997, "step": 53613 }, { "epoch": 5.50641881483003, "grad_norm": 0.058254171162843704, "learning_rate": 0.01, "loss": 1.9831, "step": 53616 }, { "epoch": 5.506726917941871, "grad_norm": 0.11991842836141586, "learning_rate": 0.01, "loss": 2.0198, "step": 53619 }, { "epoch": 5.507035021053713, "grad_norm": 0.04026370868086815, "learning_rate": 0.01, "loss": 1.9698, "step": 53622 }, { "epoch": 5.507343124165554, "grad_norm": 0.03855542093515396, "learning_rate": 0.01, "loss": 2.0004, "step": 53625 }, { "epoch": 5.507651227277395, "grad_norm": 0.04439062252640724, "learning_rate": 0.01, "loss": 1.9776, "step": 53628 }, { "epoch": 5.507959330389237, "grad_norm": 0.09283201396465302, "learning_rate": 0.01, "loss": 1.9897, "step": 53631 }, { "epoch": 5.508267433501079, "grad_norm": 0.05495656281709671, "learning_rate": 0.01, "loss": 1.9517, "step": 53634 }, { "epoch": 5.50857553661292, "grad_norm": 0.08464661240577698, "learning_rate": 0.01, "loss": 1.9946, "step": 53637 }, { "epoch": 5.508883639724761, "grad_norm": 0.06856382638216019, "learning_rate": 0.01, "loss": 1.9549, "step": 53640 }, { "epoch": 5.509191742836602, "grad_norm": 0.10367941111326218, "learning_rate": 0.01, "loss": 2.0157, "step": 53643 }, { "epoch": 5.509499845948444, "grad_norm": 0.07614094018936157, "learning_rate": 0.01, "loss": 1.9837, "step": 53646 }, { "epoch": 5.509807949060286, "grad_norm": 0.07301559299230576, "learning_rate": 0.01, "loss": 1.9778, "step": 53649 }, { "epoch": 5.510116052172127, "grad_norm": 0.03928094357252121, "learning_rate": 0.01, "loss": 1.9903, "step": 53652 }, { "epoch": 5.510424155283968, "grad_norm": 0.04542843624949455, "learning_rate": 0.01, "loss": 2.0183, "step": 53655 }, { "epoch": 5.51073225839581, "grad_norm": 0.08662543445825577, "learning_rate": 0.01, "loss": 1.9868, "step": 53658 }, { "epoch": 5.511040361507651, "grad_norm": 0.045004189014434814, "learning_rate": 0.01, "loss": 2.0143, "step": 53661 }, { "epoch": 5.511348464619493, "grad_norm": 0.03661379590630531, "learning_rate": 0.01, "loss": 1.9887, "step": 53664 }, { "epoch": 5.511656567731334, "grad_norm": 0.05117429420351982, "learning_rate": 0.01, "loss": 1.9681, "step": 53667 }, { "epoch": 5.5119646708431755, "grad_norm": 0.06222992762923241, "learning_rate": 0.01, "loss": 1.9904, "step": 53670 }, { "epoch": 5.512272773955017, "grad_norm": 0.04274081811308861, "learning_rate": 0.01, "loss": 1.977, "step": 53673 }, { "epoch": 5.512580877066858, "grad_norm": 0.09774081408977509, "learning_rate": 0.01, "loss": 1.9789, "step": 53676 }, { "epoch": 5.512888980178699, "grad_norm": 0.15694968402385712, "learning_rate": 0.01, "loss": 1.9814, "step": 53679 }, { "epoch": 5.5131970832905415, "grad_norm": 0.06906448304653168, "learning_rate": 0.01, "loss": 1.9997, "step": 53682 }, { "epoch": 5.513505186402383, "grad_norm": 0.04059962183237076, "learning_rate": 0.01, "loss": 1.9894, "step": 53685 }, { "epoch": 5.513813289514224, "grad_norm": 0.03401316702365875, "learning_rate": 0.01, "loss": 1.9862, "step": 53688 }, { "epoch": 5.514121392626065, "grad_norm": 0.03932545334100723, "learning_rate": 0.01, "loss": 1.9991, "step": 53691 }, { "epoch": 5.514429495737907, "grad_norm": 0.04314441978931427, "learning_rate": 0.01, "loss": 1.9895, "step": 53694 }, { "epoch": 5.514737598849749, "grad_norm": 0.04282282292842865, "learning_rate": 0.01, "loss": 2.0311, "step": 53697 }, { "epoch": 5.51504570196159, "grad_norm": 0.07264575362205505, "learning_rate": 0.01, "loss": 1.9968, "step": 53700 }, { "epoch": 5.515353805073431, "grad_norm": 0.031450774520635605, "learning_rate": 0.01, "loss": 2.0088, "step": 53703 }, { "epoch": 5.5156619081852725, "grad_norm": 0.04431367665529251, "learning_rate": 0.01, "loss": 2.0218, "step": 53706 }, { "epoch": 5.515970011297114, "grad_norm": 0.059152550995349884, "learning_rate": 0.01, "loss": 1.9776, "step": 53709 }, { "epoch": 5.516278114408956, "grad_norm": 0.05627863109111786, "learning_rate": 0.01, "loss": 2.0023, "step": 53712 }, { "epoch": 5.516586217520797, "grad_norm": 0.12007083743810654, "learning_rate": 0.01, "loss": 2.003, "step": 53715 }, { "epoch": 5.516894320632638, "grad_norm": 0.04498304799199104, "learning_rate": 0.01, "loss": 2.0171, "step": 53718 }, { "epoch": 5.51720242374448, "grad_norm": 0.05469832196831703, "learning_rate": 0.01, "loss": 2.0195, "step": 53721 }, { "epoch": 5.517510526856321, "grad_norm": 0.12083622813224792, "learning_rate": 0.01, "loss": 1.9909, "step": 53724 }, { "epoch": 5.517818629968163, "grad_norm": 0.05532558634877205, "learning_rate": 0.01, "loss": 2.0263, "step": 53727 }, { "epoch": 5.518126733080004, "grad_norm": 0.0655357614159584, "learning_rate": 0.01, "loss": 2.0141, "step": 53730 }, { "epoch": 5.518434836191846, "grad_norm": 0.03844601288437843, "learning_rate": 0.01, "loss": 2.0126, "step": 53733 }, { "epoch": 5.518742939303687, "grad_norm": 0.08736705034971237, "learning_rate": 0.01, "loss": 1.9982, "step": 53736 }, { "epoch": 5.519051042415528, "grad_norm": 0.03798610344529152, "learning_rate": 0.01, "loss": 2.0017, "step": 53739 }, { "epoch": 5.519359145527369, "grad_norm": 0.08266885578632355, "learning_rate": 0.01, "loss": 1.9744, "step": 53742 }, { "epoch": 5.5196672486392115, "grad_norm": 0.12414561212062836, "learning_rate": 0.01, "loss": 1.9877, "step": 53745 }, { "epoch": 5.519975351751053, "grad_norm": 0.07735760509967804, "learning_rate": 0.01, "loss": 1.9847, "step": 53748 }, { "epoch": 5.520283454862894, "grad_norm": 0.07708865404129028, "learning_rate": 0.01, "loss": 1.9994, "step": 53751 }, { "epoch": 5.520591557974735, "grad_norm": 0.08443256467580795, "learning_rate": 0.01, "loss": 1.9776, "step": 53754 }, { "epoch": 5.520899661086577, "grad_norm": 0.061041250824928284, "learning_rate": 0.01, "loss": 2.0072, "step": 53757 }, { "epoch": 5.521207764198419, "grad_norm": 0.06206076219677925, "learning_rate": 0.01, "loss": 1.9791, "step": 53760 }, { "epoch": 5.52151586731026, "grad_norm": 0.07099471986293793, "learning_rate": 0.01, "loss": 1.9812, "step": 53763 }, { "epoch": 5.521823970422101, "grad_norm": 0.08134863525629044, "learning_rate": 0.01, "loss": 1.9825, "step": 53766 }, { "epoch": 5.5221320735339425, "grad_norm": 0.08037443459033966, "learning_rate": 0.01, "loss": 2.0215, "step": 53769 }, { "epoch": 5.522440176645784, "grad_norm": 0.06751944124698639, "learning_rate": 0.01, "loss": 2.0003, "step": 53772 }, { "epoch": 5.522748279757625, "grad_norm": 0.04069173336029053, "learning_rate": 0.01, "loss": 1.9942, "step": 53775 }, { "epoch": 5.523056382869467, "grad_norm": 0.04320038482546806, "learning_rate": 0.01, "loss": 2.016, "step": 53778 }, { "epoch": 5.5233644859813085, "grad_norm": 0.11609040200710297, "learning_rate": 0.01, "loss": 1.9983, "step": 53781 }, { "epoch": 5.52367258909315, "grad_norm": 0.050336502492427826, "learning_rate": 0.01, "loss": 1.9971, "step": 53784 }, { "epoch": 5.523980692204991, "grad_norm": 0.039348434656858444, "learning_rate": 0.01, "loss": 1.9847, "step": 53787 }, { "epoch": 5.524288795316833, "grad_norm": 0.08109190315008163, "learning_rate": 0.01, "loss": 1.961, "step": 53790 }, { "epoch": 5.524596898428674, "grad_norm": 0.07181521505117416, "learning_rate": 0.01, "loss": 1.9684, "step": 53793 }, { "epoch": 5.524905001540516, "grad_norm": 0.04678622633218765, "learning_rate": 0.01, "loss": 1.967, "step": 53796 }, { "epoch": 5.525213104652357, "grad_norm": 0.079826720058918, "learning_rate": 0.01, "loss": 2.0056, "step": 53799 }, { "epoch": 5.525521207764198, "grad_norm": 0.08414063602685928, "learning_rate": 0.01, "loss": 1.9946, "step": 53802 }, { "epoch": 5.5258293108760395, "grad_norm": 0.04829956591129303, "learning_rate": 0.01, "loss": 1.9831, "step": 53805 }, { "epoch": 5.526137413987882, "grad_norm": 0.0862230584025383, "learning_rate": 0.01, "loss": 1.9738, "step": 53808 }, { "epoch": 5.526445517099723, "grad_norm": 0.06035618856549263, "learning_rate": 0.01, "loss": 2.0033, "step": 53811 }, { "epoch": 5.526753620211564, "grad_norm": 0.04363265261054039, "learning_rate": 0.01, "loss": 1.9914, "step": 53814 }, { "epoch": 5.527061723323405, "grad_norm": 0.06226656585931778, "learning_rate": 0.01, "loss": 1.9977, "step": 53817 }, { "epoch": 5.527369826435247, "grad_norm": 0.10821603238582611, "learning_rate": 0.01, "loss": 2.0045, "step": 53820 }, { "epoch": 5.527677929547089, "grad_norm": 0.06734979152679443, "learning_rate": 0.01, "loss": 2.0031, "step": 53823 }, { "epoch": 5.52798603265893, "grad_norm": 0.07929663360118866, "learning_rate": 0.01, "loss": 1.9829, "step": 53826 }, { "epoch": 5.528294135770771, "grad_norm": 0.11255302280187607, "learning_rate": 0.01, "loss": 1.99, "step": 53829 }, { "epoch": 5.528602238882613, "grad_norm": 0.06797608733177185, "learning_rate": 0.01, "loss": 1.9903, "step": 53832 }, { "epoch": 5.528910341994454, "grad_norm": 0.07606469094753265, "learning_rate": 0.01, "loss": 1.9926, "step": 53835 }, { "epoch": 5.529218445106295, "grad_norm": 0.04698120057582855, "learning_rate": 0.01, "loss": 1.9661, "step": 53838 }, { "epoch": 5.529526548218137, "grad_norm": 0.07087966799736023, "learning_rate": 0.01, "loss": 2.0004, "step": 53841 }, { "epoch": 5.5298346513299785, "grad_norm": 0.048692237585783005, "learning_rate": 0.01, "loss": 1.9858, "step": 53844 }, { "epoch": 5.53014275444182, "grad_norm": 0.04359065368771553, "learning_rate": 0.01, "loss": 1.9956, "step": 53847 }, { "epoch": 5.530450857553661, "grad_norm": 0.05424369126558304, "learning_rate": 0.01, "loss": 2.0108, "step": 53850 }, { "epoch": 5.530758960665503, "grad_norm": 0.13464900851249695, "learning_rate": 0.01, "loss": 1.9907, "step": 53853 }, { "epoch": 5.5310670637773445, "grad_norm": 0.038927361369132996, "learning_rate": 0.01, "loss": 1.9933, "step": 53856 }, { "epoch": 5.531375166889186, "grad_norm": 0.040312107652425766, "learning_rate": 0.01, "loss": 1.9988, "step": 53859 }, { "epoch": 5.531683270001027, "grad_norm": 0.05727381631731987, "learning_rate": 0.01, "loss": 1.9879, "step": 53862 }, { "epoch": 5.531991373112868, "grad_norm": 0.06750021874904633, "learning_rate": 0.01, "loss": 1.9842, "step": 53865 }, { "epoch": 5.5322994762247095, "grad_norm": 0.08220100402832031, "learning_rate": 0.01, "loss": 1.9826, "step": 53868 }, { "epoch": 5.532607579336552, "grad_norm": 0.05215999484062195, "learning_rate": 0.01, "loss": 2.026, "step": 53871 }, { "epoch": 5.532915682448393, "grad_norm": 0.06510480493307114, "learning_rate": 0.01, "loss": 2.0101, "step": 53874 }, { "epoch": 5.533223785560234, "grad_norm": 0.04641730710864067, "learning_rate": 0.01, "loss": 2.0139, "step": 53877 }, { "epoch": 5.5335318886720755, "grad_norm": 0.04414188116788864, "learning_rate": 0.01, "loss": 2.0314, "step": 53880 }, { "epoch": 5.533839991783917, "grad_norm": 0.041879378259181976, "learning_rate": 0.01, "loss": 1.9848, "step": 53883 }, { "epoch": 5.534148094895759, "grad_norm": 0.039413318037986755, "learning_rate": 0.01, "loss": 1.9972, "step": 53886 }, { "epoch": 5.5344561980076, "grad_norm": 0.0784526839852333, "learning_rate": 0.01, "loss": 2.0144, "step": 53889 }, { "epoch": 5.534764301119441, "grad_norm": 0.1049879789352417, "learning_rate": 0.01, "loss": 2.0088, "step": 53892 }, { "epoch": 5.535072404231283, "grad_norm": 0.15476803481578827, "learning_rate": 0.01, "loss": 2.017, "step": 53895 }, { "epoch": 5.535380507343124, "grad_norm": 0.1224508136510849, "learning_rate": 0.01, "loss": 1.9856, "step": 53898 }, { "epoch": 5.535688610454965, "grad_norm": 0.060283955186605453, "learning_rate": 0.01, "loss": 1.9801, "step": 53901 }, { "epoch": 5.535996713566807, "grad_norm": 0.05532943457365036, "learning_rate": 0.01, "loss": 1.9924, "step": 53904 }, { "epoch": 5.536304816678649, "grad_norm": 0.04745487496256828, "learning_rate": 0.01, "loss": 2.0121, "step": 53907 }, { "epoch": 5.53661291979049, "grad_norm": 0.05770549178123474, "learning_rate": 0.01, "loss": 1.9754, "step": 53910 }, { "epoch": 5.536921022902331, "grad_norm": 0.05675048008561134, "learning_rate": 0.01, "loss": 2.004, "step": 53913 }, { "epoch": 5.537229126014172, "grad_norm": 0.05538811534643173, "learning_rate": 0.01, "loss": 1.9991, "step": 53916 }, { "epoch": 5.5375372291260145, "grad_norm": 0.05474154278635979, "learning_rate": 0.01, "loss": 2.0032, "step": 53919 }, { "epoch": 5.537845332237856, "grad_norm": 0.1373576670885086, "learning_rate": 0.01, "loss": 1.9917, "step": 53922 }, { "epoch": 5.538153435349697, "grad_norm": 0.04938695207238197, "learning_rate": 0.01, "loss": 2.0097, "step": 53925 }, { "epoch": 5.538461538461538, "grad_norm": 0.08616656810045242, "learning_rate": 0.01, "loss": 1.9829, "step": 53928 }, { "epoch": 5.53876964157338, "grad_norm": 0.0794534832239151, "learning_rate": 0.01, "loss": 2.0125, "step": 53931 }, { "epoch": 5.539077744685221, "grad_norm": 0.07034434378147125, "learning_rate": 0.01, "loss": 2.0093, "step": 53934 }, { "epoch": 5.539385847797063, "grad_norm": 0.09252175688743591, "learning_rate": 0.01, "loss": 1.9782, "step": 53937 }, { "epoch": 5.539693950908904, "grad_norm": 0.05770866200327873, "learning_rate": 0.01, "loss": 2.0054, "step": 53940 }, { "epoch": 5.5400020540207455, "grad_norm": 0.06952711939811707, "learning_rate": 0.01, "loss": 2.0007, "step": 53943 }, { "epoch": 5.540310157132587, "grad_norm": 0.06332490593194962, "learning_rate": 0.01, "loss": 1.972, "step": 53946 }, { "epoch": 5.540618260244429, "grad_norm": 0.03297451511025429, "learning_rate": 0.01, "loss": 1.9877, "step": 53949 }, { "epoch": 5.54092636335627, "grad_norm": 0.03710363432765007, "learning_rate": 0.01, "loss": 1.9966, "step": 53952 }, { "epoch": 5.5412344664681115, "grad_norm": 0.05471167713403702, "learning_rate": 0.01, "loss": 1.9889, "step": 53955 }, { "epoch": 5.541542569579953, "grad_norm": 0.0861392468214035, "learning_rate": 0.01, "loss": 1.9909, "step": 53958 }, { "epoch": 5.541850672691794, "grad_norm": 0.08588635921478271, "learning_rate": 0.01, "loss": 2.003, "step": 53961 }, { "epoch": 5.542158775803635, "grad_norm": 0.09388336539268494, "learning_rate": 0.01, "loss": 1.9866, "step": 53964 }, { "epoch": 5.542466878915477, "grad_norm": 0.1397831290960312, "learning_rate": 0.01, "loss": 1.9989, "step": 53967 }, { "epoch": 5.542774982027319, "grad_norm": 0.10498721152544022, "learning_rate": 0.01, "loss": 2.0093, "step": 53970 }, { "epoch": 5.54308308513916, "grad_norm": 0.0574629008769989, "learning_rate": 0.01, "loss": 2.02, "step": 53973 }, { "epoch": 5.543391188251001, "grad_norm": 0.03409456089138985, "learning_rate": 0.01, "loss": 1.9942, "step": 53976 }, { "epoch": 5.5436992913628425, "grad_norm": 0.03830345720052719, "learning_rate": 0.01, "loss": 1.9648, "step": 53979 }, { "epoch": 5.544007394474685, "grad_norm": 0.0545862540602684, "learning_rate": 0.01, "loss": 1.9916, "step": 53982 }, { "epoch": 5.544315497586526, "grad_norm": 0.047009505331516266, "learning_rate": 0.01, "loss": 1.9757, "step": 53985 }, { "epoch": 5.544623600698367, "grad_norm": 0.0546221099793911, "learning_rate": 0.01, "loss": 1.9836, "step": 53988 }, { "epoch": 5.544931703810208, "grad_norm": 0.05153699591755867, "learning_rate": 0.01, "loss": 1.9832, "step": 53991 }, { "epoch": 5.54523980692205, "grad_norm": 0.1104668453335762, "learning_rate": 0.01, "loss": 1.9944, "step": 53994 }, { "epoch": 5.545547910033891, "grad_norm": 0.09211790561676025, "learning_rate": 0.01, "loss": 1.978, "step": 53997 }, { "epoch": 5.545856013145733, "grad_norm": 0.03746125102043152, "learning_rate": 0.01, "loss": 1.9822, "step": 54000 }, { "epoch": 5.546164116257574, "grad_norm": 0.05226179584860802, "learning_rate": 0.01, "loss": 1.981, "step": 54003 }, { "epoch": 5.546472219369416, "grad_norm": 0.08278732746839523, "learning_rate": 0.01, "loss": 1.9914, "step": 54006 }, { "epoch": 5.546780322481257, "grad_norm": 0.08003208786249161, "learning_rate": 0.01, "loss": 2.0169, "step": 54009 }, { "epoch": 5.547088425593098, "grad_norm": 0.09529578685760498, "learning_rate": 0.01, "loss": 1.98, "step": 54012 }, { "epoch": 5.54739652870494, "grad_norm": 0.08121462911367416, "learning_rate": 0.01, "loss": 2.0033, "step": 54015 }, { "epoch": 5.5477046318167815, "grad_norm": 0.07625698298215866, "learning_rate": 0.01, "loss": 1.9979, "step": 54018 }, { "epoch": 5.548012734928623, "grad_norm": 0.11102813482284546, "learning_rate": 0.01, "loss": 2.0028, "step": 54021 }, { "epoch": 5.548320838040464, "grad_norm": 0.05425316095352173, "learning_rate": 0.01, "loss": 1.9911, "step": 54024 }, { "epoch": 5.548628941152305, "grad_norm": 0.050913844257593155, "learning_rate": 0.01, "loss": 1.9919, "step": 54027 }, { "epoch": 5.548937044264147, "grad_norm": 0.049932949244976044, "learning_rate": 0.01, "loss": 1.985, "step": 54030 }, { "epoch": 5.549245147375989, "grad_norm": 0.058598097413778305, "learning_rate": 0.01, "loss": 1.9949, "step": 54033 }, { "epoch": 5.54955325048783, "grad_norm": 0.06767188012599945, "learning_rate": 0.01, "loss": 2.0074, "step": 54036 }, { "epoch": 5.549861353599671, "grad_norm": 0.0709863007068634, "learning_rate": 0.01, "loss": 1.97, "step": 54039 }, { "epoch": 5.5501694567115125, "grad_norm": 0.09273546189069748, "learning_rate": 0.01, "loss": 1.9545, "step": 54042 }, { "epoch": 5.550477559823355, "grad_norm": 0.04681755229830742, "learning_rate": 0.01, "loss": 1.982, "step": 54045 }, { "epoch": 5.550785662935196, "grad_norm": 0.08744305372238159, "learning_rate": 0.01, "loss": 1.9904, "step": 54048 }, { "epoch": 5.551093766047037, "grad_norm": 0.03497626259922981, "learning_rate": 0.01, "loss": 1.9794, "step": 54051 }, { "epoch": 5.5514018691588785, "grad_norm": 0.05988140404224396, "learning_rate": 0.01, "loss": 1.9883, "step": 54054 }, { "epoch": 5.55170997227072, "grad_norm": 0.0464041493833065, "learning_rate": 0.01, "loss": 1.9822, "step": 54057 }, { "epoch": 5.552018075382561, "grad_norm": 0.051982734352350235, "learning_rate": 0.01, "loss": 1.9965, "step": 54060 }, { "epoch": 5.552326178494403, "grad_norm": 0.045320626348257065, "learning_rate": 0.01, "loss": 1.9806, "step": 54063 }, { "epoch": 5.552634281606244, "grad_norm": 0.04230513423681259, "learning_rate": 0.01, "loss": 2.0064, "step": 54066 }, { "epoch": 5.552942384718086, "grad_norm": 0.0393347293138504, "learning_rate": 0.01, "loss": 2.0179, "step": 54069 }, { "epoch": 5.553250487829927, "grad_norm": 0.1013195738196373, "learning_rate": 0.01, "loss": 1.984, "step": 54072 }, { "epoch": 5.553558590941768, "grad_norm": 0.08008227497339249, "learning_rate": 0.01, "loss": 2.0113, "step": 54075 }, { "epoch": 5.55386669405361, "grad_norm": 0.0729672759771347, "learning_rate": 0.01, "loss": 1.9885, "step": 54078 }, { "epoch": 5.554174797165452, "grad_norm": 0.0624358095228672, "learning_rate": 0.01, "loss": 2.02, "step": 54081 }, { "epoch": 5.554482900277293, "grad_norm": 0.03352997824549675, "learning_rate": 0.01, "loss": 1.988, "step": 54084 }, { "epoch": 5.554791003389134, "grad_norm": 0.03241400793194771, "learning_rate": 0.01, "loss": 2.0163, "step": 54087 }, { "epoch": 5.555099106500975, "grad_norm": 0.033829499036073685, "learning_rate": 0.01, "loss": 2.0074, "step": 54090 }, { "epoch": 5.555407209612817, "grad_norm": 0.03398764133453369, "learning_rate": 0.01, "loss": 1.9916, "step": 54093 }, { "epoch": 5.555715312724659, "grad_norm": 0.04630080237984657, "learning_rate": 0.01, "loss": 1.9941, "step": 54096 }, { "epoch": 5.5560234158365, "grad_norm": 0.08986920863389969, "learning_rate": 0.01, "loss": 1.9908, "step": 54099 }, { "epoch": 5.556331518948341, "grad_norm": 0.07276762276887894, "learning_rate": 0.01, "loss": 1.985, "step": 54102 }, { "epoch": 5.556639622060183, "grad_norm": 0.07180680334568024, "learning_rate": 0.01, "loss": 1.9875, "step": 54105 }, { "epoch": 5.556947725172025, "grad_norm": 0.0470985509455204, "learning_rate": 0.01, "loss": 1.9998, "step": 54108 }, { "epoch": 5.557255828283866, "grad_norm": 0.07641714811325073, "learning_rate": 0.01, "loss": 1.9903, "step": 54111 }, { "epoch": 5.557563931395707, "grad_norm": 0.11065167188644409, "learning_rate": 0.01, "loss": 1.9954, "step": 54114 }, { "epoch": 5.5578720345075485, "grad_norm": 0.10439474880695343, "learning_rate": 0.01, "loss": 1.9979, "step": 54117 }, { "epoch": 5.55818013761939, "grad_norm": 0.044907502830028534, "learning_rate": 0.01, "loss": 2.0008, "step": 54120 }, { "epoch": 5.558488240731231, "grad_norm": 0.052572451531887054, "learning_rate": 0.01, "loss": 1.9945, "step": 54123 }, { "epoch": 5.558796343843073, "grad_norm": 0.03218472748994827, "learning_rate": 0.01, "loss": 1.995, "step": 54126 }, { "epoch": 5.5591044469549145, "grad_norm": 0.04737766459584236, "learning_rate": 0.01, "loss": 1.9572, "step": 54129 }, { "epoch": 5.559412550066756, "grad_norm": 0.045241083949804306, "learning_rate": 0.01, "loss": 1.9938, "step": 54132 }, { "epoch": 5.559720653178597, "grad_norm": 0.08022741228342056, "learning_rate": 0.01, "loss": 1.9641, "step": 54135 }, { "epoch": 5.560028756290438, "grad_norm": 0.1389225870370865, "learning_rate": 0.01, "loss": 1.9806, "step": 54138 }, { "epoch": 5.56033685940228, "grad_norm": 0.1731427162885666, "learning_rate": 0.01, "loss": 1.9961, "step": 54141 }, { "epoch": 5.560644962514122, "grad_norm": 0.13785654306411743, "learning_rate": 0.01, "loss": 1.9695, "step": 54144 }, { "epoch": 5.560953065625963, "grad_norm": 0.08374970406293869, "learning_rate": 0.01, "loss": 1.9702, "step": 54147 }, { "epoch": 5.561261168737804, "grad_norm": 0.038511909544467926, "learning_rate": 0.01, "loss": 1.9911, "step": 54150 }, { "epoch": 5.5615692718496454, "grad_norm": 0.04264768585562706, "learning_rate": 0.01, "loss": 1.9935, "step": 54153 }, { "epoch": 5.561877374961487, "grad_norm": 0.04369993880391121, "learning_rate": 0.01, "loss": 1.9771, "step": 54156 }, { "epoch": 5.562185478073329, "grad_norm": 0.04195026680827141, "learning_rate": 0.01, "loss": 2.0044, "step": 54159 }, { "epoch": 5.56249358118517, "grad_norm": 0.04474294185638428, "learning_rate": 0.01, "loss": 1.9984, "step": 54162 }, { "epoch": 5.562801684297011, "grad_norm": 0.049389854073524475, "learning_rate": 0.01, "loss": 2.027, "step": 54165 }, { "epoch": 5.563109787408853, "grad_norm": 0.06065073981881142, "learning_rate": 0.01, "loss": 2.0184, "step": 54168 }, { "epoch": 5.563417890520694, "grad_norm": 0.1028941348195076, "learning_rate": 0.01, "loss": 2.0015, "step": 54171 }, { "epoch": 5.563725993632536, "grad_norm": 0.05602087453007698, "learning_rate": 0.01, "loss": 2.0044, "step": 54174 }, { "epoch": 5.564034096744377, "grad_norm": 0.09422741830348969, "learning_rate": 0.01, "loss": 1.9656, "step": 54177 }, { "epoch": 5.564342199856219, "grad_norm": 0.08502856642007828, "learning_rate": 0.01, "loss": 2.0054, "step": 54180 }, { "epoch": 5.56465030296806, "grad_norm": 0.1169920489192009, "learning_rate": 0.01, "loss": 1.9849, "step": 54183 }, { "epoch": 5.564958406079901, "grad_norm": 0.08334633708000183, "learning_rate": 0.01, "loss": 2.0013, "step": 54186 }, { "epoch": 5.565266509191742, "grad_norm": 0.04524173587560654, "learning_rate": 0.01, "loss": 1.9991, "step": 54189 }, { "epoch": 5.5655746123035845, "grad_norm": 0.04702974855899811, "learning_rate": 0.01, "loss": 1.9957, "step": 54192 }, { "epoch": 5.565882715415426, "grad_norm": 0.052152637392282486, "learning_rate": 0.01, "loss": 1.9934, "step": 54195 }, { "epoch": 5.566190818527267, "grad_norm": 0.07976489514112473, "learning_rate": 0.01, "loss": 1.9818, "step": 54198 }, { "epoch": 5.566498921639108, "grad_norm": 0.10382959991693497, "learning_rate": 0.01, "loss": 2.0228, "step": 54201 }, { "epoch": 5.5668070247509505, "grad_norm": 0.062233816832304, "learning_rate": 0.01, "loss": 2.0144, "step": 54204 }, { "epoch": 5.567115127862792, "grad_norm": 0.0779416561126709, "learning_rate": 0.01, "loss": 2.0182, "step": 54207 }, { "epoch": 5.567423230974633, "grad_norm": 0.05779222771525383, "learning_rate": 0.01, "loss": 1.9977, "step": 54210 }, { "epoch": 5.567731334086474, "grad_norm": 0.035859446972608566, "learning_rate": 0.01, "loss": 1.983, "step": 54213 }, { "epoch": 5.5680394371983155, "grad_norm": 0.03528286889195442, "learning_rate": 0.01, "loss": 1.9969, "step": 54216 }, { "epoch": 5.568347540310157, "grad_norm": 0.03673080727458, "learning_rate": 0.01, "loss": 1.975, "step": 54219 }, { "epoch": 5.568655643421999, "grad_norm": 0.10049016028642654, "learning_rate": 0.01, "loss": 1.999, "step": 54222 }, { "epoch": 5.56896374653384, "grad_norm": 0.09414393454790115, "learning_rate": 0.01, "loss": 2.0009, "step": 54225 }, { "epoch": 5.5692718496456814, "grad_norm": 0.09574013203382492, "learning_rate": 0.01, "loss": 2.0118, "step": 54228 }, { "epoch": 5.569579952757523, "grad_norm": 0.06100035086274147, "learning_rate": 0.01, "loss": 2.0029, "step": 54231 }, { "epoch": 5.569888055869364, "grad_norm": 0.03801640495657921, "learning_rate": 0.01, "loss": 1.9899, "step": 54234 }, { "epoch": 5.570196158981206, "grad_norm": 0.04721992835402489, "learning_rate": 0.01, "loss": 1.9916, "step": 54237 }, { "epoch": 5.570504262093047, "grad_norm": 0.14547844231128693, "learning_rate": 0.01, "loss": 1.9996, "step": 54240 }, { "epoch": 5.570812365204889, "grad_norm": 0.0706636980175972, "learning_rate": 0.01, "loss": 1.9924, "step": 54243 }, { "epoch": 5.57112046831673, "grad_norm": 0.05795181170105934, "learning_rate": 0.01, "loss": 1.9894, "step": 54246 }, { "epoch": 5.571428571428571, "grad_norm": 0.04111074283719063, "learning_rate": 0.01, "loss": 1.9824, "step": 54249 }, { "epoch": 5.571736674540412, "grad_norm": 0.07275404036045074, "learning_rate": 0.01, "loss": 2.0116, "step": 54252 }, { "epoch": 5.572044777652255, "grad_norm": 0.06297045946121216, "learning_rate": 0.01, "loss": 1.9876, "step": 54255 }, { "epoch": 5.572352880764096, "grad_norm": 0.03748214617371559, "learning_rate": 0.01, "loss": 1.9963, "step": 54258 }, { "epoch": 5.572660983875937, "grad_norm": 0.0806899219751358, "learning_rate": 0.01, "loss": 1.9804, "step": 54261 }, { "epoch": 5.572969086987778, "grad_norm": 0.07261806726455688, "learning_rate": 0.01, "loss": 1.9871, "step": 54264 }, { "epoch": 5.57327719009962, "grad_norm": 0.09828619658946991, "learning_rate": 0.01, "loss": 2.0007, "step": 54267 }, { "epoch": 5.573585293211462, "grad_norm": 0.057755280286073685, "learning_rate": 0.01, "loss": 2.0058, "step": 54270 }, { "epoch": 5.573893396323303, "grad_norm": 0.053999438881874084, "learning_rate": 0.01, "loss": 1.992, "step": 54273 }, { "epoch": 5.574201499435144, "grad_norm": 0.058348219841718674, "learning_rate": 0.01, "loss": 1.9964, "step": 54276 }, { "epoch": 5.574509602546986, "grad_norm": 0.0748370811343193, "learning_rate": 0.01, "loss": 1.9881, "step": 54279 }, { "epoch": 5.574817705658827, "grad_norm": 0.12268301099538803, "learning_rate": 0.01, "loss": 1.9805, "step": 54282 }, { "epoch": 5.575125808770668, "grad_norm": 0.04664819315075874, "learning_rate": 0.01, "loss": 2.0232, "step": 54285 }, { "epoch": 5.57543391188251, "grad_norm": 0.061234939843416214, "learning_rate": 0.01, "loss": 1.9799, "step": 54288 }, { "epoch": 5.5757420149943515, "grad_norm": 0.034011028707027435, "learning_rate": 0.01, "loss": 2.0084, "step": 54291 }, { "epoch": 5.576050118106193, "grad_norm": 0.03644438087940216, "learning_rate": 0.01, "loss": 1.9863, "step": 54294 }, { "epoch": 5.576358221218034, "grad_norm": 0.03703255578875542, "learning_rate": 0.01, "loss": 2.01, "step": 54297 }, { "epoch": 5.576666324329876, "grad_norm": 0.06192943826317787, "learning_rate": 0.01, "loss": 1.9909, "step": 54300 }, { "epoch": 5.5769744274417175, "grad_norm": 0.07393354922533035, "learning_rate": 0.01, "loss": 2.0098, "step": 54303 }, { "epoch": 5.577282530553559, "grad_norm": 0.10186754167079926, "learning_rate": 0.01, "loss": 2.0114, "step": 54306 }, { "epoch": 5.5775906336654, "grad_norm": 0.05970088392496109, "learning_rate": 0.01, "loss": 1.9897, "step": 54309 }, { "epoch": 5.577898736777241, "grad_norm": 0.07863925397396088, "learning_rate": 0.01, "loss": 2.0041, "step": 54312 }, { "epoch": 5.5782068398890825, "grad_norm": 0.08788790553808212, "learning_rate": 0.01, "loss": 1.9686, "step": 54315 }, { "epoch": 5.578514943000925, "grad_norm": 0.04963497817516327, "learning_rate": 0.01, "loss": 2.0, "step": 54318 }, { "epoch": 5.578823046112766, "grad_norm": 0.06898415833711624, "learning_rate": 0.01, "loss": 1.9785, "step": 54321 }, { "epoch": 5.579131149224607, "grad_norm": 0.07146736234426498, "learning_rate": 0.01, "loss": 1.9857, "step": 54324 }, { "epoch": 5.579439252336448, "grad_norm": 0.08930405974388123, "learning_rate": 0.01, "loss": 1.9994, "step": 54327 }, { "epoch": 5.57974735544829, "grad_norm": 0.11615147441625595, "learning_rate": 0.01, "loss": 1.9866, "step": 54330 }, { "epoch": 5.580055458560132, "grad_norm": 0.12027255445718765, "learning_rate": 0.01, "loss": 1.9856, "step": 54333 }, { "epoch": 5.580363561671973, "grad_norm": 0.09193901717662811, "learning_rate": 0.01, "loss": 1.9878, "step": 54336 }, { "epoch": 5.580671664783814, "grad_norm": 0.0674210712313652, "learning_rate": 0.01, "loss": 2.0032, "step": 54339 }, { "epoch": 5.580979767895656, "grad_norm": 0.05408472940325737, "learning_rate": 0.01, "loss": 1.9864, "step": 54342 }, { "epoch": 5.581287871007497, "grad_norm": 0.04289040341973305, "learning_rate": 0.01, "loss": 1.9922, "step": 54345 }, { "epoch": 5.581595974119338, "grad_norm": 0.04341943934559822, "learning_rate": 0.01, "loss": 1.9803, "step": 54348 }, { "epoch": 5.58190407723118, "grad_norm": 0.03630705177783966, "learning_rate": 0.01, "loss": 2.0038, "step": 54351 }, { "epoch": 5.582212180343022, "grad_norm": 0.10383928567171097, "learning_rate": 0.01, "loss": 1.9807, "step": 54354 }, { "epoch": 5.582520283454863, "grad_norm": 0.042623065412044525, "learning_rate": 0.01, "loss": 2.0048, "step": 54357 }, { "epoch": 5.582828386566704, "grad_norm": 0.07011984288692474, "learning_rate": 0.01, "loss": 2.0105, "step": 54360 }, { "epoch": 5.583136489678545, "grad_norm": 0.09444596618413925, "learning_rate": 0.01, "loss": 1.989, "step": 54363 }, { "epoch": 5.5834445927903875, "grad_norm": 0.0737951472401619, "learning_rate": 0.01, "loss": 1.9701, "step": 54366 }, { "epoch": 5.583752695902229, "grad_norm": 0.06040029227733612, "learning_rate": 0.01, "loss": 1.9901, "step": 54369 }, { "epoch": 5.58406079901407, "grad_norm": 0.059084903448820114, "learning_rate": 0.01, "loss": 1.9991, "step": 54372 }, { "epoch": 5.584368902125911, "grad_norm": 0.043141599744558334, "learning_rate": 0.01, "loss": 1.9886, "step": 54375 }, { "epoch": 5.584677005237753, "grad_norm": 0.07285004109144211, "learning_rate": 0.01, "loss": 1.9833, "step": 54378 }, { "epoch": 5.584985108349595, "grad_norm": 0.05245014652609825, "learning_rate": 0.01, "loss": 1.9755, "step": 54381 }, { "epoch": 5.585293211461436, "grad_norm": 0.06724348664283752, "learning_rate": 0.01, "loss": 1.984, "step": 54384 }, { "epoch": 5.585601314573277, "grad_norm": 0.0695512667298317, "learning_rate": 0.01, "loss": 1.9735, "step": 54387 }, { "epoch": 5.5859094176851185, "grad_norm": 0.08221160620450974, "learning_rate": 0.01, "loss": 2.0008, "step": 54390 }, { "epoch": 5.58621752079696, "grad_norm": 0.04641614854335785, "learning_rate": 0.01, "loss": 1.9788, "step": 54393 }, { "epoch": 5.586525623908802, "grad_norm": 0.04592394083738327, "learning_rate": 0.01, "loss": 2.0142, "step": 54396 }, { "epoch": 5.586833727020643, "grad_norm": 0.04568556696176529, "learning_rate": 0.01, "loss": 2.0158, "step": 54399 }, { "epoch": 5.587141830132484, "grad_norm": 0.05011072754859924, "learning_rate": 0.01, "loss": 1.9939, "step": 54402 }, { "epoch": 5.587449933244326, "grad_norm": 0.05919931083917618, "learning_rate": 0.01, "loss": 1.9815, "step": 54405 }, { "epoch": 5.587758036356167, "grad_norm": 0.1045934408903122, "learning_rate": 0.01, "loss": 1.9926, "step": 54408 }, { "epoch": 5.588066139468008, "grad_norm": 0.06998443603515625, "learning_rate": 0.01, "loss": 1.9896, "step": 54411 }, { "epoch": 5.58837424257985, "grad_norm": 0.07034524530172348, "learning_rate": 0.01, "loss": 1.9891, "step": 54414 }, { "epoch": 5.588682345691692, "grad_norm": 0.10572990775108337, "learning_rate": 0.01, "loss": 1.9869, "step": 54417 }, { "epoch": 5.588990448803533, "grad_norm": 0.04444350302219391, "learning_rate": 0.01, "loss": 1.9848, "step": 54420 }, { "epoch": 5.589298551915374, "grad_norm": 0.06670062988996506, "learning_rate": 0.01, "loss": 1.998, "step": 54423 }, { "epoch": 5.589606655027215, "grad_norm": 0.08078444004058838, "learning_rate": 0.01, "loss": 1.991, "step": 54426 }, { "epoch": 5.589914758139058, "grad_norm": 0.06824928522109985, "learning_rate": 0.01, "loss": 1.9973, "step": 54429 }, { "epoch": 5.590222861250899, "grad_norm": 0.04053177684545517, "learning_rate": 0.01, "loss": 2.0025, "step": 54432 }, { "epoch": 5.59053096436274, "grad_norm": 0.07821489870548248, "learning_rate": 0.01, "loss": 1.9914, "step": 54435 }, { "epoch": 5.590839067474581, "grad_norm": 0.09564534574747086, "learning_rate": 0.01, "loss": 2.0017, "step": 54438 }, { "epoch": 5.591147170586423, "grad_norm": 0.06303589791059494, "learning_rate": 0.01, "loss": 1.984, "step": 54441 }, { "epoch": 5.591455273698264, "grad_norm": 0.08767011016607285, "learning_rate": 0.01, "loss": 1.986, "step": 54444 }, { "epoch": 5.591763376810106, "grad_norm": 0.06719005852937698, "learning_rate": 0.01, "loss": 1.9885, "step": 54447 }, { "epoch": 5.592071479921947, "grad_norm": 0.0828474760055542, "learning_rate": 0.01, "loss": 2.0041, "step": 54450 }, { "epoch": 5.592379583033789, "grad_norm": 0.07154206931591034, "learning_rate": 0.01, "loss": 1.9997, "step": 54453 }, { "epoch": 5.59268768614563, "grad_norm": 0.0818270891904831, "learning_rate": 0.01, "loss": 1.9933, "step": 54456 }, { "epoch": 5.592995789257472, "grad_norm": 0.046949781477451324, "learning_rate": 0.01, "loss": 2.0029, "step": 54459 }, { "epoch": 5.593303892369313, "grad_norm": 0.17068155109882355, "learning_rate": 0.01, "loss": 1.9964, "step": 54462 }, { "epoch": 5.5936119954811545, "grad_norm": 0.14092612266540527, "learning_rate": 0.01, "loss": 2.0142, "step": 54465 }, { "epoch": 5.593920098592996, "grad_norm": 0.12458490580320358, "learning_rate": 0.01, "loss": 1.9845, "step": 54468 }, { "epoch": 5.594228201704837, "grad_norm": 0.07861830294132233, "learning_rate": 0.01, "loss": 1.9777, "step": 54471 }, { "epoch": 5.594536304816678, "grad_norm": 0.0819336324930191, "learning_rate": 0.01, "loss": 1.9998, "step": 54474 }, { "epoch": 5.5948444079285204, "grad_norm": 0.04734665900468826, "learning_rate": 0.01, "loss": 1.999, "step": 54477 }, { "epoch": 5.595152511040362, "grad_norm": 0.04812568798661232, "learning_rate": 0.01, "loss": 2.0041, "step": 54480 }, { "epoch": 5.595460614152203, "grad_norm": 0.03981874883174896, "learning_rate": 0.01, "loss": 1.9971, "step": 54483 }, { "epoch": 5.595768717264044, "grad_norm": 0.034884098917245865, "learning_rate": 0.01, "loss": 1.9842, "step": 54486 }, { "epoch": 5.5960768203758855, "grad_norm": 0.04170473292469978, "learning_rate": 0.01, "loss": 1.98, "step": 54489 }, { "epoch": 5.596384923487728, "grad_norm": 0.06037883087992668, "learning_rate": 0.01, "loss": 1.9795, "step": 54492 }, { "epoch": 5.596693026599569, "grad_norm": 0.06271529197692871, "learning_rate": 0.01, "loss": 1.976, "step": 54495 }, { "epoch": 5.59700112971141, "grad_norm": 0.0646272599697113, "learning_rate": 0.01, "loss": 1.9708, "step": 54498 }, { "epoch": 5.597309232823251, "grad_norm": 0.05499435216188431, "learning_rate": 0.01, "loss": 2.0049, "step": 54501 }, { "epoch": 5.597617335935093, "grad_norm": 0.06327464431524277, "learning_rate": 0.01, "loss": 2.0053, "step": 54504 }, { "epoch": 5.597925439046934, "grad_norm": 0.07843179255723953, "learning_rate": 0.01, "loss": 2.0019, "step": 54507 }, { "epoch": 5.598233542158776, "grad_norm": 0.13546060025691986, "learning_rate": 0.01, "loss": 1.9699, "step": 54510 }, { "epoch": 5.598541645270617, "grad_norm": 0.05903356522321701, "learning_rate": 0.01, "loss": 1.9957, "step": 54513 }, { "epoch": 5.598849748382459, "grad_norm": 0.10746420919895172, "learning_rate": 0.01, "loss": 1.9966, "step": 54516 }, { "epoch": 5.5991578514943, "grad_norm": 0.07332249730825424, "learning_rate": 0.01, "loss": 2.004, "step": 54519 }, { "epoch": 5.599465954606141, "grad_norm": 0.05419987067580223, "learning_rate": 0.01, "loss": 1.9958, "step": 54522 }, { "epoch": 5.599774057717983, "grad_norm": 0.06261706352233887, "learning_rate": 0.01, "loss": 1.9917, "step": 54525 }, { "epoch": 5.600082160829825, "grad_norm": 0.06889716535806656, "learning_rate": 0.01, "loss": 1.9771, "step": 54528 }, { "epoch": 5.600390263941666, "grad_norm": 0.07229785621166229, "learning_rate": 0.01, "loss": 1.9874, "step": 54531 }, { "epoch": 5.600698367053507, "grad_norm": 0.07027054578065872, "learning_rate": 0.01, "loss": 1.983, "step": 54534 }, { "epoch": 5.601006470165348, "grad_norm": 0.0769757479429245, "learning_rate": 0.01, "loss": 1.9774, "step": 54537 }, { "epoch": 5.60131457327719, "grad_norm": 0.07835566997528076, "learning_rate": 0.01, "loss": 2.0014, "step": 54540 }, { "epoch": 5.601622676389032, "grad_norm": 0.059996653348207474, "learning_rate": 0.01, "loss": 2.0125, "step": 54543 }, { "epoch": 5.601930779500873, "grad_norm": 0.0982513576745987, "learning_rate": 0.01, "loss": 1.9933, "step": 54546 }, { "epoch": 5.602238882612714, "grad_norm": 0.08362521976232529, "learning_rate": 0.01, "loss": 1.9973, "step": 54549 }, { "epoch": 5.602546985724556, "grad_norm": 0.0593801811337471, "learning_rate": 0.01, "loss": 2.0037, "step": 54552 }, { "epoch": 5.602855088836398, "grad_norm": 0.09806974232196808, "learning_rate": 0.01, "loss": 1.9792, "step": 54555 }, { "epoch": 5.603163191948239, "grad_norm": 0.041621893644332886, "learning_rate": 0.01, "loss": 2.0033, "step": 54558 }, { "epoch": 5.60347129506008, "grad_norm": 0.04981600120663643, "learning_rate": 0.01, "loss": 1.9978, "step": 54561 }, { "epoch": 5.6037793981719215, "grad_norm": 0.08302900940179825, "learning_rate": 0.01, "loss": 2.0004, "step": 54564 }, { "epoch": 5.604087501283763, "grad_norm": 0.03515220060944557, "learning_rate": 0.01, "loss": 1.991, "step": 54567 }, { "epoch": 5.604395604395604, "grad_norm": 0.0783744752407074, "learning_rate": 0.01, "loss": 2.0215, "step": 54570 }, { "epoch": 5.604703707507446, "grad_norm": 0.03960327431559563, "learning_rate": 0.01, "loss": 2.0049, "step": 54573 }, { "epoch": 5.605011810619287, "grad_norm": 0.04865705221891403, "learning_rate": 0.01, "loss": 1.9862, "step": 54576 }, { "epoch": 5.605319913731129, "grad_norm": 0.03959539160132408, "learning_rate": 0.01, "loss": 1.9959, "step": 54579 }, { "epoch": 5.60562801684297, "grad_norm": 0.08320534974336624, "learning_rate": 0.01, "loss": 1.9979, "step": 54582 }, { "epoch": 5.605936119954811, "grad_norm": 0.038179684430360794, "learning_rate": 0.01, "loss": 1.9987, "step": 54585 }, { "epoch": 5.606244223066653, "grad_norm": 0.04269682243466377, "learning_rate": 0.01, "loss": 1.9976, "step": 54588 }, { "epoch": 5.606552326178495, "grad_norm": 0.08585328608751297, "learning_rate": 0.01, "loss": 1.9669, "step": 54591 }, { "epoch": 5.606860429290336, "grad_norm": 0.04968829080462456, "learning_rate": 0.01, "loss": 1.9976, "step": 54594 }, { "epoch": 5.607168532402177, "grad_norm": 0.04485391452908516, "learning_rate": 0.01, "loss": 1.9963, "step": 54597 }, { "epoch": 5.607476635514018, "grad_norm": 0.03415564075112343, "learning_rate": 0.01, "loss": 1.9878, "step": 54600 }, { "epoch": 5.60778473862586, "grad_norm": 0.10459756851196289, "learning_rate": 0.01, "loss": 1.9868, "step": 54603 }, { "epoch": 5.608092841737702, "grad_norm": 0.04644683003425598, "learning_rate": 0.01, "loss": 1.995, "step": 54606 }, { "epoch": 5.608400944849543, "grad_norm": 0.09354628622531891, "learning_rate": 0.01, "loss": 1.9941, "step": 54609 }, { "epoch": 5.608709047961384, "grad_norm": 0.07625284790992737, "learning_rate": 0.01, "loss": 2.017, "step": 54612 }, { "epoch": 5.609017151073226, "grad_norm": 0.07845180481672287, "learning_rate": 0.01, "loss": 1.99, "step": 54615 }, { "epoch": 5.609325254185067, "grad_norm": 0.060051124542951584, "learning_rate": 0.01, "loss": 2.001, "step": 54618 }, { "epoch": 5.609633357296909, "grad_norm": 0.07250028848648071, "learning_rate": 0.01, "loss": 1.9713, "step": 54621 }, { "epoch": 5.60994146040875, "grad_norm": 0.10221341997385025, "learning_rate": 0.01, "loss": 1.9864, "step": 54624 }, { "epoch": 5.610249563520592, "grad_norm": 0.03648064285516739, "learning_rate": 0.01, "loss": 1.9845, "step": 54627 }, { "epoch": 5.610557666632433, "grad_norm": 0.04965173453092575, "learning_rate": 0.01, "loss": 1.9911, "step": 54630 }, { "epoch": 5.610865769744274, "grad_norm": 0.04213662073016167, "learning_rate": 0.01, "loss": 1.9916, "step": 54633 }, { "epoch": 5.611173872856115, "grad_norm": 0.11422538757324219, "learning_rate": 0.01, "loss": 1.9985, "step": 54636 }, { "epoch": 5.6114819759679575, "grad_norm": 0.054801687598228455, "learning_rate": 0.01, "loss": 1.9578, "step": 54639 }, { "epoch": 5.611790079079799, "grad_norm": 0.06683303415775299, "learning_rate": 0.01, "loss": 1.9818, "step": 54642 }, { "epoch": 5.61209818219164, "grad_norm": 0.04719147831201553, "learning_rate": 0.01, "loss": 2.0048, "step": 54645 }, { "epoch": 5.612406285303481, "grad_norm": 0.040852706879377365, "learning_rate": 0.01, "loss": 1.9856, "step": 54648 }, { "epoch": 5.612714388415323, "grad_norm": 0.038343146443367004, "learning_rate": 0.01, "loss": 1.9995, "step": 54651 }, { "epoch": 5.613022491527165, "grad_norm": 0.03426671028137207, "learning_rate": 0.01, "loss": 2.0135, "step": 54654 }, { "epoch": 5.613330594639006, "grad_norm": 0.08030149340629578, "learning_rate": 0.01, "loss": 1.988, "step": 54657 }, { "epoch": 5.613638697750847, "grad_norm": 0.04974358528852463, "learning_rate": 0.01, "loss": 1.9767, "step": 54660 }, { "epoch": 5.6139468008626885, "grad_norm": 0.04733881726861, "learning_rate": 0.01, "loss": 1.9959, "step": 54663 }, { "epoch": 5.61425490397453, "grad_norm": 0.04850970208644867, "learning_rate": 0.01, "loss": 2.0017, "step": 54666 }, { "epoch": 5.614563007086372, "grad_norm": 0.08880652487277985, "learning_rate": 0.01, "loss": 1.984, "step": 54669 }, { "epoch": 5.614871110198213, "grad_norm": 0.07070966809988022, "learning_rate": 0.01, "loss": 2.0034, "step": 54672 }, { "epoch": 5.615179213310054, "grad_norm": 0.043020039796829224, "learning_rate": 0.01, "loss": 2.0113, "step": 54675 }, { "epoch": 5.615487316421896, "grad_norm": 0.06512738764286041, "learning_rate": 0.01, "loss": 2.0029, "step": 54678 }, { "epoch": 5.615795419533737, "grad_norm": 0.09249335527420044, "learning_rate": 0.01, "loss": 1.988, "step": 54681 }, { "epoch": 5.616103522645579, "grad_norm": 0.12002009153366089, "learning_rate": 0.01, "loss": 2.0126, "step": 54684 }, { "epoch": 5.61641162575742, "grad_norm": 0.046711284667253494, "learning_rate": 0.01, "loss": 1.9996, "step": 54687 }, { "epoch": 5.616719728869262, "grad_norm": 0.0453648641705513, "learning_rate": 0.01, "loss": 1.9813, "step": 54690 }, { "epoch": 5.617027831981103, "grad_norm": 0.06286431849002838, "learning_rate": 0.01, "loss": 1.9911, "step": 54693 }, { "epoch": 5.617335935092944, "grad_norm": 0.10770335048437119, "learning_rate": 0.01, "loss": 1.9801, "step": 54696 }, { "epoch": 5.617644038204785, "grad_norm": 0.038174375891685486, "learning_rate": 0.01, "loss": 1.9972, "step": 54699 }, { "epoch": 5.617952141316628, "grad_norm": 0.0749521404504776, "learning_rate": 0.01, "loss": 1.9962, "step": 54702 }, { "epoch": 5.618260244428469, "grad_norm": 0.08033082634210587, "learning_rate": 0.01, "loss": 2.0188, "step": 54705 }, { "epoch": 5.61856834754031, "grad_norm": 0.06879189610481262, "learning_rate": 0.01, "loss": 2.0035, "step": 54708 }, { "epoch": 5.618876450652151, "grad_norm": 0.06201532483100891, "learning_rate": 0.01, "loss": 1.9886, "step": 54711 }, { "epoch": 5.6191845537639935, "grad_norm": 0.0482785739004612, "learning_rate": 0.01, "loss": 1.9988, "step": 54714 }, { "epoch": 5.619492656875835, "grad_norm": 0.08434705436229706, "learning_rate": 0.01, "loss": 1.9682, "step": 54717 }, { "epoch": 5.619800759987676, "grad_norm": 0.06971675902605057, "learning_rate": 0.01, "loss": 1.9853, "step": 54720 }, { "epoch": 5.620108863099517, "grad_norm": 0.05243263766169548, "learning_rate": 0.01, "loss": 2.0031, "step": 54723 }, { "epoch": 5.620416966211359, "grad_norm": 0.05071935057640076, "learning_rate": 0.01, "loss": 1.9857, "step": 54726 }, { "epoch": 5.6207250693232, "grad_norm": 0.07799027860164642, "learning_rate": 0.01, "loss": 2.0045, "step": 54729 }, { "epoch": 5.621033172435042, "grad_norm": 0.05585414171218872, "learning_rate": 0.01, "loss": 1.9517, "step": 54732 }, { "epoch": 5.621341275546883, "grad_norm": 0.04890783876180649, "learning_rate": 0.01, "loss": 2.0014, "step": 54735 }, { "epoch": 5.6216493786587245, "grad_norm": 0.035397402942180634, "learning_rate": 0.01, "loss": 1.9882, "step": 54738 }, { "epoch": 5.621957481770566, "grad_norm": 0.04959969222545624, "learning_rate": 0.01, "loss": 1.9935, "step": 54741 }, { "epoch": 5.622265584882407, "grad_norm": 0.1236480176448822, "learning_rate": 0.01, "loss": 1.9914, "step": 54744 }, { "epoch": 5.622573687994249, "grad_norm": 0.05167923495173454, "learning_rate": 0.01, "loss": 1.9898, "step": 54747 }, { "epoch": 5.62288179110609, "grad_norm": 0.08985866606235504, "learning_rate": 0.01, "loss": 1.9832, "step": 54750 }, { "epoch": 5.623189894217932, "grad_norm": 0.051048051565885544, "learning_rate": 0.01, "loss": 1.9796, "step": 54753 }, { "epoch": 5.623497997329773, "grad_norm": 0.034315209835767746, "learning_rate": 0.01, "loss": 2.0143, "step": 54756 }, { "epoch": 5.623806100441614, "grad_norm": 0.034064918756484985, "learning_rate": 0.01, "loss": 1.9891, "step": 54759 }, { "epoch": 5.6241142035534555, "grad_norm": 0.04847462847828865, "learning_rate": 0.01, "loss": 1.9785, "step": 54762 }, { "epoch": 5.624422306665298, "grad_norm": 0.036340802907943726, "learning_rate": 0.01, "loss": 1.9776, "step": 54765 }, { "epoch": 5.624730409777139, "grad_norm": 0.07472583651542664, "learning_rate": 0.01, "loss": 2.0025, "step": 54768 }, { "epoch": 5.62503851288898, "grad_norm": 0.05728251487016678, "learning_rate": 0.01, "loss": 1.9907, "step": 54771 }, { "epoch": 5.625346616000821, "grad_norm": 0.08100686967372894, "learning_rate": 0.01, "loss": 1.9901, "step": 54774 }, { "epoch": 5.625654719112663, "grad_norm": 0.03416838496923447, "learning_rate": 0.01, "loss": 1.9714, "step": 54777 }, { "epoch": 5.625962822224505, "grad_norm": 0.08783119171857834, "learning_rate": 0.01, "loss": 2.0181, "step": 54780 }, { "epoch": 5.626270925336346, "grad_norm": 0.06878135353326797, "learning_rate": 0.01, "loss": 1.9804, "step": 54783 }, { "epoch": 5.626579028448187, "grad_norm": 0.08717949688434601, "learning_rate": 0.01, "loss": 1.9825, "step": 54786 }, { "epoch": 5.626887131560029, "grad_norm": 0.06441211700439453, "learning_rate": 0.01, "loss": 1.9827, "step": 54789 }, { "epoch": 5.62719523467187, "grad_norm": 0.0610957108438015, "learning_rate": 0.01, "loss": 2.0121, "step": 54792 }, { "epoch": 5.627503337783711, "grad_norm": 0.04509379714727402, "learning_rate": 0.01, "loss": 1.9932, "step": 54795 }, { "epoch": 5.627811440895553, "grad_norm": 0.05279795825481415, "learning_rate": 0.01, "loss": 1.998, "step": 54798 }, { "epoch": 5.628119544007395, "grad_norm": 0.03723832964897156, "learning_rate": 0.01, "loss": 1.9857, "step": 54801 }, { "epoch": 5.628427647119236, "grad_norm": 0.10692798346281052, "learning_rate": 0.01, "loss": 1.9708, "step": 54804 }, { "epoch": 5.628735750231077, "grad_norm": 0.09552669525146484, "learning_rate": 0.01, "loss": 1.9862, "step": 54807 }, { "epoch": 5.629043853342919, "grad_norm": 0.11247537285089493, "learning_rate": 0.01, "loss": 1.9629, "step": 54810 }, { "epoch": 5.6293519564547605, "grad_norm": 0.08334960043430328, "learning_rate": 0.01, "loss": 1.9591, "step": 54813 }, { "epoch": 5.629660059566602, "grad_norm": 0.03950768709182739, "learning_rate": 0.01, "loss": 1.993, "step": 54816 }, { "epoch": 5.629968162678443, "grad_norm": 0.032677773386240005, "learning_rate": 0.01, "loss": 1.9838, "step": 54819 }, { "epoch": 5.630276265790284, "grad_norm": 0.0373060442507267, "learning_rate": 0.01, "loss": 1.9888, "step": 54822 }, { "epoch": 5.6305843689021255, "grad_norm": 0.07568337768316269, "learning_rate": 0.01, "loss": 1.9853, "step": 54825 }, { "epoch": 5.630892472013968, "grad_norm": 0.08441541343927383, "learning_rate": 0.01, "loss": 2.017, "step": 54828 }, { "epoch": 5.631200575125809, "grad_norm": 0.06486140936613083, "learning_rate": 0.01, "loss": 1.9929, "step": 54831 }, { "epoch": 5.63150867823765, "grad_norm": 0.06372927129268646, "learning_rate": 0.01, "loss": 1.9953, "step": 54834 }, { "epoch": 5.6318167813494915, "grad_norm": 0.06529858708381653, "learning_rate": 0.01, "loss": 2.0085, "step": 54837 }, { "epoch": 5.632124884461333, "grad_norm": 0.08682098984718323, "learning_rate": 0.01, "loss": 1.9814, "step": 54840 }, { "epoch": 5.632432987573175, "grad_norm": 0.05896419286727905, "learning_rate": 0.01, "loss": 2.0086, "step": 54843 }, { "epoch": 5.632741090685016, "grad_norm": 0.06567796319723129, "learning_rate": 0.01, "loss": 1.9803, "step": 54846 }, { "epoch": 5.633049193796857, "grad_norm": 0.06519652158021927, "learning_rate": 0.01, "loss": 2.0124, "step": 54849 }, { "epoch": 5.633357296908699, "grad_norm": 0.09215736389160156, "learning_rate": 0.01, "loss": 1.9772, "step": 54852 }, { "epoch": 5.63366540002054, "grad_norm": 0.08821207284927368, "learning_rate": 0.01, "loss": 1.9821, "step": 54855 }, { "epoch": 5.633973503132381, "grad_norm": 0.06266074627637863, "learning_rate": 0.01, "loss": 2.0139, "step": 54858 }, { "epoch": 5.634281606244223, "grad_norm": 0.11765746027231216, "learning_rate": 0.01, "loss": 2.0164, "step": 54861 }, { "epoch": 5.634589709356065, "grad_norm": 0.07203897088766098, "learning_rate": 0.01, "loss": 1.9625, "step": 54864 }, { "epoch": 5.634897812467906, "grad_norm": 0.05088105797767639, "learning_rate": 0.01, "loss": 1.9678, "step": 54867 }, { "epoch": 5.635205915579747, "grad_norm": 0.04583646357059479, "learning_rate": 0.01, "loss": 1.9923, "step": 54870 }, { "epoch": 5.635514018691588, "grad_norm": 0.03866413235664368, "learning_rate": 0.01, "loss": 1.989, "step": 54873 }, { "epoch": 5.635822121803431, "grad_norm": 0.02919906936585903, "learning_rate": 0.01, "loss": 1.9957, "step": 54876 }, { "epoch": 5.636130224915272, "grad_norm": 0.04621945321559906, "learning_rate": 0.01, "loss": 1.9813, "step": 54879 }, { "epoch": 5.636438328027113, "grad_norm": 0.131577268242836, "learning_rate": 0.01, "loss": 2.0007, "step": 54882 }, { "epoch": 5.636746431138954, "grad_norm": 0.13305455446243286, "learning_rate": 0.01, "loss": 1.9824, "step": 54885 }, { "epoch": 5.637054534250796, "grad_norm": 0.046413157135248184, "learning_rate": 0.01, "loss": 1.9809, "step": 54888 }, { "epoch": 5.637362637362637, "grad_norm": 0.0828058049082756, "learning_rate": 0.01, "loss": 1.9907, "step": 54891 }, { "epoch": 5.637670740474479, "grad_norm": 0.09685652703046799, "learning_rate": 0.01, "loss": 2.0036, "step": 54894 }, { "epoch": 5.63797884358632, "grad_norm": 0.055401384830474854, "learning_rate": 0.01, "loss": 1.9648, "step": 54897 }, { "epoch": 5.6382869466981616, "grad_norm": 0.04287857934832573, "learning_rate": 0.01, "loss": 1.9692, "step": 54900 }, { "epoch": 5.638595049810003, "grad_norm": 0.09044301509857178, "learning_rate": 0.01, "loss": 1.9839, "step": 54903 }, { "epoch": 5.638903152921845, "grad_norm": 0.07684798538684845, "learning_rate": 0.01, "loss": 1.973, "step": 54906 }, { "epoch": 5.639211256033686, "grad_norm": 0.05879480764269829, "learning_rate": 0.01, "loss": 1.9706, "step": 54909 }, { "epoch": 5.6395193591455275, "grad_norm": 0.04824329912662506, "learning_rate": 0.01, "loss": 1.9882, "step": 54912 }, { "epoch": 5.639827462257369, "grad_norm": 0.0385722778737545, "learning_rate": 0.01, "loss": 2.0095, "step": 54915 }, { "epoch": 5.64013556536921, "grad_norm": 0.10394332557916641, "learning_rate": 0.01, "loss": 2.0104, "step": 54918 }, { "epoch": 5.640443668481051, "grad_norm": 0.08453499525785446, "learning_rate": 0.01, "loss": 2.0022, "step": 54921 }, { "epoch": 5.640751771592893, "grad_norm": 0.037400417029857635, "learning_rate": 0.01, "loss": 1.9885, "step": 54924 }, { "epoch": 5.641059874704735, "grad_norm": 0.04908459633588791, "learning_rate": 0.01, "loss": 2.0018, "step": 54927 }, { "epoch": 5.641367977816576, "grad_norm": 0.06965642422437668, "learning_rate": 0.01, "loss": 1.9925, "step": 54930 }, { "epoch": 5.641676080928417, "grad_norm": 0.11287185549736023, "learning_rate": 0.01, "loss": 2.0021, "step": 54933 }, { "epoch": 5.6419841840402585, "grad_norm": 0.039775166660547256, "learning_rate": 0.01, "loss": 2.0144, "step": 54936 }, { "epoch": 5.642292287152101, "grad_norm": 0.05167205631732941, "learning_rate": 0.01, "loss": 2.0006, "step": 54939 }, { "epoch": 5.642600390263942, "grad_norm": 0.09172987937927246, "learning_rate": 0.01, "loss": 1.9867, "step": 54942 }, { "epoch": 5.642908493375783, "grad_norm": 0.11361383646726608, "learning_rate": 0.01, "loss": 2.0029, "step": 54945 }, { "epoch": 5.643216596487624, "grad_norm": 0.07098819315433502, "learning_rate": 0.01, "loss": 1.9842, "step": 54948 }, { "epoch": 5.643524699599466, "grad_norm": 0.06430887430906296, "learning_rate": 0.01, "loss": 1.9575, "step": 54951 }, { "epoch": 5.643832802711307, "grad_norm": 0.04281394183635712, "learning_rate": 0.01, "loss": 1.98, "step": 54954 }, { "epoch": 5.644140905823149, "grad_norm": 0.04618782550096512, "learning_rate": 0.01, "loss": 1.9854, "step": 54957 }, { "epoch": 5.64444900893499, "grad_norm": 0.049203407019376755, "learning_rate": 0.01, "loss": 1.9667, "step": 54960 }, { "epoch": 5.644757112046832, "grad_norm": 0.07782324403524399, "learning_rate": 0.01, "loss": 1.9784, "step": 54963 }, { "epoch": 5.645065215158673, "grad_norm": 0.11196355521678925, "learning_rate": 0.01, "loss": 2.0154, "step": 54966 }, { "epoch": 5.645373318270515, "grad_norm": 0.08459745347499847, "learning_rate": 0.01, "loss": 1.9958, "step": 54969 }, { "epoch": 5.645681421382356, "grad_norm": 0.05610903725028038, "learning_rate": 0.01, "loss": 2.0065, "step": 54972 }, { "epoch": 5.6459895244941976, "grad_norm": 0.07638850063085556, "learning_rate": 0.01, "loss": 1.9952, "step": 54975 }, { "epoch": 5.646297627606039, "grad_norm": 0.0996907651424408, "learning_rate": 0.01, "loss": 1.9908, "step": 54978 }, { "epoch": 5.64660573071788, "grad_norm": 0.05218489095568657, "learning_rate": 0.01, "loss": 2.0118, "step": 54981 }, { "epoch": 5.646913833829721, "grad_norm": 0.05683809146285057, "learning_rate": 0.01, "loss": 1.9972, "step": 54984 }, { "epoch": 5.6472219369415635, "grad_norm": 0.08494491875171661, "learning_rate": 0.01, "loss": 2.0059, "step": 54987 }, { "epoch": 5.647530040053405, "grad_norm": 0.09330440312623978, "learning_rate": 0.01, "loss": 1.9925, "step": 54990 }, { "epoch": 5.647838143165246, "grad_norm": 0.1456758677959442, "learning_rate": 0.01, "loss": 1.9906, "step": 54993 }, { "epoch": 5.648146246277087, "grad_norm": 0.13807424902915955, "learning_rate": 0.01, "loss": 1.9826, "step": 54996 }, { "epoch": 5.6484543493889285, "grad_norm": 0.0649813711643219, "learning_rate": 0.01, "loss": 2.017, "step": 54999 }, { "epoch": 5.648762452500771, "grad_norm": 0.0742340162396431, "learning_rate": 0.01, "loss": 1.9713, "step": 55002 }, { "epoch": 5.649070555612612, "grad_norm": 0.09716660529375076, "learning_rate": 0.01, "loss": 1.9821, "step": 55005 }, { "epoch": 5.649378658724453, "grad_norm": 0.056228429079055786, "learning_rate": 0.01, "loss": 1.9979, "step": 55008 }, { "epoch": 5.6496867618362945, "grad_norm": 0.045122385025024414, "learning_rate": 0.01, "loss": 1.9805, "step": 55011 }, { "epoch": 5.649994864948136, "grad_norm": 0.0388353131711483, "learning_rate": 0.01, "loss": 2.0094, "step": 55014 }, { "epoch": 5.650302968059977, "grad_norm": 0.03766791522502899, "learning_rate": 0.01, "loss": 2.0001, "step": 55017 }, { "epoch": 5.650611071171819, "grad_norm": 0.037102095782756805, "learning_rate": 0.01, "loss": 1.9865, "step": 55020 }, { "epoch": 5.65091917428366, "grad_norm": 0.03547612577676773, "learning_rate": 0.01, "loss": 2.001, "step": 55023 }, { "epoch": 5.651227277395502, "grad_norm": 0.14113888144493103, "learning_rate": 0.01, "loss": 2.0096, "step": 55026 }, { "epoch": 5.651535380507343, "grad_norm": 0.13051262497901917, "learning_rate": 0.01, "loss": 2.002, "step": 55029 }, { "epoch": 5.651843483619184, "grad_norm": 0.050375718623399734, "learning_rate": 0.01, "loss": 1.9995, "step": 55032 }, { "epoch": 5.652151586731026, "grad_norm": 0.05951124429702759, "learning_rate": 0.01, "loss": 2.0184, "step": 55035 }, { "epoch": 5.652459689842868, "grad_norm": 0.0422302782535553, "learning_rate": 0.01, "loss": 1.9875, "step": 55038 }, { "epoch": 5.652767792954709, "grad_norm": 0.04383467882871628, "learning_rate": 0.01, "loss": 1.9966, "step": 55041 }, { "epoch": 5.65307589606655, "grad_norm": 0.06659442186355591, "learning_rate": 0.01, "loss": 1.9942, "step": 55044 }, { "epoch": 5.653383999178391, "grad_norm": 0.03979937732219696, "learning_rate": 0.01, "loss": 2.009, "step": 55047 }, { "epoch": 5.653692102290233, "grad_norm": 0.07855933159589767, "learning_rate": 0.01, "loss": 2.0032, "step": 55050 }, { "epoch": 5.654000205402075, "grad_norm": 0.06732465326786041, "learning_rate": 0.01, "loss": 1.9929, "step": 55053 }, { "epoch": 5.654308308513916, "grad_norm": 0.057772446423769, "learning_rate": 0.01, "loss": 1.9747, "step": 55056 }, { "epoch": 5.654616411625757, "grad_norm": 0.08292173594236374, "learning_rate": 0.01, "loss": 1.9825, "step": 55059 }, { "epoch": 5.654924514737599, "grad_norm": 0.0935162603855133, "learning_rate": 0.01, "loss": 2.0047, "step": 55062 }, { "epoch": 5.655232617849441, "grad_norm": 0.06377442181110382, "learning_rate": 0.01, "loss": 1.9789, "step": 55065 }, { "epoch": 5.655540720961282, "grad_norm": 0.08845983445644379, "learning_rate": 0.01, "loss": 2.0187, "step": 55068 }, { "epoch": 5.655848824073123, "grad_norm": 0.06194892153143883, "learning_rate": 0.01, "loss": 1.9933, "step": 55071 }, { "epoch": 5.6561569271849645, "grad_norm": 0.06892132014036179, "learning_rate": 0.01, "loss": 2.0008, "step": 55074 }, { "epoch": 5.656465030296806, "grad_norm": 0.04041128605604172, "learning_rate": 0.01, "loss": 1.9889, "step": 55077 }, { "epoch": 5.656773133408647, "grad_norm": 0.09989805519580841, "learning_rate": 0.01, "loss": 1.9763, "step": 55080 }, { "epoch": 5.657081236520489, "grad_norm": 0.0866503044962883, "learning_rate": 0.01, "loss": 1.9806, "step": 55083 }, { "epoch": 5.6573893396323305, "grad_norm": 0.043805480003356934, "learning_rate": 0.01, "loss": 1.9971, "step": 55086 }, { "epoch": 5.657697442744172, "grad_norm": 0.038269441574811935, "learning_rate": 0.01, "loss": 1.9519, "step": 55089 }, { "epoch": 5.658005545856013, "grad_norm": 0.04652794450521469, "learning_rate": 0.01, "loss": 1.9728, "step": 55092 }, { "epoch": 5.658313648967854, "grad_norm": 0.09699436277151108, "learning_rate": 0.01, "loss": 2.0021, "step": 55095 }, { "epoch": 5.658621752079696, "grad_norm": 0.07722003012895584, "learning_rate": 0.01, "loss": 2.0101, "step": 55098 }, { "epoch": 5.658929855191538, "grad_norm": 0.08195403218269348, "learning_rate": 0.01, "loss": 2.0104, "step": 55101 }, { "epoch": 5.659237958303379, "grad_norm": 0.04434705525636673, "learning_rate": 0.01, "loss": 1.981, "step": 55104 }, { "epoch": 5.65954606141522, "grad_norm": 0.03978164121508598, "learning_rate": 0.01, "loss": 2.0033, "step": 55107 }, { "epoch": 5.6598541645270615, "grad_norm": 0.1201346144080162, "learning_rate": 0.01, "loss": 1.9801, "step": 55110 }, { "epoch": 5.660162267638903, "grad_norm": 0.17597338557243347, "learning_rate": 0.01, "loss": 2.0094, "step": 55113 }, { "epoch": 5.660470370750745, "grad_norm": 0.13311070203781128, "learning_rate": 0.01, "loss": 2.0209, "step": 55116 }, { "epoch": 5.660778473862586, "grad_norm": 0.11461780220270157, "learning_rate": 0.01, "loss": 1.9889, "step": 55119 }, { "epoch": 5.661086576974427, "grad_norm": 0.04778537154197693, "learning_rate": 0.01, "loss": 1.9912, "step": 55122 }, { "epoch": 5.661394680086269, "grad_norm": 0.03900016471743584, "learning_rate": 0.01, "loss": 1.9935, "step": 55125 }, { "epoch": 5.66170278319811, "grad_norm": 0.03934657946228981, "learning_rate": 0.01, "loss": 2.0063, "step": 55128 }, { "epoch": 5.662010886309952, "grad_norm": 0.07235165685415268, "learning_rate": 0.01, "loss": 2.0177, "step": 55131 }, { "epoch": 5.662318989421793, "grad_norm": 0.09168880432844162, "learning_rate": 0.01, "loss": 2.0312, "step": 55134 }, { "epoch": 5.662627092533635, "grad_norm": 0.058992091566324234, "learning_rate": 0.01, "loss": 1.984, "step": 55137 }, { "epoch": 5.662935195645476, "grad_norm": 0.044471897184848785, "learning_rate": 0.01, "loss": 2.0053, "step": 55140 }, { "epoch": 5.663243298757317, "grad_norm": 0.04644366726279259, "learning_rate": 0.01, "loss": 2.018, "step": 55143 }, { "epoch": 5.663551401869158, "grad_norm": 0.04064124450087547, "learning_rate": 0.01, "loss": 1.9876, "step": 55146 }, { "epoch": 5.6638595049810005, "grad_norm": 0.0546630322933197, "learning_rate": 0.01, "loss": 1.9961, "step": 55149 }, { "epoch": 5.664167608092842, "grad_norm": 0.07874899357557297, "learning_rate": 0.01, "loss": 2.0005, "step": 55152 }, { "epoch": 5.664475711204683, "grad_norm": 0.10178565979003906, "learning_rate": 0.01, "loss": 1.9983, "step": 55155 }, { "epoch": 5.664783814316524, "grad_norm": 0.10570824146270752, "learning_rate": 0.01, "loss": 1.9842, "step": 55158 }, { "epoch": 5.6650919174283665, "grad_norm": 0.046325813978910446, "learning_rate": 0.01, "loss": 2.0006, "step": 55161 }, { "epoch": 5.665400020540208, "grad_norm": 0.05771588161587715, "learning_rate": 0.01, "loss": 2.0082, "step": 55164 }, { "epoch": 5.665708123652049, "grad_norm": 0.0589786060154438, "learning_rate": 0.01, "loss": 1.9781, "step": 55167 }, { "epoch": 5.66601622676389, "grad_norm": 0.043214842677116394, "learning_rate": 0.01, "loss": 1.9791, "step": 55170 }, { "epoch": 5.6663243298757315, "grad_norm": 0.09702472388744354, "learning_rate": 0.01, "loss": 1.9972, "step": 55173 }, { "epoch": 5.666632432987573, "grad_norm": 0.04949367418885231, "learning_rate": 0.01, "loss": 2.001, "step": 55176 }, { "epoch": 5.666940536099415, "grad_norm": 0.10156387835741043, "learning_rate": 0.01, "loss": 1.9972, "step": 55179 }, { "epoch": 5.667248639211256, "grad_norm": 0.04134589806199074, "learning_rate": 0.01, "loss": 1.992, "step": 55182 }, { "epoch": 5.6675567423230975, "grad_norm": 0.09493035078048706, "learning_rate": 0.01, "loss": 1.9788, "step": 55185 }, { "epoch": 5.667864845434939, "grad_norm": 0.06814124435186386, "learning_rate": 0.01, "loss": 1.9685, "step": 55188 }, { "epoch": 5.66817294854678, "grad_norm": 0.05078943446278572, "learning_rate": 0.01, "loss": 1.9709, "step": 55191 }, { "epoch": 5.668481051658622, "grad_norm": 0.04670390486717224, "learning_rate": 0.01, "loss": 1.9826, "step": 55194 }, { "epoch": 5.668789154770463, "grad_norm": 0.04085162654519081, "learning_rate": 0.01, "loss": 1.9787, "step": 55197 }, { "epoch": 5.669097257882305, "grad_norm": 0.05365972965955734, "learning_rate": 0.01, "loss": 2.0002, "step": 55200 }, { "epoch": 5.669405360994146, "grad_norm": 0.06176181137561798, "learning_rate": 0.01, "loss": 1.9751, "step": 55203 }, { "epoch": 5.669713464105987, "grad_norm": 0.08370962738990784, "learning_rate": 0.01, "loss": 1.9991, "step": 55206 }, { "epoch": 5.6700215672178285, "grad_norm": 0.06180374324321747, "learning_rate": 0.01, "loss": 1.9872, "step": 55209 }, { "epoch": 5.670329670329671, "grad_norm": 0.04040297865867615, "learning_rate": 0.01, "loss": 1.9788, "step": 55212 }, { "epoch": 5.670637773441512, "grad_norm": 0.041469234973192215, "learning_rate": 0.01, "loss": 1.9993, "step": 55215 }, { "epoch": 5.670945876553353, "grad_norm": 0.04767376556992531, "learning_rate": 0.01, "loss": 1.9957, "step": 55218 }, { "epoch": 5.671253979665194, "grad_norm": 0.127590611577034, "learning_rate": 0.01, "loss": 1.9806, "step": 55221 }, { "epoch": 5.671562082777036, "grad_norm": 0.09273551404476166, "learning_rate": 0.01, "loss": 1.9828, "step": 55224 }, { "epoch": 5.671870185888878, "grad_norm": 0.06734216213226318, "learning_rate": 0.01, "loss": 1.9951, "step": 55227 }, { "epoch": 5.672178289000719, "grad_norm": 0.04960544407367706, "learning_rate": 0.01, "loss": 1.9583, "step": 55230 }, { "epoch": 5.67248639211256, "grad_norm": 0.08085974305868149, "learning_rate": 0.01, "loss": 2.0236, "step": 55233 }, { "epoch": 5.672794495224402, "grad_norm": 0.09541311115026474, "learning_rate": 0.01, "loss": 2.0077, "step": 55236 }, { "epoch": 5.673102598336243, "grad_norm": 0.04482199624180794, "learning_rate": 0.01, "loss": 2.0372, "step": 55239 }, { "epoch": 5.673410701448085, "grad_norm": 0.0415610708296299, "learning_rate": 0.01, "loss": 2.0211, "step": 55242 }, { "epoch": 5.673718804559926, "grad_norm": 0.08066660910844803, "learning_rate": 0.01, "loss": 2.0093, "step": 55245 }, { "epoch": 5.6740269076717675, "grad_norm": 0.06819882988929749, "learning_rate": 0.01, "loss": 1.9637, "step": 55248 }, { "epoch": 5.674335010783609, "grad_norm": 0.10055188089609146, "learning_rate": 0.01, "loss": 1.9581, "step": 55251 }, { "epoch": 5.67464311389545, "grad_norm": 0.05193830654025078, "learning_rate": 0.01, "loss": 2.0189, "step": 55254 }, { "epoch": 5.674951217007292, "grad_norm": 0.09766846150159836, "learning_rate": 0.01, "loss": 1.9897, "step": 55257 }, { "epoch": 5.6752593201191335, "grad_norm": 0.041506506502628326, "learning_rate": 0.01, "loss": 1.9807, "step": 55260 }, { "epoch": 5.675567423230975, "grad_norm": 0.04589756205677986, "learning_rate": 0.01, "loss": 1.9822, "step": 55263 }, { "epoch": 5.675875526342816, "grad_norm": 0.05841269716620445, "learning_rate": 0.01, "loss": 1.9997, "step": 55266 }, { "epoch": 5.676183629454657, "grad_norm": 0.08855876326560974, "learning_rate": 0.01, "loss": 2.0031, "step": 55269 }, { "epoch": 5.6764917325664985, "grad_norm": 0.055619269609451294, "learning_rate": 0.01, "loss": 2.0033, "step": 55272 }, { "epoch": 5.676799835678341, "grad_norm": 0.04398469999432564, "learning_rate": 0.01, "loss": 2.0109, "step": 55275 }, { "epoch": 5.677107938790182, "grad_norm": 0.073618583381176, "learning_rate": 0.01, "loss": 1.9971, "step": 55278 }, { "epoch": 5.677416041902023, "grad_norm": 0.03379726782441139, "learning_rate": 0.01, "loss": 2.0152, "step": 55281 }, { "epoch": 5.6777241450138645, "grad_norm": 0.040897171944379807, "learning_rate": 0.01, "loss": 1.9854, "step": 55284 }, { "epoch": 5.678032248125706, "grad_norm": 0.09364822506904602, "learning_rate": 0.01, "loss": 2.02, "step": 55287 }, { "epoch": 5.678340351237548, "grad_norm": 0.05096183717250824, "learning_rate": 0.01, "loss": 2.0101, "step": 55290 }, { "epoch": 5.678648454349389, "grad_norm": 0.03920522332191467, "learning_rate": 0.01, "loss": 1.9825, "step": 55293 }, { "epoch": 5.67895655746123, "grad_norm": 0.12465393543243408, "learning_rate": 0.01, "loss": 2.0117, "step": 55296 }, { "epoch": 5.679264660573072, "grad_norm": 0.0732463076710701, "learning_rate": 0.01, "loss": 1.9749, "step": 55299 }, { "epoch": 5.679572763684913, "grad_norm": 0.048661403357982635, "learning_rate": 0.01, "loss": 2.0204, "step": 55302 }, { "epoch": 5.679880866796754, "grad_norm": 0.072269506752491, "learning_rate": 0.01, "loss": 1.9664, "step": 55305 }, { "epoch": 5.680188969908596, "grad_norm": 0.09489680081605911, "learning_rate": 0.01, "loss": 1.9677, "step": 55308 }, { "epoch": 5.680497073020438, "grad_norm": 0.05124731361865997, "learning_rate": 0.01, "loss": 1.9919, "step": 55311 }, { "epoch": 5.680805176132279, "grad_norm": 0.05603638291358948, "learning_rate": 0.01, "loss": 2.021, "step": 55314 }, { "epoch": 5.68111327924412, "grad_norm": 0.045195356011390686, "learning_rate": 0.01, "loss": 1.9934, "step": 55317 }, { "epoch": 5.681421382355962, "grad_norm": 0.08664342761039734, "learning_rate": 0.01, "loss": 1.985, "step": 55320 }, { "epoch": 5.6817294854678035, "grad_norm": 0.06681100279092789, "learning_rate": 0.01, "loss": 1.9705, "step": 55323 }, { "epoch": 5.682037588579645, "grad_norm": 0.040817707777023315, "learning_rate": 0.01, "loss": 1.9747, "step": 55326 }, { "epoch": 5.682345691691486, "grad_norm": 0.04150600731372833, "learning_rate": 0.01, "loss": 1.986, "step": 55329 }, { "epoch": 5.682653794803327, "grad_norm": 0.05804332345724106, "learning_rate": 0.01, "loss": 2.0082, "step": 55332 }, { "epoch": 5.682961897915169, "grad_norm": 0.06679341942071915, "learning_rate": 0.01, "loss": 2.0132, "step": 55335 }, { "epoch": 5.683270001027011, "grad_norm": 0.1082872524857521, "learning_rate": 0.01, "loss": 1.9618, "step": 55338 }, { "epoch": 5.683578104138852, "grad_norm": 0.039868131279945374, "learning_rate": 0.01, "loss": 1.9962, "step": 55341 }, { "epoch": 5.683886207250693, "grad_norm": 0.056761614978313446, "learning_rate": 0.01, "loss": 1.9671, "step": 55344 }, { "epoch": 5.6841943103625345, "grad_norm": 0.05743727087974548, "learning_rate": 0.01, "loss": 2.0146, "step": 55347 }, { "epoch": 5.684502413474376, "grad_norm": 0.07240551710128784, "learning_rate": 0.01, "loss": 1.9733, "step": 55350 }, { "epoch": 5.684810516586218, "grad_norm": 0.04794316738843918, "learning_rate": 0.01, "loss": 1.9986, "step": 55353 }, { "epoch": 5.685118619698059, "grad_norm": 0.03990946710109711, "learning_rate": 0.01, "loss": 1.9905, "step": 55356 }, { "epoch": 5.6854267228099005, "grad_norm": 0.030655885115265846, "learning_rate": 0.01, "loss": 1.9831, "step": 55359 }, { "epoch": 5.685734825921742, "grad_norm": 0.05635961890220642, "learning_rate": 0.01, "loss": 1.9743, "step": 55362 }, { "epoch": 5.686042929033583, "grad_norm": 0.058308038860559464, "learning_rate": 0.01, "loss": 2.0184, "step": 55365 }, { "epoch": 5.686351032145424, "grad_norm": 0.08183693885803223, "learning_rate": 0.01, "loss": 1.9729, "step": 55368 }, { "epoch": 5.686659135257266, "grad_norm": 0.14002621173858643, "learning_rate": 0.01, "loss": 2.0092, "step": 55371 }, { "epoch": 5.686967238369108, "grad_norm": 0.06427164375782013, "learning_rate": 0.01, "loss": 2.0034, "step": 55374 }, { "epoch": 5.687275341480949, "grad_norm": 0.03860678896307945, "learning_rate": 0.01, "loss": 1.9951, "step": 55377 }, { "epoch": 5.68758344459279, "grad_norm": 0.035588864237070084, "learning_rate": 0.01, "loss": 1.9627, "step": 55380 }, { "epoch": 5.6878915477046315, "grad_norm": 0.035896506160497665, "learning_rate": 0.01, "loss": 1.9818, "step": 55383 }, { "epoch": 5.688199650816474, "grad_norm": 0.042555298656225204, "learning_rate": 0.01, "loss": 2.0008, "step": 55386 }, { "epoch": 5.688507753928315, "grad_norm": 0.061510197818279266, "learning_rate": 0.01, "loss": 2.0048, "step": 55389 }, { "epoch": 5.688815857040156, "grad_norm": 0.058596979826688766, "learning_rate": 0.01, "loss": 1.989, "step": 55392 }, { "epoch": 5.689123960151997, "grad_norm": 0.14717184007167816, "learning_rate": 0.01, "loss": 1.9873, "step": 55395 }, { "epoch": 5.689432063263839, "grad_norm": 0.1496891975402832, "learning_rate": 0.01, "loss": 2.013, "step": 55398 }, { "epoch": 5.68974016637568, "grad_norm": 0.09148060530424118, "learning_rate": 0.01, "loss": 1.9623, "step": 55401 }, { "epoch": 5.690048269487522, "grad_norm": 0.06617487967014313, "learning_rate": 0.01, "loss": 1.9755, "step": 55404 }, { "epoch": 5.690356372599363, "grad_norm": 0.0447409562766552, "learning_rate": 0.01, "loss": 1.9838, "step": 55407 }, { "epoch": 5.690664475711205, "grad_norm": 0.07360169291496277, "learning_rate": 0.01, "loss": 1.9893, "step": 55410 }, { "epoch": 5.690972578823046, "grad_norm": 0.03748020529747009, "learning_rate": 0.01, "loss": 1.9817, "step": 55413 }, { "epoch": 5.691280681934888, "grad_norm": 0.0812792256474495, "learning_rate": 0.01, "loss": 1.9723, "step": 55416 }, { "epoch": 5.691588785046729, "grad_norm": 0.06878095865249634, "learning_rate": 0.01, "loss": 2.0051, "step": 55419 }, { "epoch": 5.6918968881585705, "grad_norm": 0.10537760704755783, "learning_rate": 0.01, "loss": 2.0093, "step": 55422 }, { "epoch": 5.692204991270412, "grad_norm": 0.04603094980120659, "learning_rate": 0.01, "loss": 1.9772, "step": 55425 }, { "epoch": 5.692513094382253, "grad_norm": 0.09780580550432205, "learning_rate": 0.01, "loss": 1.9796, "step": 55428 }, { "epoch": 5.692821197494094, "grad_norm": 0.07139319181442261, "learning_rate": 0.01, "loss": 2.0059, "step": 55431 }, { "epoch": 5.6931293006059365, "grad_norm": 0.10430671274662018, "learning_rate": 0.01, "loss": 1.9902, "step": 55434 }, { "epoch": 5.693437403717778, "grad_norm": 0.07455434650182724, "learning_rate": 0.01, "loss": 1.9818, "step": 55437 }, { "epoch": 5.693745506829619, "grad_norm": 0.06788843870162964, "learning_rate": 0.01, "loss": 1.9998, "step": 55440 }, { "epoch": 5.69405360994146, "grad_norm": 0.07325445860624313, "learning_rate": 0.01, "loss": 1.9999, "step": 55443 }, { "epoch": 5.6943617130533015, "grad_norm": 0.06702747941017151, "learning_rate": 0.01, "loss": 1.9889, "step": 55446 }, { "epoch": 5.694669816165144, "grad_norm": 0.05118415504693985, "learning_rate": 0.01, "loss": 1.9879, "step": 55449 }, { "epoch": 5.694977919276985, "grad_norm": 0.08692149817943573, "learning_rate": 0.01, "loss": 1.9744, "step": 55452 }, { "epoch": 5.695286022388826, "grad_norm": 0.11190303415060043, "learning_rate": 0.01, "loss": 2.0081, "step": 55455 }, { "epoch": 5.6955941255006675, "grad_norm": 0.048586875200271606, "learning_rate": 0.01, "loss": 1.9659, "step": 55458 }, { "epoch": 5.695902228612509, "grad_norm": 0.04205205664038658, "learning_rate": 0.01, "loss": 1.9794, "step": 55461 }, { "epoch": 5.69621033172435, "grad_norm": 0.06149724870920181, "learning_rate": 0.01, "loss": 1.9959, "step": 55464 }, { "epoch": 5.696518434836192, "grad_norm": 0.0992293581366539, "learning_rate": 0.01, "loss": 1.9993, "step": 55467 }, { "epoch": 5.696826537948033, "grad_norm": 0.13116061687469482, "learning_rate": 0.01, "loss": 1.9888, "step": 55470 }, { "epoch": 5.697134641059875, "grad_norm": 0.06159098818898201, "learning_rate": 0.01, "loss": 1.976, "step": 55473 }, { "epoch": 5.697442744171716, "grad_norm": 0.03453617915511131, "learning_rate": 0.01, "loss": 1.9777, "step": 55476 }, { "epoch": 5.697750847283557, "grad_norm": 0.09355533123016357, "learning_rate": 0.01, "loss": 1.9967, "step": 55479 }, { "epoch": 5.698058950395399, "grad_norm": 0.059095028787851334, "learning_rate": 0.01, "loss": 1.9949, "step": 55482 }, { "epoch": 5.698367053507241, "grad_norm": 0.0825907438993454, "learning_rate": 0.01, "loss": 1.9903, "step": 55485 }, { "epoch": 5.698675156619082, "grad_norm": 0.07268752157688141, "learning_rate": 0.01, "loss": 1.9881, "step": 55488 }, { "epoch": 5.698983259730923, "grad_norm": 0.07047848403453827, "learning_rate": 0.01, "loss": 1.9774, "step": 55491 }, { "epoch": 5.699291362842764, "grad_norm": 0.09214555472135544, "learning_rate": 0.01, "loss": 1.9972, "step": 55494 }, { "epoch": 5.699599465954606, "grad_norm": 0.11323852092027664, "learning_rate": 0.01, "loss": 2.0205, "step": 55497 }, { "epoch": 5.699907569066448, "grad_norm": 0.06520719081163406, "learning_rate": 0.01, "loss": 1.9786, "step": 55500 }, { "epoch": 5.700215672178289, "grad_norm": 0.04669101536273956, "learning_rate": 0.01, "loss": 2.0015, "step": 55503 }, { "epoch": 5.70052377529013, "grad_norm": 0.04730800911784172, "learning_rate": 0.01, "loss": 1.9764, "step": 55506 }, { "epoch": 5.700831878401972, "grad_norm": 0.043729908764362335, "learning_rate": 0.01, "loss": 1.9933, "step": 55509 }, { "epoch": 5.701139981513814, "grad_norm": 0.03472939133644104, "learning_rate": 0.01, "loss": 1.9783, "step": 55512 }, { "epoch": 5.701448084625655, "grad_norm": 0.050825025886297226, "learning_rate": 0.01, "loss": 1.975, "step": 55515 }, { "epoch": 5.701756187737496, "grad_norm": 0.06947685033082962, "learning_rate": 0.01, "loss": 1.9835, "step": 55518 }, { "epoch": 5.7020642908493375, "grad_norm": 0.036018531769514084, "learning_rate": 0.01, "loss": 1.9978, "step": 55521 }, { "epoch": 5.702372393961179, "grad_norm": 0.10103366523981094, "learning_rate": 0.01, "loss": 1.9807, "step": 55524 }, { "epoch": 5.70268049707302, "grad_norm": 0.07310350984334946, "learning_rate": 0.01, "loss": 1.9987, "step": 55527 }, { "epoch": 5.702988600184862, "grad_norm": 0.13540640473365784, "learning_rate": 0.01, "loss": 2.0221, "step": 55530 }, { "epoch": 5.7032967032967035, "grad_norm": 0.0949084460735321, "learning_rate": 0.01, "loss": 2.01, "step": 55533 }, { "epoch": 5.703604806408545, "grad_norm": 0.059144023805856705, "learning_rate": 0.01, "loss": 1.9724, "step": 55536 }, { "epoch": 5.703912909520386, "grad_norm": 0.0527474619448185, "learning_rate": 0.01, "loss": 1.9823, "step": 55539 }, { "epoch": 5.704221012632227, "grad_norm": 0.06749939173460007, "learning_rate": 0.01, "loss": 1.9977, "step": 55542 }, { "epoch": 5.704529115744069, "grad_norm": 0.03750371187925339, "learning_rate": 0.01, "loss": 1.9799, "step": 55545 }, { "epoch": 5.704837218855911, "grad_norm": 0.04007263481616974, "learning_rate": 0.01, "loss": 2.0147, "step": 55548 }, { "epoch": 5.705145321967752, "grad_norm": 0.1166180819272995, "learning_rate": 0.01, "loss": 1.9786, "step": 55551 }, { "epoch": 5.705453425079593, "grad_norm": 0.09550142288208008, "learning_rate": 0.01, "loss": 1.9833, "step": 55554 }, { "epoch": 5.7057615281914345, "grad_norm": 0.03766748309135437, "learning_rate": 0.01, "loss": 1.9798, "step": 55557 }, { "epoch": 5.706069631303276, "grad_norm": 0.05305793881416321, "learning_rate": 0.01, "loss": 2.0144, "step": 55560 }, { "epoch": 5.706377734415118, "grad_norm": 0.042423125356435776, "learning_rate": 0.01, "loss": 1.9934, "step": 55563 }, { "epoch": 5.706685837526959, "grad_norm": 0.08636484295129776, "learning_rate": 0.01, "loss": 1.9986, "step": 55566 }, { "epoch": 5.7069939406388, "grad_norm": 0.05162766948342323, "learning_rate": 0.01, "loss": 2.0144, "step": 55569 }, { "epoch": 5.707302043750642, "grad_norm": 0.08721368759870529, "learning_rate": 0.01, "loss": 1.9887, "step": 55572 }, { "epoch": 5.707610146862484, "grad_norm": 0.043255776166915894, "learning_rate": 0.01, "loss": 2.0111, "step": 55575 }, { "epoch": 5.707918249974325, "grad_norm": 0.0371641106903553, "learning_rate": 0.01, "loss": 2.0051, "step": 55578 }, { "epoch": 5.708226353086166, "grad_norm": 0.03695603460073471, "learning_rate": 0.01, "loss": 1.9999, "step": 55581 }, { "epoch": 5.708534456198008, "grad_norm": 0.07745962589979172, "learning_rate": 0.01, "loss": 2.0055, "step": 55584 }, { "epoch": 5.708842559309849, "grad_norm": 0.10786707699298859, "learning_rate": 0.01, "loss": 1.974, "step": 55587 }, { "epoch": 5.70915066242169, "grad_norm": 0.11932874470949173, "learning_rate": 0.01, "loss": 1.9749, "step": 55590 }, { "epoch": 5.709458765533532, "grad_norm": 0.09362657368183136, "learning_rate": 0.01, "loss": 2.0116, "step": 55593 }, { "epoch": 5.7097668686453735, "grad_norm": 0.049765367060899734, "learning_rate": 0.01, "loss": 1.9835, "step": 55596 }, { "epoch": 5.710074971757215, "grad_norm": 0.0707634910941124, "learning_rate": 0.01, "loss": 1.9923, "step": 55599 }, { "epoch": 5.710383074869056, "grad_norm": 0.05576471611857414, "learning_rate": 0.01, "loss": 2.0247, "step": 55602 }, { "epoch": 5.710691177980897, "grad_norm": 0.036324393004179, "learning_rate": 0.01, "loss": 1.9941, "step": 55605 }, { "epoch": 5.7109992810927395, "grad_norm": 0.043876856565475464, "learning_rate": 0.01, "loss": 1.9866, "step": 55608 }, { "epoch": 5.711307384204581, "grad_norm": 0.04931717738509178, "learning_rate": 0.01, "loss": 1.9934, "step": 55611 }, { "epoch": 5.711615487316422, "grad_norm": 0.10611497610807419, "learning_rate": 0.01, "loss": 1.9828, "step": 55614 }, { "epoch": 5.711923590428263, "grad_norm": 0.04808138310909271, "learning_rate": 0.01, "loss": 1.9795, "step": 55617 }, { "epoch": 5.7122316935401045, "grad_norm": 0.05550553277134895, "learning_rate": 0.01, "loss": 2.0022, "step": 55620 }, { "epoch": 5.712539796651946, "grad_norm": 0.07001514732837677, "learning_rate": 0.01, "loss": 2.0225, "step": 55623 }, { "epoch": 5.712847899763788, "grad_norm": 0.04201444238424301, "learning_rate": 0.01, "loss": 1.9674, "step": 55626 }, { "epoch": 5.713156002875629, "grad_norm": 0.04204641655087471, "learning_rate": 0.01, "loss": 2.0051, "step": 55629 }, { "epoch": 5.7134641059874705, "grad_norm": 0.07260296493768692, "learning_rate": 0.01, "loss": 1.9954, "step": 55632 }, { "epoch": 5.713772209099312, "grad_norm": 0.04765797406435013, "learning_rate": 0.01, "loss": 2.0221, "step": 55635 }, { "epoch": 5.714080312211153, "grad_norm": 0.10189679265022278, "learning_rate": 0.01, "loss": 1.9826, "step": 55638 }, { "epoch": 5.714388415322995, "grad_norm": 0.08928123861551285, "learning_rate": 0.01, "loss": 1.9825, "step": 55641 }, { "epoch": 5.714696518434836, "grad_norm": 0.041944585740566254, "learning_rate": 0.01, "loss": 1.9979, "step": 55644 }, { "epoch": 5.715004621546678, "grad_norm": 0.062304090708494186, "learning_rate": 0.01, "loss": 2.0229, "step": 55647 }, { "epoch": 5.715312724658519, "grad_norm": 0.03752179071307182, "learning_rate": 0.01, "loss": 1.9861, "step": 55650 }, { "epoch": 5.71562082777036, "grad_norm": 0.05441533774137497, "learning_rate": 0.01, "loss": 2.0013, "step": 55653 }, { "epoch": 5.715928930882201, "grad_norm": 0.0389556922018528, "learning_rate": 0.01, "loss": 1.9849, "step": 55656 }, { "epoch": 5.716237033994044, "grad_norm": 0.046292494982481, "learning_rate": 0.01, "loss": 2.0006, "step": 55659 }, { "epoch": 5.716545137105885, "grad_norm": 0.0998992770910263, "learning_rate": 0.01, "loss": 1.9953, "step": 55662 }, { "epoch": 5.716853240217726, "grad_norm": 0.07048474252223969, "learning_rate": 0.01, "loss": 1.9914, "step": 55665 }, { "epoch": 5.717161343329567, "grad_norm": 0.03293071687221527, "learning_rate": 0.01, "loss": 1.9971, "step": 55668 }, { "epoch": 5.7174694464414095, "grad_norm": 0.040962520986795425, "learning_rate": 0.01, "loss": 2.0123, "step": 55671 }, { "epoch": 5.717777549553251, "grad_norm": 0.08064155280590057, "learning_rate": 0.01, "loss": 2.0082, "step": 55674 }, { "epoch": 5.718085652665092, "grad_norm": 0.10333026945590973, "learning_rate": 0.01, "loss": 2.0194, "step": 55677 }, { "epoch": 5.718393755776933, "grad_norm": 0.06801532953977585, "learning_rate": 0.01, "loss": 1.9974, "step": 55680 }, { "epoch": 5.718701858888775, "grad_norm": 0.051722411066293716, "learning_rate": 0.01, "loss": 1.9785, "step": 55683 }, { "epoch": 5.719009962000616, "grad_norm": 0.08710015565156937, "learning_rate": 0.01, "loss": 1.982, "step": 55686 }, { "epoch": 5.719318065112458, "grad_norm": 0.06947915256023407, "learning_rate": 0.01, "loss": 2.0078, "step": 55689 }, { "epoch": 5.719626168224299, "grad_norm": 0.08588126301765442, "learning_rate": 0.01, "loss": 2.0111, "step": 55692 }, { "epoch": 5.7199342713361405, "grad_norm": 0.1062588170170784, "learning_rate": 0.01, "loss": 1.9812, "step": 55695 }, { "epoch": 5.720242374447982, "grad_norm": 0.044331252574920654, "learning_rate": 0.01, "loss": 2.0213, "step": 55698 }, { "epoch": 5.720550477559823, "grad_norm": 0.06031282618641853, "learning_rate": 0.01, "loss": 1.9923, "step": 55701 }, { "epoch": 5.720858580671665, "grad_norm": 0.0506250225007534, "learning_rate": 0.01, "loss": 1.9847, "step": 55704 }, { "epoch": 5.7211666837835065, "grad_norm": 0.04892846941947937, "learning_rate": 0.01, "loss": 1.9845, "step": 55707 }, { "epoch": 5.721474786895348, "grad_norm": 0.0958351120352745, "learning_rate": 0.01, "loss": 1.9916, "step": 55710 }, { "epoch": 5.721782890007189, "grad_norm": 0.03931442275643349, "learning_rate": 0.01, "loss": 1.9758, "step": 55713 }, { "epoch": 5.72209099311903, "grad_norm": 0.10494615882635117, "learning_rate": 0.01, "loss": 2.0026, "step": 55716 }, { "epoch": 5.7223990962308715, "grad_norm": 0.06329034268856049, "learning_rate": 0.01, "loss": 1.9781, "step": 55719 }, { "epoch": 5.722707199342714, "grad_norm": 0.08237000554800034, "learning_rate": 0.01, "loss": 2.0026, "step": 55722 }, { "epoch": 5.723015302454555, "grad_norm": 0.063106007874012, "learning_rate": 0.01, "loss": 1.9761, "step": 55725 }, { "epoch": 5.723323405566396, "grad_norm": 0.04435192793607712, "learning_rate": 0.01, "loss": 2.0046, "step": 55728 }, { "epoch": 5.7236315086782374, "grad_norm": 0.09624417126178741, "learning_rate": 0.01, "loss": 1.9986, "step": 55731 }, { "epoch": 5.723939611790079, "grad_norm": 0.04826802760362625, "learning_rate": 0.01, "loss": 1.9803, "step": 55734 }, { "epoch": 5.724247714901921, "grad_norm": 0.13376633822917938, "learning_rate": 0.01, "loss": 2.0249, "step": 55737 }, { "epoch": 5.724555818013762, "grad_norm": 0.10685109347105026, "learning_rate": 0.01, "loss": 1.9724, "step": 55740 }, { "epoch": 5.724863921125603, "grad_norm": 0.03832371160387993, "learning_rate": 0.01, "loss": 1.9977, "step": 55743 }, { "epoch": 5.725172024237445, "grad_norm": 0.06955388933420181, "learning_rate": 0.01, "loss": 1.983, "step": 55746 }, { "epoch": 5.725480127349286, "grad_norm": 0.10767482221126556, "learning_rate": 0.01, "loss": 2.0095, "step": 55749 }, { "epoch": 5.725788230461127, "grad_norm": 0.09542632102966309, "learning_rate": 0.01, "loss": 1.9976, "step": 55752 }, { "epoch": 5.726096333572969, "grad_norm": 0.07101954519748688, "learning_rate": 0.01, "loss": 1.9897, "step": 55755 }, { "epoch": 5.726404436684811, "grad_norm": 0.04699917882680893, "learning_rate": 0.01, "loss": 2.0034, "step": 55758 }, { "epoch": 5.726712539796652, "grad_norm": 0.03901418298482895, "learning_rate": 0.01, "loss": 1.9782, "step": 55761 }, { "epoch": 5.727020642908493, "grad_norm": 0.03572557494044304, "learning_rate": 0.01, "loss": 1.9926, "step": 55764 }, { "epoch": 5.727328746020335, "grad_norm": 0.048515480011701584, "learning_rate": 0.01, "loss": 1.9798, "step": 55767 }, { "epoch": 5.7276368491321765, "grad_norm": 0.09653262794017792, "learning_rate": 0.01, "loss": 1.9913, "step": 55770 }, { "epoch": 5.727944952244018, "grad_norm": 0.05700475722551346, "learning_rate": 0.01, "loss": 2.0076, "step": 55773 }, { "epoch": 5.728253055355859, "grad_norm": 0.1171969622373581, "learning_rate": 0.01, "loss": 1.9628, "step": 55776 }, { "epoch": 5.7285611584677, "grad_norm": 0.043651919811964035, "learning_rate": 0.01, "loss": 2.0058, "step": 55779 }, { "epoch": 5.728869261579542, "grad_norm": 0.034597914665937424, "learning_rate": 0.01, "loss": 1.9807, "step": 55782 }, { "epoch": 5.729177364691384, "grad_norm": 0.036805570125579834, "learning_rate": 0.01, "loss": 1.9867, "step": 55785 }, { "epoch": 5.729485467803225, "grad_norm": 0.0409085676074028, "learning_rate": 0.01, "loss": 2.0135, "step": 55788 }, { "epoch": 5.729793570915066, "grad_norm": 0.041818439960479736, "learning_rate": 0.01, "loss": 2.0131, "step": 55791 }, { "epoch": 5.7301016740269075, "grad_norm": 0.04998002573847771, "learning_rate": 0.01, "loss": 2.0026, "step": 55794 }, { "epoch": 5.730409777138749, "grad_norm": 0.13136835396289825, "learning_rate": 0.01, "loss": 1.9779, "step": 55797 }, { "epoch": 5.730717880250591, "grad_norm": 0.09088631719350815, "learning_rate": 0.01, "loss": 1.9816, "step": 55800 }, { "epoch": 5.731025983362432, "grad_norm": 0.04242292046546936, "learning_rate": 0.01, "loss": 1.995, "step": 55803 }, { "epoch": 5.7313340864742734, "grad_norm": 0.06529027223587036, "learning_rate": 0.01, "loss": 1.992, "step": 55806 }, { "epoch": 5.731642189586115, "grad_norm": 0.07886287569999695, "learning_rate": 0.01, "loss": 1.9938, "step": 55809 }, { "epoch": 5.731950292697956, "grad_norm": 0.05292735993862152, "learning_rate": 0.01, "loss": 2.0094, "step": 55812 }, { "epoch": 5.732258395809797, "grad_norm": 0.08382521569728851, "learning_rate": 0.01, "loss": 1.9827, "step": 55815 }, { "epoch": 5.732566498921639, "grad_norm": 0.16346225142478943, "learning_rate": 0.01, "loss": 1.9933, "step": 55818 }, { "epoch": 5.732874602033481, "grad_norm": 0.1087159663438797, "learning_rate": 0.01, "loss": 1.9706, "step": 55821 }, { "epoch": 5.733182705145322, "grad_norm": 0.08878868818283081, "learning_rate": 0.01, "loss": 1.9981, "step": 55824 }, { "epoch": 5.733490808257163, "grad_norm": 0.05677541717886925, "learning_rate": 0.01, "loss": 2.0083, "step": 55827 }, { "epoch": 5.733798911369005, "grad_norm": 0.047212038189172745, "learning_rate": 0.01, "loss": 1.9733, "step": 55830 }, { "epoch": 5.734107014480847, "grad_norm": 0.04530366137623787, "learning_rate": 0.01, "loss": 1.9799, "step": 55833 }, { "epoch": 5.734415117592688, "grad_norm": 0.03656543791294098, "learning_rate": 0.01, "loss": 1.9945, "step": 55836 }, { "epoch": 5.734723220704529, "grad_norm": 0.05110878124833107, "learning_rate": 0.01, "loss": 2.0001, "step": 55839 }, { "epoch": 5.73503132381637, "grad_norm": 0.046113792806863785, "learning_rate": 0.01, "loss": 2.0205, "step": 55842 }, { "epoch": 5.735339426928212, "grad_norm": 0.05249856784939766, "learning_rate": 0.01, "loss": 2.0061, "step": 55845 }, { "epoch": 5.735647530040054, "grad_norm": 0.10378940403461456, "learning_rate": 0.01, "loss": 1.99, "step": 55848 }, { "epoch": 5.735955633151895, "grad_norm": 0.04738283157348633, "learning_rate": 0.01, "loss": 1.9874, "step": 55851 }, { "epoch": 5.736263736263736, "grad_norm": 0.042795851826667786, "learning_rate": 0.01, "loss": 1.9761, "step": 55854 }, { "epoch": 5.736571839375578, "grad_norm": 0.060027096420526505, "learning_rate": 0.01, "loss": 2.0001, "step": 55857 }, { "epoch": 5.736879942487419, "grad_norm": 0.060275256633758545, "learning_rate": 0.01, "loss": 2.0119, "step": 55860 }, { "epoch": 5.737188045599261, "grad_norm": 0.038434866815805435, "learning_rate": 0.01, "loss": 1.9843, "step": 55863 }, { "epoch": 5.737496148711102, "grad_norm": 0.06112179905176163, "learning_rate": 0.01, "loss": 1.9656, "step": 55866 }, { "epoch": 5.7378042518229435, "grad_norm": 0.057865045964717865, "learning_rate": 0.01, "loss": 2.0079, "step": 55869 }, { "epoch": 5.738112354934785, "grad_norm": 0.10420432686805725, "learning_rate": 0.01, "loss": 2.0062, "step": 55872 }, { "epoch": 5.738420458046626, "grad_norm": 0.10990259796380997, "learning_rate": 0.01, "loss": 1.9878, "step": 55875 }, { "epoch": 5.738728561158467, "grad_norm": 0.047898877412080765, "learning_rate": 0.01, "loss": 2.0026, "step": 55878 }, { "epoch": 5.7390366642703095, "grad_norm": 0.09271835535764694, "learning_rate": 0.01, "loss": 2.0032, "step": 55881 }, { "epoch": 5.739344767382151, "grad_norm": 0.046148382127285004, "learning_rate": 0.01, "loss": 2.0049, "step": 55884 }, { "epoch": 5.739652870493992, "grad_norm": 0.09337476640939713, "learning_rate": 0.01, "loss": 2.0101, "step": 55887 }, { "epoch": 5.739960973605833, "grad_norm": 0.0676882416009903, "learning_rate": 0.01, "loss": 1.9989, "step": 55890 }, { "epoch": 5.7402690767176745, "grad_norm": 0.07760494947433472, "learning_rate": 0.01, "loss": 1.9789, "step": 55893 }, { "epoch": 5.740577179829517, "grad_norm": 0.0753534585237503, "learning_rate": 0.01, "loss": 2.0221, "step": 55896 }, { "epoch": 5.740885282941358, "grad_norm": 0.07207735627889633, "learning_rate": 0.01, "loss": 1.9996, "step": 55899 }, { "epoch": 5.741193386053199, "grad_norm": 0.10786162316799164, "learning_rate": 0.01, "loss": 1.994, "step": 55902 }, { "epoch": 5.74150148916504, "grad_norm": 0.05810957029461861, "learning_rate": 0.01, "loss": 1.9892, "step": 55905 }, { "epoch": 5.741809592276882, "grad_norm": 0.043440163135528564, "learning_rate": 0.01, "loss": 2.025, "step": 55908 }, { "epoch": 5.742117695388723, "grad_norm": 0.10091213136911392, "learning_rate": 0.01, "loss": 1.9735, "step": 55911 }, { "epoch": 5.742425798500565, "grad_norm": 0.05655033513903618, "learning_rate": 0.01, "loss": 1.9865, "step": 55914 }, { "epoch": 5.742733901612406, "grad_norm": 0.04754583537578583, "learning_rate": 0.01, "loss": 1.9866, "step": 55917 }, { "epoch": 5.743042004724248, "grad_norm": 0.04805220663547516, "learning_rate": 0.01, "loss": 2.0079, "step": 55920 }, { "epoch": 5.743350107836089, "grad_norm": 0.03961215168237686, "learning_rate": 0.01, "loss": 2.0027, "step": 55923 }, { "epoch": 5.743658210947931, "grad_norm": 0.0900992751121521, "learning_rate": 0.01, "loss": 2.004, "step": 55926 }, { "epoch": 5.743966314059772, "grad_norm": 0.06700705736875534, "learning_rate": 0.01, "loss": 2.0104, "step": 55929 }, { "epoch": 5.744274417171614, "grad_norm": 0.06921757012605667, "learning_rate": 0.01, "loss": 2.0068, "step": 55932 }, { "epoch": 5.744582520283455, "grad_norm": 0.08982661366462708, "learning_rate": 0.01, "loss": 1.9646, "step": 55935 }, { "epoch": 5.744890623395296, "grad_norm": 0.059035494923591614, "learning_rate": 0.01, "loss": 1.9974, "step": 55938 }, { "epoch": 5.745198726507137, "grad_norm": 0.11368583887815475, "learning_rate": 0.01, "loss": 1.9738, "step": 55941 }, { "epoch": 5.7455068296189795, "grad_norm": 0.08658309280872345, "learning_rate": 0.01, "loss": 1.9933, "step": 55944 }, { "epoch": 5.745814932730821, "grad_norm": 0.06159745901823044, "learning_rate": 0.01, "loss": 2.0021, "step": 55947 }, { "epoch": 5.746123035842662, "grad_norm": 0.09689722955226898, "learning_rate": 0.01, "loss": 1.9833, "step": 55950 }, { "epoch": 5.746431138954503, "grad_norm": 0.04213634133338928, "learning_rate": 0.01, "loss": 1.9909, "step": 55953 }, { "epoch": 5.746739242066345, "grad_norm": 0.09022536873817444, "learning_rate": 0.01, "loss": 1.9798, "step": 55956 }, { "epoch": 5.747047345178187, "grad_norm": 0.047155823558568954, "learning_rate": 0.01, "loss": 1.9914, "step": 55959 }, { "epoch": 5.747355448290028, "grad_norm": 0.12048459053039551, "learning_rate": 0.01, "loss": 1.9988, "step": 55962 }, { "epoch": 5.747663551401869, "grad_norm": 0.14343701303005219, "learning_rate": 0.01, "loss": 1.9728, "step": 55965 }, { "epoch": 5.7479716545137105, "grad_norm": 0.10892770439386368, "learning_rate": 0.01, "loss": 1.9709, "step": 55968 }, { "epoch": 5.748279757625552, "grad_norm": 0.08472274988889694, "learning_rate": 0.01, "loss": 1.9681, "step": 55971 }, { "epoch": 5.748587860737393, "grad_norm": 0.06257399171590805, "learning_rate": 0.01, "loss": 2.0121, "step": 55974 }, { "epoch": 5.748895963849235, "grad_norm": 0.050180837512016296, "learning_rate": 0.01, "loss": 1.9785, "step": 55977 }, { "epoch": 5.749204066961076, "grad_norm": 0.04516899958252907, "learning_rate": 0.01, "loss": 2.0091, "step": 55980 }, { "epoch": 5.749512170072918, "grad_norm": 0.06078992411494255, "learning_rate": 0.01, "loss": 2.0152, "step": 55983 }, { "epoch": 5.749820273184759, "grad_norm": 0.060303375124931335, "learning_rate": 0.01, "loss": 1.983, "step": 55986 }, { "epoch": 5.7501283762966, "grad_norm": 0.046474192291498184, "learning_rate": 0.01, "loss": 1.9981, "step": 55989 }, { "epoch": 5.750436479408442, "grad_norm": 0.042475223541259766, "learning_rate": 0.01, "loss": 2.0081, "step": 55992 }, { "epoch": 5.750744582520284, "grad_norm": 0.10338003933429718, "learning_rate": 0.01, "loss": 2.0098, "step": 55995 }, { "epoch": 5.751052685632125, "grad_norm": 0.07736215740442276, "learning_rate": 0.01, "loss": 1.9692, "step": 55998 }, { "epoch": 5.751360788743966, "grad_norm": 0.12122607231140137, "learning_rate": 0.01, "loss": 1.9965, "step": 56001 }, { "epoch": 5.751668891855807, "grad_norm": 0.12788768112659454, "learning_rate": 0.01, "loss": 2.0051, "step": 56004 }, { "epoch": 5.751976994967649, "grad_norm": 0.1270526796579361, "learning_rate": 0.01, "loss": 1.9902, "step": 56007 }, { "epoch": 5.752285098079491, "grad_norm": 0.07083453238010406, "learning_rate": 0.01, "loss": 2.0271, "step": 56010 }, { "epoch": 5.752593201191332, "grad_norm": 0.0539688877761364, "learning_rate": 0.01, "loss": 2.0156, "step": 56013 }, { "epoch": 5.752901304303173, "grad_norm": 0.05941673740744591, "learning_rate": 0.01, "loss": 2.0111, "step": 56016 }, { "epoch": 5.753209407415015, "grad_norm": 0.058209896087646484, "learning_rate": 0.01, "loss": 2.0078, "step": 56019 }, { "epoch": 5.753517510526857, "grad_norm": 0.046941760927438736, "learning_rate": 0.01, "loss": 1.9934, "step": 56022 }, { "epoch": 5.753825613638698, "grad_norm": 0.05372985079884529, "learning_rate": 0.01, "loss": 1.9865, "step": 56025 }, { "epoch": 5.754133716750539, "grad_norm": 0.04523862898349762, "learning_rate": 0.01, "loss": 2.0294, "step": 56028 }, { "epoch": 5.754441819862381, "grad_norm": 0.03816952556371689, "learning_rate": 0.01, "loss": 1.9829, "step": 56031 }, { "epoch": 5.754749922974222, "grad_norm": 0.06410220265388489, "learning_rate": 0.01, "loss": 2.0222, "step": 56034 }, { "epoch": 5.755058026086063, "grad_norm": 0.03941243886947632, "learning_rate": 0.01, "loss": 1.9731, "step": 56037 }, { "epoch": 5.755366129197905, "grad_norm": 0.054606303572654724, "learning_rate": 0.01, "loss": 1.9634, "step": 56040 }, { "epoch": 5.7556742323097465, "grad_norm": 0.12037497013807297, "learning_rate": 0.01, "loss": 2.013, "step": 56043 }, { "epoch": 5.755982335421588, "grad_norm": 0.09681439399719238, "learning_rate": 0.01, "loss": 2.0124, "step": 56046 }, { "epoch": 5.756290438533429, "grad_norm": 0.09768577665090561, "learning_rate": 0.01, "loss": 2.0103, "step": 56049 }, { "epoch": 5.75659854164527, "grad_norm": 0.08952365070581436, "learning_rate": 0.01, "loss": 2.0002, "step": 56052 }, { "epoch": 5.7569066447571124, "grad_norm": 0.09528356790542603, "learning_rate": 0.01, "loss": 1.9914, "step": 56055 }, { "epoch": 5.757214747868954, "grad_norm": 0.05678083375096321, "learning_rate": 0.01, "loss": 1.9775, "step": 56058 }, { "epoch": 5.757522850980795, "grad_norm": 0.040836770087480545, "learning_rate": 0.01, "loss": 1.9526, "step": 56061 }, { "epoch": 5.757830954092636, "grad_norm": 0.053255595266819, "learning_rate": 0.01, "loss": 1.977, "step": 56064 }, { "epoch": 5.7581390572044775, "grad_norm": 0.053145814687013626, "learning_rate": 0.01, "loss": 1.9792, "step": 56067 }, { "epoch": 5.758447160316319, "grad_norm": 0.04844813421368599, "learning_rate": 0.01, "loss": 1.9795, "step": 56070 }, { "epoch": 5.758755263428161, "grad_norm": 0.08748742938041687, "learning_rate": 0.01, "loss": 1.9728, "step": 56073 }, { "epoch": 5.759063366540002, "grad_norm": 0.03808058798313141, "learning_rate": 0.01, "loss": 1.9878, "step": 56076 }, { "epoch": 5.759371469651843, "grad_norm": 0.0475618913769722, "learning_rate": 0.01, "loss": 1.9702, "step": 56079 }, { "epoch": 5.759679572763685, "grad_norm": 0.04134085774421692, "learning_rate": 0.01, "loss": 1.9759, "step": 56082 }, { "epoch": 5.759987675875526, "grad_norm": 0.035581640899181366, "learning_rate": 0.01, "loss": 1.9846, "step": 56085 }, { "epoch": 5.760295778987368, "grad_norm": 0.1027594655752182, "learning_rate": 0.01, "loss": 1.9873, "step": 56088 }, { "epoch": 5.760603882099209, "grad_norm": 0.06380899995565414, "learning_rate": 0.01, "loss": 1.9913, "step": 56091 }, { "epoch": 5.760911985211051, "grad_norm": 0.04780165106058121, "learning_rate": 0.01, "loss": 1.9984, "step": 56094 }, { "epoch": 5.761220088322892, "grad_norm": 0.0501483753323555, "learning_rate": 0.01, "loss": 1.9949, "step": 56097 }, { "epoch": 5.761528191434733, "grad_norm": 0.06381183862686157, "learning_rate": 0.01, "loss": 1.9734, "step": 56100 }, { "epoch": 5.761836294546574, "grad_norm": 0.08505979180335999, "learning_rate": 0.01, "loss": 1.9734, "step": 56103 }, { "epoch": 5.762144397658417, "grad_norm": 0.09470894932746887, "learning_rate": 0.01, "loss": 1.9798, "step": 56106 }, { "epoch": 5.762452500770258, "grad_norm": 0.08021756261587143, "learning_rate": 0.01, "loss": 1.9843, "step": 56109 }, { "epoch": 5.762760603882099, "grad_norm": 0.05404616892337799, "learning_rate": 0.01, "loss": 1.9731, "step": 56112 }, { "epoch": 5.76306870699394, "grad_norm": 0.05812733992934227, "learning_rate": 0.01, "loss": 1.9987, "step": 56115 }, { "epoch": 5.7633768101057825, "grad_norm": 0.045452944934368134, "learning_rate": 0.01, "loss": 1.9802, "step": 56118 }, { "epoch": 5.763684913217624, "grad_norm": 0.06332234293222427, "learning_rate": 0.01, "loss": 1.9977, "step": 56121 }, { "epoch": 5.763993016329465, "grad_norm": 0.0571395680308342, "learning_rate": 0.01, "loss": 1.9647, "step": 56124 }, { "epoch": 5.764301119441306, "grad_norm": 0.057966724038124084, "learning_rate": 0.01, "loss": 1.983, "step": 56127 }, { "epoch": 5.764609222553148, "grad_norm": 0.03700026124715805, "learning_rate": 0.01, "loss": 1.9724, "step": 56130 }, { "epoch": 5.764917325664989, "grad_norm": 0.06366675347089767, "learning_rate": 0.01, "loss": 2.0134, "step": 56133 }, { "epoch": 5.765225428776831, "grad_norm": 0.12095164507627487, "learning_rate": 0.01, "loss": 1.9885, "step": 56136 }, { "epoch": 5.765533531888672, "grad_norm": 0.05511580780148506, "learning_rate": 0.01, "loss": 2.0004, "step": 56139 }, { "epoch": 5.7658416350005135, "grad_norm": 0.06662758439779282, "learning_rate": 0.01, "loss": 2.0039, "step": 56142 }, { "epoch": 5.766149738112355, "grad_norm": 0.05345858260989189, "learning_rate": 0.01, "loss": 2.0104, "step": 56145 }, { "epoch": 5.766457841224196, "grad_norm": 0.05073786526918411, "learning_rate": 0.01, "loss": 1.9908, "step": 56148 }, { "epoch": 5.766765944336038, "grad_norm": 0.041545137763023376, "learning_rate": 0.01, "loss": 1.9773, "step": 56151 }, { "epoch": 5.767074047447879, "grad_norm": 0.058929163962602615, "learning_rate": 0.01, "loss": 1.9695, "step": 56154 }, { "epoch": 5.767382150559721, "grad_norm": 0.0575728714466095, "learning_rate": 0.01, "loss": 1.993, "step": 56157 }, { "epoch": 5.767690253671562, "grad_norm": 0.05252945050597191, "learning_rate": 0.01, "loss": 1.9974, "step": 56160 }, { "epoch": 5.767998356783403, "grad_norm": 0.09407012909650803, "learning_rate": 0.01, "loss": 1.975, "step": 56163 }, { "epoch": 5.7683064598952445, "grad_norm": 0.07041514664888382, "learning_rate": 0.01, "loss": 1.9804, "step": 56166 }, { "epoch": 5.768614563007087, "grad_norm": 0.0976720005273819, "learning_rate": 0.01, "loss": 2.0004, "step": 56169 }, { "epoch": 5.768922666118928, "grad_norm": 0.06705368310213089, "learning_rate": 0.01, "loss": 2.0022, "step": 56172 }, { "epoch": 5.769230769230769, "grad_norm": 0.044996488839387894, "learning_rate": 0.01, "loss": 1.9837, "step": 56175 }, { "epoch": 5.76953887234261, "grad_norm": 0.05325739085674286, "learning_rate": 0.01, "loss": 2.0039, "step": 56178 }, { "epoch": 5.769846975454453, "grad_norm": 0.06506424397230148, "learning_rate": 0.01, "loss": 1.9852, "step": 56181 }, { "epoch": 5.770155078566294, "grad_norm": 0.05719895660877228, "learning_rate": 0.01, "loss": 1.9644, "step": 56184 }, { "epoch": 5.770463181678135, "grad_norm": 0.057186469435691833, "learning_rate": 0.01, "loss": 1.9881, "step": 56187 }, { "epoch": 5.770771284789976, "grad_norm": 0.06783724576234818, "learning_rate": 0.01, "loss": 2.0129, "step": 56190 }, { "epoch": 5.771079387901818, "grad_norm": 0.10789873450994492, "learning_rate": 0.01, "loss": 2.0168, "step": 56193 }, { "epoch": 5.771387491013659, "grad_norm": 0.07688331604003906, "learning_rate": 0.01, "loss": 1.9821, "step": 56196 }, { "epoch": 5.771695594125501, "grad_norm": 0.056692346930503845, "learning_rate": 0.01, "loss": 2.0001, "step": 56199 }, { "epoch": 5.772003697237342, "grad_norm": 0.04593949392437935, "learning_rate": 0.01, "loss": 2.0237, "step": 56202 }, { "epoch": 5.772311800349184, "grad_norm": 0.050233110785484314, "learning_rate": 0.01, "loss": 1.9874, "step": 56205 }, { "epoch": 5.772619903461025, "grad_norm": 0.049515530467033386, "learning_rate": 0.01, "loss": 2.0098, "step": 56208 }, { "epoch": 5.772928006572866, "grad_norm": 0.10692589730024338, "learning_rate": 0.01, "loss": 1.9865, "step": 56211 }, { "epoch": 5.773236109684708, "grad_norm": 0.054260801523923874, "learning_rate": 0.01, "loss": 2.0194, "step": 56214 }, { "epoch": 5.7735442127965495, "grad_norm": 0.042156677693128586, "learning_rate": 0.01, "loss": 1.983, "step": 56217 }, { "epoch": 5.773852315908391, "grad_norm": 0.06891094893217087, "learning_rate": 0.01, "loss": 1.981, "step": 56220 }, { "epoch": 5.774160419020232, "grad_norm": 0.09000008553266525, "learning_rate": 0.01, "loss": 1.9868, "step": 56223 }, { "epoch": 5.774468522132073, "grad_norm": 0.051672134548425674, "learning_rate": 0.01, "loss": 1.9953, "step": 56226 }, { "epoch": 5.7747766252439146, "grad_norm": 0.04013100638985634, "learning_rate": 0.01, "loss": 2.0094, "step": 56229 }, { "epoch": 5.775084728355757, "grad_norm": 0.051951587200164795, "learning_rate": 0.01, "loss": 1.9719, "step": 56232 }, { "epoch": 5.775392831467598, "grad_norm": 0.038350678980350494, "learning_rate": 0.01, "loss": 1.9587, "step": 56235 }, { "epoch": 5.775700934579439, "grad_norm": 0.10026026517152786, "learning_rate": 0.01, "loss": 1.9906, "step": 56238 }, { "epoch": 5.7760090376912805, "grad_norm": 0.07415781170129776, "learning_rate": 0.01, "loss": 1.9795, "step": 56241 }, { "epoch": 5.776317140803122, "grad_norm": 0.04173508659005165, "learning_rate": 0.01, "loss": 2.0075, "step": 56244 }, { "epoch": 5.776625243914964, "grad_norm": 0.1886957734823227, "learning_rate": 0.01, "loss": 1.9861, "step": 56247 }, { "epoch": 5.776933347026805, "grad_norm": 0.13357476890087128, "learning_rate": 0.01, "loss": 1.9562, "step": 56250 }, { "epoch": 5.777241450138646, "grad_norm": 0.08402860164642334, "learning_rate": 0.01, "loss": 1.9802, "step": 56253 }, { "epoch": 5.777549553250488, "grad_norm": 0.08017169684171677, "learning_rate": 0.01, "loss": 1.9688, "step": 56256 }, { "epoch": 5.777857656362329, "grad_norm": 0.08612015098333359, "learning_rate": 0.01, "loss": 2.0009, "step": 56259 }, { "epoch": 5.77816575947417, "grad_norm": 0.06540670245885849, "learning_rate": 0.01, "loss": 1.9948, "step": 56262 }, { "epoch": 5.778473862586012, "grad_norm": 0.03119773231446743, "learning_rate": 0.01, "loss": 1.9967, "step": 56265 }, { "epoch": 5.778781965697854, "grad_norm": 0.12924209237098694, "learning_rate": 0.01, "loss": 2.0022, "step": 56268 }, { "epoch": 5.779090068809695, "grad_norm": 0.07823926955461502, "learning_rate": 0.01, "loss": 1.9822, "step": 56271 }, { "epoch": 5.779398171921536, "grad_norm": 0.05518370866775513, "learning_rate": 0.01, "loss": 1.9894, "step": 56274 }, { "epoch": 5.779706275033378, "grad_norm": 0.041604407131671906, "learning_rate": 0.01, "loss": 1.9931, "step": 56277 }, { "epoch": 5.78001437814522, "grad_norm": 0.05646267905831337, "learning_rate": 0.01, "loss": 1.9798, "step": 56280 }, { "epoch": 5.780322481257061, "grad_norm": 0.051092468202114105, "learning_rate": 0.01, "loss": 2.0014, "step": 56283 }, { "epoch": 5.780630584368902, "grad_norm": 0.05034413933753967, "learning_rate": 0.01, "loss": 1.9559, "step": 56286 }, { "epoch": 5.780938687480743, "grad_norm": 0.03945804387331009, "learning_rate": 0.01, "loss": 1.9894, "step": 56289 }, { "epoch": 5.781246790592585, "grad_norm": 0.046318311244249344, "learning_rate": 0.01, "loss": 1.9839, "step": 56292 }, { "epoch": 5.781554893704427, "grad_norm": 0.0614444762468338, "learning_rate": 0.01, "loss": 2.0039, "step": 56295 }, { "epoch": 5.781862996816268, "grad_norm": 0.06555134057998657, "learning_rate": 0.01, "loss": 1.9804, "step": 56298 }, { "epoch": 5.782171099928109, "grad_norm": 0.08905301988124847, "learning_rate": 0.01, "loss": 1.9749, "step": 56301 }, { "epoch": 5.7824792030399506, "grad_norm": 0.06253067404031754, "learning_rate": 0.01, "loss": 1.9804, "step": 56304 }, { "epoch": 5.782787306151792, "grad_norm": 0.05287610739469528, "learning_rate": 0.01, "loss": 2.0049, "step": 56307 }, { "epoch": 5.783095409263634, "grad_norm": 0.11111927032470703, "learning_rate": 0.01, "loss": 2.0164, "step": 56310 }, { "epoch": 5.783403512375475, "grad_norm": 0.05433667451143265, "learning_rate": 0.01, "loss": 2.0039, "step": 56313 }, { "epoch": 5.7837116154873165, "grad_norm": 0.03661932423710823, "learning_rate": 0.01, "loss": 2.0006, "step": 56316 }, { "epoch": 5.784019718599158, "grad_norm": 0.06176665052771568, "learning_rate": 0.01, "loss": 1.9975, "step": 56319 }, { "epoch": 5.784327821710999, "grad_norm": 0.05721355602145195, "learning_rate": 0.01, "loss": 1.9852, "step": 56322 }, { "epoch": 5.78463592482284, "grad_norm": 0.043899018317461014, "learning_rate": 0.01, "loss": 1.9896, "step": 56325 }, { "epoch": 5.784944027934682, "grad_norm": 0.17083556950092316, "learning_rate": 0.01, "loss": 1.9842, "step": 56328 }, { "epoch": 5.785252131046524, "grad_norm": 0.12060192972421646, "learning_rate": 0.01, "loss": 1.9777, "step": 56331 }, { "epoch": 5.785560234158365, "grad_norm": 0.1456676870584488, "learning_rate": 0.01, "loss": 2.0144, "step": 56334 }, { "epoch": 5.785868337270206, "grad_norm": 0.08020723611116409, "learning_rate": 0.01, "loss": 1.9992, "step": 56337 }, { "epoch": 5.7861764403820475, "grad_norm": 0.058028947561979294, "learning_rate": 0.01, "loss": 1.9873, "step": 56340 }, { "epoch": 5.78648454349389, "grad_norm": 0.031094344332814217, "learning_rate": 0.01, "loss": 1.9892, "step": 56343 }, { "epoch": 5.786792646605731, "grad_norm": 0.03784453868865967, "learning_rate": 0.01, "loss": 2.0126, "step": 56346 }, { "epoch": 5.787100749717572, "grad_norm": 0.03983352333307266, "learning_rate": 0.01, "loss": 1.9971, "step": 56349 }, { "epoch": 5.787408852829413, "grad_norm": 0.048995792865753174, "learning_rate": 0.01, "loss": 2.0012, "step": 56352 }, { "epoch": 5.787716955941255, "grad_norm": 0.040493566542863846, "learning_rate": 0.01, "loss": 1.9774, "step": 56355 }, { "epoch": 5.788025059053096, "grad_norm": 0.04488976299762726, "learning_rate": 0.01, "loss": 1.9907, "step": 56358 }, { "epoch": 5.788333162164938, "grad_norm": 0.1611676812171936, "learning_rate": 0.01, "loss": 2.0129, "step": 56361 }, { "epoch": 5.788641265276779, "grad_norm": 0.079713374376297, "learning_rate": 0.01, "loss": 1.9736, "step": 56364 }, { "epoch": 5.788949368388621, "grad_norm": 0.06308376044034958, "learning_rate": 0.01, "loss": 1.9918, "step": 56367 }, { "epoch": 5.789257471500462, "grad_norm": 0.049073606729507446, "learning_rate": 0.01, "loss": 1.9981, "step": 56370 }, { "epoch": 5.789565574612304, "grad_norm": 0.06402777135372162, "learning_rate": 0.01, "loss": 1.9927, "step": 56373 }, { "epoch": 5.789873677724145, "grad_norm": 0.05663265287876129, "learning_rate": 0.01, "loss": 1.9938, "step": 56376 }, { "epoch": 5.790181780835987, "grad_norm": 0.05829964950680733, "learning_rate": 0.01, "loss": 2.0055, "step": 56379 }, { "epoch": 5.790489883947828, "grad_norm": 0.05013945326209068, "learning_rate": 0.01, "loss": 1.9722, "step": 56382 }, { "epoch": 5.790797987059669, "grad_norm": 0.04025455191731453, "learning_rate": 0.01, "loss": 1.9842, "step": 56385 }, { "epoch": 5.79110609017151, "grad_norm": 0.05371120572090149, "learning_rate": 0.01, "loss": 1.9788, "step": 56388 }, { "epoch": 5.7914141932833525, "grad_norm": 0.03760494664311409, "learning_rate": 0.01, "loss": 1.9658, "step": 56391 }, { "epoch": 5.791722296395194, "grad_norm": 0.05783433839678764, "learning_rate": 0.01, "loss": 2.0043, "step": 56394 }, { "epoch": 5.792030399507035, "grad_norm": 0.04827668145298958, "learning_rate": 0.01, "loss": 1.9705, "step": 56397 }, { "epoch": 5.792338502618876, "grad_norm": 0.055127400904893875, "learning_rate": 0.01, "loss": 1.9976, "step": 56400 }, { "epoch": 5.7926466057307175, "grad_norm": 0.07991458475589752, "learning_rate": 0.01, "loss": 1.9551, "step": 56403 }, { "epoch": 5.79295470884256, "grad_norm": 0.06287828832864761, "learning_rate": 0.01, "loss": 1.9848, "step": 56406 }, { "epoch": 5.793262811954401, "grad_norm": 0.07629155367612839, "learning_rate": 0.01, "loss": 2.0004, "step": 56409 }, { "epoch": 5.793570915066242, "grad_norm": 0.10665415227413177, "learning_rate": 0.01, "loss": 1.9869, "step": 56412 }, { "epoch": 5.7938790181780835, "grad_norm": 0.048113856464624405, "learning_rate": 0.01, "loss": 2.0162, "step": 56415 }, { "epoch": 5.794187121289925, "grad_norm": 0.12309219688177109, "learning_rate": 0.01, "loss": 1.9723, "step": 56418 }, { "epoch": 5.794495224401766, "grad_norm": 0.08479398488998413, "learning_rate": 0.01, "loss": 1.9979, "step": 56421 }, { "epoch": 5.794803327513608, "grad_norm": 0.06182289868593216, "learning_rate": 0.01, "loss": 2.0007, "step": 56424 }, { "epoch": 5.795111430625449, "grad_norm": 0.042767733335494995, "learning_rate": 0.01, "loss": 2.0061, "step": 56427 }, { "epoch": 5.795419533737291, "grad_norm": 0.07279690355062485, "learning_rate": 0.01, "loss": 2.0027, "step": 56430 }, { "epoch": 5.795727636849132, "grad_norm": 0.05660317465662956, "learning_rate": 0.01, "loss": 1.9924, "step": 56433 }, { "epoch": 5.796035739960974, "grad_norm": 0.048105914145708084, "learning_rate": 0.01, "loss": 1.9722, "step": 56436 }, { "epoch": 5.796343843072815, "grad_norm": 0.09529261291027069, "learning_rate": 0.01, "loss": 1.9771, "step": 56439 }, { "epoch": 5.796651946184657, "grad_norm": 0.09696771949529648, "learning_rate": 0.01, "loss": 2.0022, "step": 56442 }, { "epoch": 5.796960049296498, "grad_norm": 0.10225138068199158, "learning_rate": 0.01, "loss": 2.009, "step": 56445 }, { "epoch": 5.797268152408339, "grad_norm": 0.09067768603563309, "learning_rate": 0.01, "loss": 2.0002, "step": 56448 }, { "epoch": 5.79757625552018, "grad_norm": 0.03427765890955925, "learning_rate": 0.01, "loss": 2.0128, "step": 56451 }, { "epoch": 5.797884358632023, "grad_norm": 0.03189976140856743, "learning_rate": 0.01, "loss": 1.9747, "step": 56454 }, { "epoch": 5.798192461743864, "grad_norm": 0.04521431401371956, "learning_rate": 0.01, "loss": 1.9947, "step": 56457 }, { "epoch": 5.798500564855705, "grad_norm": 0.05165833979845047, "learning_rate": 0.01, "loss": 2.0026, "step": 56460 }, { "epoch": 5.798808667967546, "grad_norm": 0.051866378635168076, "learning_rate": 0.01, "loss": 2.0151, "step": 56463 }, { "epoch": 5.799116771079388, "grad_norm": 0.03956654295325279, "learning_rate": 0.01, "loss": 1.9971, "step": 56466 }, { "epoch": 5.79942487419123, "grad_norm": 0.04093864560127258, "learning_rate": 0.01, "loss": 1.9768, "step": 56469 }, { "epoch": 5.799732977303071, "grad_norm": 0.05415187031030655, "learning_rate": 0.01, "loss": 1.9907, "step": 56472 }, { "epoch": 5.800041080414912, "grad_norm": 0.057444531470537186, "learning_rate": 0.01, "loss": 2.0053, "step": 56475 }, { "epoch": 5.8003491835267535, "grad_norm": 0.08681999891996384, "learning_rate": 0.01, "loss": 1.9861, "step": 56478 }, { "epoch": 5.800657286638595, "grad_norm": 0.08110462129116058, "learning_rate": 0.01, "loss": 1.9969, "step": 56481 }, { "epoch": 5.800965389750436, "grad_norm": 0.04393799602985382, "learning_rate": 0.01, "loss": 1.9871, "step": 56484 }, { "epoch": 5.801273492862278, "grad_norm": 0.05482124909758568, "learning_rate": 0.01, "loss": 1.9971, "step": 56487 }, { "epoch": 5.8015815959741195, "grad_norm": 0.11982918530702591, "learning_rate": 0.01, "loss": 2.0002, "step": 56490 }, { "epoch": 5.801889699085961, "grad_norm": 0.10209926217794418, "learning_rate": 0.01, "loss": 1.9897, "step": 56493 }, { "epoch": 5.802197802197802, "grad_norm": 0.0397653728723526, "learning_rate": 0.01, "loss": 1.9938, "step": 56496 }, { "epoch": 5.802505905309643, "grad_norm": 0.09022632986307144, "learning_rate": 0.01, "loss": 1.9985, "step": 56499 }, { "epoch": 5.802814008421485, "grad_norm": 0.07979937642812729, "learning_rate": 0.01, "loss": 1.9855, "step": 56502 }, { "epoch": 5.803122111533327, "grad_norm": 0.05430705472826958, "learning_rate": 0.01, "loss": 1.9831, "step": 56505 }, { "epoch": 5.803430214645168, "grad_norm": 0.05586136132478714, "learning_rate": 0.01, "loss": 2.0002, "step": 56508 }, { "epoch": 5.803738317757009, "grad_norm": 0.05488377809524536, "learning_rate": 0.01, "loss": 1.9996, "step": 56511 }, { "epoch": 5.8040464208688505, "grad_norm": 0.04581581428647041, "learning_rate": 0.01, "loss": 1.9911, "step": 56514 }, { "epoch": 5.804354523980692, "grad_norm": 0.11193082481622696, "learning_rate": 0.01, "loss": 2.0275, "step": 56517 }, { "epoch": 5.804662627092534, "grad_norm": 0.0932486429810524, "learning_rate": 0.01, "loss": 2.0051, "step": 56520 }, { "epoch": 5.804970730204375, "grad_norm": 0.079062819480896, "learning_rate": 0.01, "loss": 2.0094, "step": 56523 }, { "epoch": 5.805278833316216, "grad_norm": 0.04556051269173622, "learning_rate": 0.01, "loss": 1.9906, "step": 56526 }, { "epoch": 5.805586936428058, "grad_norm": 0.04513680562376976, "learning_rate": 0.01, "loss": 1.9765, "step": 56529 }, { "epoch": 5.8058950395399, "grad_norm": 0.06065935268998146, "learning_rate": 0.01, "loss": 1.985, "step": 56532 }, { "epoch": 5.806203142651741, "grad_norm": 0.03341379761695862, "learning_rate": 0.01, "loss": 1.9817, "step": 56535 }, { "epoch": 5.806511245763582, "grad_norm": 0.13032658398151398, "learning_rate": 0.01, "loss": 1.9805, "step": 56538 }, { "epoch": 5.806819348875424, "grad_norm": 0.049995262175798416, "learning_rate": 0.01, "loss": 1.998, "step": 56541 }, { "epoch": 5.807127451987265, "grad_norm": 0.10843599587678909, "learning_rate": 0.01, "loss": 1.9833, "step": 56544 }, { "epoch": 5.807435555099106, "grad_norm": 0.12207616865634918, "learning_rate": 0.01, "loss": 2.0046, "step": 56547 }, { "epoch": 5.807743658210948, "grad_norm": 0.18218643963336945, "learning_rate": 0.01, "loss": 1.9768, "step": 56550 }, { "epoch": 5.8080517613227896, "grad_norm": 0.1526477336883545, "learning_rate": 0.01, "loss": 2.0081, "step": 56553 }, { "epoch": 5.808359864434631, "grad_norm": 0.08327028155326843, "learning_rate": 0.01, "loss": 1.971, "step": 56556 }, { "epoch": 5.808667967546472, "grad_norm": 0.05534267798066139, "learning_rate": 0.01, "loss": 1.9766, "step": 56559 }, { "epoch": 5.808976070658313, "grad_norm": 0.04754161089658737, "learning_rate": 0.01, "loss": 1.964, "step": 56562 }, { "epoch": 5.8092841737701555, "grad_norm": 0.039216890931129456, "learning_rate": 0.01, "loss": 2.0034, "step": 56565 }, { "epoch": 5.809592276881997, "grad_norm": 0.05179159343242645, "learning_rate": 0.01, "loss": 1.9925, "step": 56568 }, { "epoch": 5.809900379993838, "grad_norm": 0.03787977248430252, "learning_rate": 0.01, "loss": 1.9959, "step": 56571 }, { "epoch": 5.810208483105679, "grad_norm": 0.06589755415916443, "learning_rate": 0.01, "loss": 1.9944, "step": 56574 }, { "epoch": 5.8105165862175205, "grad_norm": 0.04956568777561188, "learning_rate": 0.01, "loss": 1.9836, "step": 56577 }, { "epoch": 5.810824689329362, "grad_norm": 0.039915554225444794, "learning_rate": 0.01, "loss": 2.0168, "step": 56580 }, { "epoch": 5.811132792441204, "grad_norm": 0.10185667872428894, "learning_rate": 0.01, "loss": 1.991, "step": 56583 }, { "epoch": 5.811440895553045, "grad_norm": 0.048244234174489975, "learning_rate": 0.01, "loss": 2.007, "step": 56586 }, { "epoch": 5.8117489986648865, "grad_norm": 0.07163000851869583, "learning_rate": 0.01, "loss": 2.0091, "step": 56589 }, { "epoch": 5.812057101776728, "grad_norm": 0.04289057105779648, "learning_rate": 0.01, "loss": 2.0326, "step": 56592 }, { "epoch": 5.812365204888569, "grad_norm": 0.06520239263772964, "learning_rate": 0.01, "loss": 2.0223, "step": 56595 }, { "epoch": 5.812673308000411, "grad_norm": 0.038391079753637314, "learning_rate": 0.01, "loss": 2.0037, "step": 56598 }, { "epoch": 5.812981411112252, "grad_norm": 0.039008256047964096, "learning_rate": 0.01, "loss": 1.9904, "step": 56601 }, { "epoch": 5.813289514224094, "grad_norm": 0.1321951448917389, "learning_rate": 0.01, "loss": 1.9769, "step": 56604 }, { "epoch": 5.813597617335935, "grad_norm": 0.058711450546979904, "learning_rate": 0.01, "loss": 1.9986, "step": 56607 }, { "epoch": 5.813905720447776, "grad_norm": 0.10861786454916, "learning_rate": 0.01, "loss": 1.9912, "step": 56610 }, { "epoch": 5.8142138235596175, "grad_norm": 0.09143928438425064, "learning_rate": 0.01, "loss": 1.9946, "step": 56613 }, { "epoch": 5.81452192667146, "grad_norm": 0.051334600895643234, "learning_rate": 0.01, "loss": 2.007, "step": 56616 }, { "epoch": 5.814830029783301, "grad_norm": 0.0675191655755043, "learning_rate": 0.01, "loss": 1.9805, "step": 56619 }, { "epoch": 5.815138132895142, "grad_norm": 0.038013167679309845, "learning_rate": 0.01, "loss": 1.9922, "step": 56622 }, { "epoch": 5.815446236006983, "grad_norm": 0.043695393949747086, "learning_rate": 0.01, "loss": 1.9982, "step": 56625 }, { "epoch": 5.8157543391188256, "grad_norm": 0.034250471740961075, "learning_rate": 0.01, "loss": 1.9587, "step": 56628 }, { "epoch": 5.816062442230667, "grad_norm": 0.11236503720283508, "learning_rate": 0.01, "loss": 1.9828, "step": 56631 }, { "epoch": 5.816370545342508, "grad_norm": 0.053564272820949554, "learning_rate": 0.01, "loss": 1.9897, "step": 56634 }, { "epoch": 5.816678648454349, "grad_norm": 0.08416426926851273, "learning_rate": 0.01, "loss": 1.9997, "step": 56637 }, { "epoch": 5.816986751566191, "grad_norm": 0.047465983778238297, "learning_rate": 0.01, "loss": 1.9813, "step": 56640 }, { "epoch": 5.817294854678032, "grad_norm": 0.12391756474971771, "learning_rate": 0.01, "loss": 1.997, "step": 56643 }, { "epoch": 5.817602957789874, "grad_norm": 0.12705251574516296, "learning_rate": 0.01, "loss": 2.0021, "step": 56646 }, { "epoch": 5.817911060901715, "grad_norm": 0.09919688105583191, "learning_rate": 0.01, "loss": 1.9895, "step": 56649 }, { "epoch": 5.8182191640135565, "grad_norm": 0.08405104279518127, "learning_rate": 0.01, "loss": 2.016, "step": 56652 }, { "epoch": 5.818527267125398, "grad_norm": 0.04591039568185806, "learning_rate": 0.01, "loss": 1.9716, "step": 56655 }, { "epoch": 5.818835370237239, "grad_norm": 0.04161922633647919, "learning_rate": 0.01, "loss": 1.9985, "step": 56658 }, { "epoch": 5.819143473349081, "grad_norm": 0.05382911115884781, "learning_rate": 0.01, "loss": 1.9799, "step": 56661 }, { "epoch": 5.8194515764609225, "grad_norm": 0.05096543952822685, "learning_rate": 0.01, "loss": 1.993, "step": 56664 }, { "epoch": 5.819759679572764, "grad_norm": 0.03849412500858307, "learning_rate": 0.01, "loss": 1.9875, "step": 56667 }, { "epoch": 5.820067782684605, "grad_norm": 0.04661410301923752, "learning_rate": 0.01, "loss": 1.9914, "step": 56670 }, { "epoch": 5.820375885796446, "grad_norm": 0.11310985684394836, "learning_rate": 0.01, "loss": 1.9887, "step": 56673 }, { "epoch": 5.8206839889082875, "grad_norm": 0.04283592104911804, "learning_rate": 0.01, "loss": 1.9839, "step": 56676 }, { "epoch": 5.82099209202013, "grad_norm": 0.09948618710041046, "learning_rate": 0.01, "loss": 2.0009, "step": 56679 }, { "epoch": 5.821300195131971, "grad_norm": 0.07223562151193619, "learning_rate": 0.01, "loss": 1.9975, "step": 56682 }, { "epoch": 5.821608298243812, "grad_norm": 0.067604660987854, "learning_rate": 0.01, "loss": 2.0109, "step": 56685 }, { "epoch": 5.8219164013556535, "grad_norm": 0.03564516827464104, "learning_rate": 0.01, "loss": 1.9924, "step": 56688 }, { "epoch": 5.822224504467496, "grad_norm": 0.046253006905317307, "learning_rate": 0.01, "loss": 1.9848, "step": 56691 }, { "epoch": 5.822532607579337, "grad_norm": 0.05620354413986206, "learning_rate": 0.01, "loss": 1.98, "step": 56694 }, { "epoch": 5.822840710691178, "grad_norm": 0.110667385160923, "learning_rate": 0.01, "loss": 1.9915, "step": 56697 }, { "epoch": 5.823148813803019, "grad_norm": 0.05062146484851837, "learning_rate": 0.01, "loss": 1.9848, "step": 56700 }, { "epoch": 5.823456916914861, "grad_norm": 0.10704591870307922, "learning_rate": 0.01, "loss": 1.9962, "step": 56703 }, { "epoch": 5.823765020026702, "grad_norm": 0.05372390151023865, "learning_rate": 0.01, "loss": 1.9775, "step": 56706 }, { "epoch": 5.824073123138544, "grad_norm": 0.05827971547842026, "learning_rate": 0.01, "loss": 1.9922, "step": 56709 }, { "epoch": 5.824381226250385, "grad_norm": 0.04202426224946976, "learning_rate": 0.01, "loss": 1.9506, "step": 56712 }, { "epoch": 5.824689329362227, "grad_norm": 0.08493483811616898, "learning_rate": 0.01, "loss": 2.0143, "step": 56715 }, { "epoch": 5.824997432474068, "grad_norm": 0.043885089457035065, "learning_rate": 0.01, "loss": 1.978, "step": 56718 }, { "epoch": 5.825305535585909, "grad_norm": 0.04681296646595001, "learning_rate": 0.01, "loss": 1.9664, "step": 56721 }, { "epoch": 5.825613638697751, "grad_norm": 0.04231657460331917, "learning_rate": 0.01, "loss": 1.982, "step": 56724 }, { "epoch": 5.8259217418095925, "grad_norm": 0.04888215288519859, "learning_rate": 0.01, "loss": 1.9802, "step": 56727 }, { "epoch": 5.826229844921434, "grad_norm": 0.0635887086391449, "learning_rate": 0.01, "loss": 2.0027, "step": 56730 }, { "epoch": 5.826537948033275, "grad_norm": 0.056875888258218765, "learning_rate": 0.01, "loss": 2.0209, "step": 56733 }, { "epoch": 5.826846051145116, "grad_norm": 0.05551552772521973, "learning_rate": 0.01, "loss": 1.9837, "step": 56736 }, { "epoch": 5.827154154256958, "grad_norm": 0.10465902090072632, "learning_rate": 0.01, "loss": 1.9638, "step": 56739 }, { "epoch": 5.8274622573688, "grad_norm": 0.08767648041248322, "learning_rate": 0.01, "loss": 1.9868, "step": 56742 }, { "epoch": 5.827770360480641, "grad_norm": 0.04951823502779007, "learning_rate": 0.01, "loss": 2.0045, "step": 56745 }, { "epoch": 5.828078463592482, "grad_norm": 0.04926539584994316, "learning_rate": 0.01, "loss": 1.984, "step": 56748 }, { "epoch": 5.8283865667043235, "grad_norm": 0.05217811465263367, "learning_rate": 0.01, "loss": 1.9942, "step": 56751 }, { "epoch": 5.828694669816165, "grad_norm": 0.0460977777838707, "learning_rate": 0.01, "loss": 2.0027, "step": 56754 }, { "epoch": 5.829002772928007, "grad_norm": 0.046701837331056595, "learning_rate": 0.01, "loss": 2.0079, "step": 56757 }, { "epoch": 5.829310876039848, "grad_norm": 0.07417173683643341, "learning_rate": 0.01, "loss": 1.9739, "step": 56760 }, { "epoch": 5.8296189791516895, "grad_norm": 0.09357844293117523, "learning_rate": 0.01, "loss": 1.9957, "step": 56763 }, { "epoch": 5.829927082263531, "grad_norm": 0.05171707645058632, "learning_rate": 0.01, "loss": 1.9856, "step": 56766 }, { "epoch": 5.830235185375372, "grad_norm": 0.08236930519342422, "learning_rate": 0.01, "loss": 1.9891, "step": 56769 }, { "epoch": 5.830543288487213, "grad_norm": 0.041667647659778595, "learning_rate": 0.01, "loss": 1.9927, "step": 56772 }, { "epoch": 5.830851391599055, "grad_norm": 0.055335305631160736, "learning_rate": 0.01, "loss": 1.9976, "step": 56775 }, { "epoch": 5.831159494710897, "grad_norm": 0.03667658939957619, "learning_rate": 0.01, "loss": 1.9859, "step": 56778 }, { "epoch": 5.831467597822738, "grad_norm": 0.05994969233870506, "learning_rate": 0.01, "loss": 2.0108, "step": 56781 }, { "epoch": 5.831775700934579, "grad_norm": 0.1395941525697708, "learning_rate": 0.01, "loss": 2.0184, "step": 56784 }, { "epoch": 5.832083804046421, "grad_norm": 0.0712098479270935, "learning_rate": 0.01, "loss": 1.9823, "step": 56787 }, { "epoch": 5.832391907158263, "grad_norm": 0.08360549062490463, "learning_rate": 0.01, "loss": 1.985, "step": 56790 }, { "epoch": 5.832700010270104, "grad_norm": 0.07638189941644669, "learning_rate": 0.01, "loss": 1.9848, "step": 56793 }, { "epoch": 5.833008113381945, "grad_norm": 0.07885719835758209, "learning_rate": 0.01, "loss": 1.9919, "step": 56796 }, { "epoch": 5.833316216493786, "grad_norm": 0.055790308862924576, "learning_rate": 0.01, "loss": 1.9866, "step": 56799 }, { "epoch": 5.833624319605628, "grad_norm": 0.03926543518900871, "learning_rate": 0.01, "loss": 1.9928, "step": 56802 }, { "epoch": 5.83393242271747, "grad_norm": 0.10085275024175644, "learning_rate": 0.01, "loss": 1.9989, "step": 56805 }, { "epoch": 5.834240525829311, "grad_norm": 0.07995092123746872, "learning_rate": 0.01, "loss": 1.9767, "step": 56808 }, { "epoch": 5.834548628941152, "grad_norm": 0.09439895302057266, "learning_rate": 0.01, "loss": 1.9854, "step": 56811 }, { "epoch": 5.834856732052994, "grad_norm": 0.04961158335208893, "learning_rate": 0.01, "loss": 1.9933, "step": 56814 }, { "epoch": 5.835164835164835, "grad_norm": 0.052938688546419144, "learning_rate": 0.01, "loss": 1.9686, "step": 56817 }, { "epoch": 5.835472938276677, "grad_norm": 0.04960361868143082, "learning_rate": 0.01, "loss": 1.9919, "step": 56820 }, { "epoch": 5.835781041388518, "grad_norm": 0.08586288243532181, "learning_rate": 0.01, "loss": 2.0012, "step": 56823 }, { "epoch": 5.8360891445003595, "grad_norm": 0.07194332033395767, "learning_rate": 0.01, "loss": 2.01, "step": 56826 }, { "epoch": 5.836397247612201, "grad_norm": 0.062126416712999344, "learning_rate": 0.01, "loss": 1.9826, "step": 56829 }, { "epoch": 5.836705350724042, "grad_norm": 0.06656119972467422, "learning_rate": 0.01, "loss": 2.0029, "step": 56832 }, { "epoch": 5.837013453835883, "grad_norm": 0.0884518176317215, "learning_rate": 0.01, "loss": 1.9983, "step": 56835 }, { "epoch": 5.8373215569477255, "grad_norm": 0.04115528613328934, "learning_rate": 0.01, "loss": 1.9905, "step": 56838 }, { "epoch": 5.837629660059567, "grad_norm": 0.05185645818710327, "learning_rate": 0.01, "loss": 2.0076, "step": 56841 }, { "epoch": 5.837937763171408, "grad_norm": 0.11587214469909668, "learning_rate": 0.01, "loss": 1.9973, "step": 56844 }, { "epoch": 5.838245866283249, "grad_norm": 0.13516643643379211, "learning_rate": 0.01, "loss": 2.0027, "step": 56847 }, { "epoch": 5.8385539693950905, "grad_norm": 0.0748513862490654, "learning_rate": 0.01, "loss": 1.9723, "step": 56850 }, { "epoch": 5.838862072506933, "grad_norm": 0.12254352867603302, "learning_rate": 0.01, "loss": 1.9822, "step": 56853 }, { "epoch": 5.839170175618774, "grad_norm": 0.07876627147197723, "learning_rate": 0.01, "loss": 2.0085, "step": 56856 }, { "epoch": 5.839478278730615, "grad_norm": 0.07364428788423538, "learning_rate": 0.01, "loss": 1.9794, "step": 56859 }, { "epoch": 5.8397863818424565, "grad_norm": 0.0350789949297905, "learning_rate": 0.01, "loss": 1.9687, "step": 56862 }, { "epoch": 5.840094484954298, "grad_norm": 0.046593520790338516, "learning_rate": 0.01, "loss": 2.0026, "step": 56865 }, { "epoch": 5.840402588066139, "grad_norm": 0.05111907422542572, "learning_rate": 0.01, "loss": 1.9659, "step": 56868 }, { "epoch": 5.840710691177981, "grad_norm": 0.046573907136917114, "learning_rate": 0.01, "loss": 2.0105, "step": 56871 }, { "epoch": 5.841018794289822, "grad_norm": 0.05139172077178955, "learning_rate": 0.01, "loss": 1.9674, "step": 56874 }, { "epoch": 5.841326897401664, "grad_norm": 0.06465848535299301, "learning_rate": 0.01, "loss": 1.994, "step": 56877 }, { "epoch": 5.841635000513505, "grad_norm": 0.046135857701301575, "learning_rate": 0.01, "loss": 1.9875, "step": 56880 }, { "epoch": 5.841943103625347, "grad_norm": 0.039065003395080566, "learning_rate": 0.01, "loss": 1.9676, "step": 56883 }, { "epoch": 5.842251206737188, "grad_norm": 0.05646162107586861, "learning_rate": 0.01, "loss": 1.9904, "step": 56886 }, { "epoch": 5.84255930984903, "grad_norm": 0.1448718160390854, "learning_rate": 0.01, "loss": 2.0192, "step": 56889 }, { "epoch": 5.842867412960871, "grad_norm": 0.12302438169717789, "learning_rate": 0.01, "loss": 2.0024, "step": 56892 }, { "epoch": 5.843175516072712, "grad_norm": 0.0701448991894722, "learning_rate": 0.01, "loss": 2.0049, "step": 56895 }, { "epoch": 5.843483619184553, "grad_norm": 0.08622895181179047, "learning_rate": 0.01, "loss": 1.9951, "step": 56898 }, { "epoch": 5.8437917222963955, "grad_norm": 0.04280500486493111, "learning_rate": 0.01, "loss": 1.954, "step": 56901 }, { "epoch": 5.844099825408237, "grad_norm": 0.04177187755703926, "learning_rate": 0.01, "loss": 1.9822, "step": 56904 }, { "epoch": 5.844407928520078, "grad_norm": 0.04321052134037018, "learning_rate": 0.01, "loss": 1.9818, "step": 56907 }, { "epoch": 5.844716031631919, "grad_norm": 0.10391934216022491, "learning_rate": 0.01, "loss": 1.9831, "step": 56910 }, { "epoch": 5.845024134743761, "grad_norm": 0.07634170353412628, "learning_rate": 0.01, "loss": 1.9638, "step": 56913 }, { "epoch": 5.845332237855603, "grad_norm": 0.05854891613125801, "learning_rate": 0.01, "loss": 1.9675, "step": 56916 }, { "epoch": 5.845640340967444, "grad_norm": 0.08058078587055206, "learning_rate": 0.01, "loss": 2.0034, "step": 56919 }, { "epoch": 5.845948444079285, "grad_norm": 0.10716883093118668, "learning_rate": 0.01, "loss": 1.9883, "step": 56922 }, { "epoch": 5.8462565471911265, "grad_norm": 0.1634785681962967, "learning_rate": 0.01, "loss": 1.9998, "step": 56925 }, { "epoch": 5.846564650302968, "grad_norm": 0.08859071135520935, "learning_rate": 0.01, "loss": 1.9666, "step": 56928 }, { "epoch": 5.846872753414809, "grad_norm": 0.05146167427301407, "learning_rate": 0.01, "loss": 1.997, "step": 56931 }, { "epoch": 5.847180856526651, "grad_norm": 0.057162970304489136, "learning_rate": 0.01, "loss": 2.0044, "step": 56934 }, { "epoch": 5.8474889596384925, "grad_norm": 0.042328424751758575, "learning_rate": 0.01, "loss": 2.005, "step": 56937 }, { "epoch": 5.847797062750334, "grad_norm": 0.05657609552145004, "learning_rate": 0.01, "loss": 2.0205, "step": 56940 }, { "epoch": 5.848105165862175, "grad_norm": 0.11977431923151016, "learning_rate": 0.01, "loss": 2.007, "step": 56943 }, { "epoch": 5.848413268974016, "grad_norm": 0.13686580955982208, "learning_rate": 0.01, "loss": 2.0215, "step": 56946 }, { "epoch": 5.848721372085858, "grad_norm": 0.06063876673579216, "learning_rate": 0.01, "loss": 2.0057, "step": 56949 }, { "epoch": 5.8490294751977, "grad_norm": 0.05211206525564194, "learning_rate": 0.01, "loss": 1.9885, "step": 56952 }, { "epoch": 5.849337578309541, "grad_norm": 0.03639880567789078, "learning_rate": 0.01, "loss": 1.9987, "step": 56955 }, { "epoch": 5.849645681421382, "grad_norm": 0.07671289145946503, "learning_rate": 0.01, "loss": 2.0046, "step": 56958 }, { "epoch": 5.8499537845332235, "grad_norm": 0.03963657468557358, "learning_rate": 0.01, "loss": 1.9815, "step": 56961 }, { "epoch": 5.850261887645065, "grad_norm": 0.040136873722076416, "learning_rate": 0.01, "loss": 1.9858, "step": 56964 }, { "epoch": 5.850569990756907, "grad_norm": 0.06622539460659027, "learning_rate": 0.01, "loss": 1.992, "step": 56967 }, { "epoch": 5.850878093868748, "grad_norm": 0.04892749339342117, "learning_rate": 0.01, "loss": 1.9727, "step": 56970 }, { "epoch": 5.851186196980589, "grad_norm": 0.10630463808774948, "learning_rate": 0.01, "loss": 1.9775, "step": 56973 }, { "epoch": 5.851494300092431, "grad_norm": 0.08189810067415237, "learning_rate": 0.01, "loss": 1.9867, "step": 56976 }, { "epoch": 5.851802403204273, "grad_norm": 0.04350016266107559, "learning_rate": 0.01, "loss": 1.984, "step": 56979 }, { "epoch": 5.852110506316114, "grad_norm": 0.04086649417877197, "learning_rate": 0.01, "loss": 1.9975, "step": 56982 }, { "epoch": 5.852418609427955, "grad_norm": 0.0460704006254673, "learning_rate": 0.01, "loss": 2.0029, "step": 56985 }, { "epoch": 5.852726712539797, "grad_norm": 0.04429091140627861, "learning_rate": 0.01, "loss": 1.9772, "step": 56988 }, { "epoch": 5.853034815651638, "grad_norm": 0.08944038301706314, "learning_rate": 0.01, "loss": 1.9889, "step": 56991 }, { "epoch": 5.853342918763479, "grad_norm": 0.039852675050497055, "learning_rate": 0.01, "loss": 1.9906, "step": 56994 }, { "epoch": 5.853651021875321, "grad_norm": 0.051225848495960236, "learning_rate": 0.01, "loss": 1.995, "step": 56997 }, { "epoch": 5.8539591249871625, "grad_norm": 0.07715211063623428, "learning_rate": 0.01, "loss": 1.9747, "step": 57000 }, { "epoch": 5.854267228099004, "grad_norm": 0.05754586309194565, "learning_rate": 0.01, "loss": 1.9835, "step": 57003 }, { "epoch": 5.854575331210845, "grad_norm": 0.0871339812874794, "learning_rate": 0.01, "loss": 1.9903, "step": 57006 }, { "epoch": 5.854883434322686, "grad_norm": 0.03794780746102333, "learning_rate": 0.01, "loss": 1.9883, "step": 57009 }, { "epoch": 5.8551915374345285, "grad_norm": 0.039838340133428574, "learning_rate": 0.01, "loss": 1.9881, "step": 57012 }, { "epoch": 5.85549964054637, "grad_norm": 0.03378939628601074, "learning_rate": 0.01, "loss": 1.9892, "step": 57015 }, { "epoch": 5.855807743658211, "grad_norm": 0.033859383314847946, "learning_rate": 0.01, "loss": 1.9775, "step": 57018 }, { "epoch": 5.856115846770052, "grad_norm": 0.08148138970136642, "learning_rate": 0.01, "loss": 2.0163, "step": 57021 }, { "epoch": 5.8564239498818935, "grad_norm": 0.11949529498815536, "learning_rate": 0.01, "loss": 1.9921, "step": 57024 }, { "epoch": 5.856732052993735, "grad_norm": 0.06349018216133118, "learning_rate": 0.01, "loss": 1.9789, "step": 57027 }, { "epoch": 5.857040156105577, "grad_norm": 0.03922737017273903, "learning_rate": 0.01, "loss": 1.9835, "step": 57030 }, { "epoch": 5.857348259217418, "grad_norm": 0.04704468697309494, "learning_rate": 0.01, "loss": 2.0088, "step": 57033 }, { "epoch": 5.8576563623292595, "grad_norm": 0.06949195265769958, "learning_rate": 0.01, "loss": 2.005, "step": 57036 }, { "epoch": 5.857964465441101, "grad_norm": 0.08585426956415176, "learning_rate": 0.01, "loss": 1.9679, "step": 57039 }, { "epoch": 5.858272568552943, "grad_norm": 0.09531115740537643, "learning_rate": 0.01, "loss": 1.9904, "step": 57042 }, { "epoch": 5.858580671664784, "grad_norm": 0.07545875012874603, "learning_rate": 0.01, "loss": 1.9858, "step": 57045 }, { "epoch": 5.858888774776625, "grad_norm": 0.11838539689779282, "learning_rate": 0.01, "loss": 1.9612, "step": 57048 }, { "epoch": 5.859196877888467, "grad_norm": 0.1177034080028534, "learning_rate": 0.01, "loss": 2.0015, "step": 57051 }, { "epoch": 5.859504981000308, "grad_norm": 0.08327648043632507, "learning_rate": 0.01, "loss": 1.9731, "step": 57054 }, { "epoch": 5.859813084112149, "grad_norm": 0.05028563365340233, "learning_rate": 0.01, "loss": 1.977, "step": 57057 }, { "epoch": 5.860121187223991, "grad_norm": 0.03819550573825836, "learning_rate": 0.01, "loss": 1.9791, "step": 57060 }, { "epoch": 5.860429290335833, "grad_norm": 0.06301325559616089, "learning_rate": 0.01, "loss": 1.9996, "step": 57063 }, { "epoch": 5.860737393447674, "grad_norm": 0.09482091665267944, "learning_rate": 0.01, "loss": 1.9907, "step": 57066 }, { "epoch": 5.861045496559515, "grad_norm": 0.07003244757652283, "learning_rate": 0.01, "loss": 1.9692, "step": 57069 }, { "epoch": 5.861353599671356, "grad_norm": 0.04322872310876846, "learning_rate": 0.01, "loss": 1.96, "step": 57072 }, { "epoch": 5.8616617027831985, "grad_norm": 0.04223179072141647, "learning_rate": 0.01, "loss": 1.9618, "step": 57075 }, { "epoch": 5.86196980589504, "grad_norm": 0.09732475876808167, "learning_rate": 0.01, "loss": 1.9774, "step": 57078 }, { "epoch": 5.862277909006881, "grad_norm": 0.08102111518383026, "learning_rate": 0.01, "loss": 2.0031, "step": 57081 }, { "epoch": 5.862586012118722, "grad_norm": 0.06564348191022873, "learning_rate": 0.01, "loss": 1.989, "step": 57084 }, { "epoch": 5.862894115230564, "grad_norm": 0.11364365369081497, "learning_rate": 0.01, "loss": 1.9736, "step": 57087 }, { "epoch": 5.863202218342405, "grad_norm": 0.05005546286702156, "learning_rate": 0.01, "loss": 1.9899, "step": 57090 }, { "epoch": 5.863510321454247, "grad_norm": 0.10924969613552094, "learning_rate": 0.01, "loss": 2.0024, "step": 57093 }, { "epoch": 5.863818424566088, "grad_norm": 0.06546615809202194, "learning_rate": 0.01, "loss": 2.0178, "step": 57096 }, { "epoch": 5.8641265276779295, "grad_norm": 0.04817098379135132, "learning_rate": 0.01, "loss": 2.0082, "step": 57099 }, { "epoch": 5.864434630789771, "grad_norm": 0.038462646305561066, "learning_rate": 0.01, "loss": 1.9894, "step": 57102 }, { "epoch": 5.864742733901612, "grad_norm": 0.03544708341360092, "learning_rate": 0.01, "loss": 1.9882, "step": 57105 }, { "epoch": 5.865050837013454, "grad_norm": 0.032861895859241486, "learning_rate": 0.01, "loss": 1.9565, "step": 57108 }, { "epoch": 5.8653589401252955, "grad_norm": 0.13162890076637268, "learning_rate": 0.01, "loss": 1.9916, "step": 57111 }, { "epoch": 5.865667043237137, "grad_norm": 0.13489802181720734, "learning_rate": 0.01, "loss": 1.9733, "step": 57114 }, { "epoch": 5.865975146348978, "grad_norm": 0.08409550786018372, "learning_rate": 0.01, "loss": 2.0175, "step": 57117 }, { "epoch": 5.866283249460819, "grad_norm": 0.04150281846523285, "learning_rate": 0.01, "loss": 2.0027, "step": 57120 }, { "epoch": 5.8665913525726605, "grad_norm": 0.04543888941407204, "learning_rate": 0.01, "loss": 1.9891, "step": 57123 }, { "epoch": 5.866899455684503, "grad_norm": 0.0386471152305603, "learning_rate": 0.01, "loss": 1.9853, "step": 57126 }, { "epoch": 5.867207558796344, "grad_norm": 0.029136119410395622, "learning_rate": 0.01, "loss": 1.9899, "step": 57129 }, { "epoch": 5.867515661908185, "grad_norm": 0.031866688281297684, "learning_rate": 0.01, "loss": 2.0055, "step": 57132 }, { "epoch": 5.8678237650200264, "grad_norm": 0.05178746208548546, "learning_rate": 0.01, "loss": 1.9831, "step": 57135 }, { "epoch": 5.868131868131869, "grad_norm": 0.10231690853834152, "learning_rate": 0.01, "loss": 1.9925, "step": 57138 }, { "epoch": 5.86843997124371, "grad_norm": 0.06103064864873886, "learning_rate": 0.01, "loss": 2.0041, "step": 57141 }, { "epoch": 5.868748074355551, "grad_norm": 0.05619410425424576, "learning_rate": 0.01, "loss": 1.9982, "step": 57144 }, { "epoch": 5.869056177467392, "grad_norm": 0.07958339899778366, "learning_rate": 0.01, "loss": 1.963, "step": 57147 }, { "epoch": 5.869364280579234, "grad_norm": 0.12638811767101288, "learning_rate": 0.01, "loss": 1.9885, "step": 57150 }, { "epoch": 5.869672383691075, "grad_norm": 0.05043957009911537, "learning_rate": 0.01, "loss": 1.9775, "step": 57153 }, { "epoch": 5.869980486802917, "grad_norm": 0.049426887184381485, "learning_rate": 0.01, "loss": 1.9766, "step": 57156 }, { "epoch": 5.870288589914758, "grad_norm": 0.058896906673908234, "learning_rate": 0.01, "loss": 1.9848, "step": 57159 }, { "epoch": 5.8705966930266, "grad_norm": 0.06517619639635086, "learning_rate": 0.01, "loss": 2.0153, "step": 57162 }, { "epoch": 5.870904796138441, "grad_norm": 0.04372745007276535, "learning_rate": 0.01, "loss": 1.9784, "step": 57165 }, { "epoch": 5.871212899250282, "grad_norm": 0.11192513257265091, "learning_rate": 0.01, "loss": 1.9899, "step": 57168 }, { "epoch": 5.871521002362124, "grad_norm": 0.04966152831912041, "learning_rate": 0.01, "loss": 2.0124, "step": 57171 }, { "epoch": 5.8718291054739655, "grad_norm": 0.04251588135957718, "learning_rate": 0.01, "loss": 2.0008, "step": 57174 }, { "epoch": 5.872137208585807, "grad_norm": 0.03627980872988701, "learning_rate": 0.01, "loss": 1.9823, "step": 57177 }, { "epoch": 5.872445311697648, "grad_norm": 0.035204388201236725, "learning_rate": 0.01, "loss": 2.0024, "step": 57180 }, { "epoch": 5.872753414809489, "grad_norm": 0.097038634121418, "learning_rate": 0.01, "loss": 2.0121, "step": 57183 }, { "epoch": 5.873061517921331, "grad_norm": 0.0762251615524292, "learning_rate": 0.01, "loss": 1.9699, "step": 57186 }, { "epoch": 5.873369621033173, "grad_norm": 0.12268881499767303, "learning_rate": 0.01, "loss": 1.9672, "step": 57189 }, { "epoch": 5.873677724145014, "grad_norm": 0.04830612614750862, "learning_rate": 0.01, "loss": 1.999, "step": 57192 }, { "epoch": 5.873985827256855, "grad_norm": 0.03913014009594917, "learning_rate": 0.01, "loss": 1.9709, "step": 57195 }, { "epoch": 5.8742939303686965, "grad_norm": 0.043585240840911865, "learning_rate": 0.01, "loss": 1.9949, "step": 57198 }, { "epoch": 5.874602033480538, "grad_norm": 0.07954023778438568, "learning_rate": 0.01, "loss": 2.0041, "step": 57201 }, { "epoch": 5.87491013659238, "grad_norm": 0.08643771708011627, "learning_rate": 0.01, "loss": 1.9933, "step": 57204 }, { "epoch": 5.875218239704221, "grad_norm": 0.0801578015089035, "learning_rate": 0.01, "loss": 2.0187, "step": 57207 }, { "epoch": 5.8755263428160625, "grad_norm": 0.07038013637065887, "learning_rate": 0.01, "loss": 1.9959, "step": 57210 }, { "epoch": 5.875834445927904, "grad_norm": 0.09337194263935089, "learning_rate": 0.01, "loss": 2.0194, "step": 57213 }, { "epoch": 5.876142549039745, "grad_norm": 0.07053009420633316, "learning_rate": 0.01, "loss": 1.9872, "step": 57216 }, { "epoch": 5.876450652151586, "grad_norm": 0.0576632060110569, "learning_rate": 0.01, "loss": 1.9923, "step": 57219 }, { "epoch": 5.876758755263428, "grad_norm": 0.0459139384329319, "learning_rate": 0.01, "loss": 1.9598, "step": 57222 }, { "epoch": 5.87706685837527, "grad_norm": 0.0351143553853035, "learning_rate": 0.01, "loss": 1.993, "step": 57225 }, { "epoch": 5.877374961487111, "grad_norm": 0.07681302726268768, "learning_rate": 0.01, "loss": 2.0058, "step": 57228 }, { "epoch": 5.877683064598952, "grad_norm": 0.08413039892911911, "learning_rate": 0.01, "loss": 2.0022, "step": 57231 }, { "epoch": 5.877991167710794, "grad_norm": 0.07289810478687286, "learning_rate": 0.01, "loss": 1.9994, "step": 57234 }, { "epoch": 5.878299270822636, "grad_norm": 0.10013864189386368, "learning_rate": 0.01, "loss": 1.9881, "step": 57237 }, { "epoch": 5.878607373934477, "grad_norm": 0.042369335889816284, "learning_rate": 0.01, "loss": 1.9895, "step": 57240 }, { "epoch": 5.878915477046318, "grad_norm": 0.06880860030651093, "learning_rate": 0.01, "loss": 1.9864, "step": 57243 }, { "epoch": 5.879223580158159, "grad_norm": 0.07166635245084763, "learning_rate": 0.01, "loss": 2.0128, "step": 57246 }, { "epoch": 5.879531683270001, "grad_norm": 0.07346632331609726, "learning_rate": 0.01, "loss": 2.003, "step": 57249 }, { "epoch": 5.879839786381843, "grad_norm": 0.039783868938684464, "learning_rate": 0.01, "loss": 1.9899, "step": 57252 }, { "epoch": 5.880147889493684, "grad_norm": 0.03261881321668625, "learning_rate": 0.01, "loss": 1.9996, "step": 57255 }, { "epoch": 5.880455992605525, "grad_norm": 0.05967556685209274, "learning_rate": 0.01, "loss": 1.9614, "step": 57258 }, { "epoch": 5.880764095717367, "grad_norm": 0.09463750571012497, "learning_rate": 0.01, "loss": 1.9969, "step": 57261 }, { "epoch": 5.881072198829208, "grad_norm": 0.11261694133281708, "learning_rate": 0.01, "loss": 1.9916, "step": 57264 }, { "epoch": 5.88138030194105, "grad_norm": 0.04502738267183304, "learning_rate": 0.01, "loss": 2.0151, "step": 57267 }, { "epoch": 5.881688405052891, "grad_norm": 0.048553161323070526, "learning_rate": 0.01, "loss": 1.9656, "step": 57270 }, { "epoch": 5.8819965081647325, "grad_norm": 0.04814717173576355, "learning_rate": 0.01, "loss": 1.9772, "step": 57273 }, { "epoch": 5.882304611276574, "grad_norm": 0.11577591300010681, "learning_rate": 0.01, "loss": 1.9696, "step": 57276 }, { "epoch": 5.882612714388415, "grad_norm": 0.07953160256147385, "learning_rate": 0.01, "loss": 1.9866, "step": 57279 }, { "epoch": 5.882920817500256, "grad_norm": 0.0455656498670578, "learning_rate": 0.01, "loss": 1.9971, "step": 57282 }, { "epoch": 5.8832289206120985, "grad_norm": 0.035418231040239334, "learning_rate": 0.01, "loss": 1.9733, "step": 57285 }, { "epoch": 5.88353702372394, "grad_norm": 0.03444494679570198, "learning_rate": 0.01, "loss": 1.9998, "step": 57288 }, { "epoch": 5.883845126835781, "grad_norm": 0.03543773293495178, "learning_rate": 0.01, "loss": 1.9938, "step": 57291 }, { "epoch": 5.884153229947622, "grad_norm": 0.032163105905056, "learning_rate": 0.01, "loss": 1.9931, "step": 57294 }, { "epoch": 5.884461333059464, "grad_norm": 0.051954787224531174, "learning_rate": 0.01, "loss": 2.0193, "step": 57297 }, { "epoch": 5.884769436171306, "grad_norm": 0.046241723001003265, "learning_rate": 0.01, "loss": 1.9825, "step": 57300 }, { "epoch": 5.885077539283147, "grad_norm": 0.08583594113588333, "learning_rate": 0.01, "loss": 1.9741, "step": 57303 }, { "epoch": 5.885385642394988, "grad_norm": 0.10830538719892502, "learning_rate": 0.01, "loss": 1.993, "step": 57306 }, { "epoch": 5.885693745506829, "grad_norm": 0.04670783504843712, "learning_rate": 0.01, "loss": 1.9688, "step": 57309 }, { "epoch": 5.886001848618671, "grad_norm": 0.08331948518753052, "learning_rate": 0.01, "loss": 1.9914, "step": 57312 }, { "epoch": 5.886309951730513, "grad_norm": 0.12991750240325928, "learning_rate": 0.01, "loss": 1.9861, "step": 57315 }, { "epoch": 5.886618054842354, "grad_norm": 0.03781532123684883, "learning_rate": 0.01, "loss": 1.9742, "step": 57318 }, { "epoch": 5.886926157954195, "grad_norm": 0.03549522906541824, "learning_rate": 0.01, "loss": 2.0197, "step": 57321 }, { "epoch": 5.887234261066037, "grad_norm": 0.03147748485207558, "learning_rate": 0.01, "loss": 1.9779, "step": 57324 }, { "epoch": 5.887542364177878, "grad_norm": 0.06375817954540253, "learning_rate": 0.01, "loss": 1.9794, "step": 57327 }, { "epoch": 5.88785046728972, "grad_norm": 0.06203543394804001, "learning_rate": 0.01, "loss": 1.9854, "step": 57330 }, { "epoch": 5.888158570401561, "grad_norm": 0.07817850261926651, "learning_rate": 0.01, "loss": 1.9844, "step": 57333 }, { "epoch": 5.888466673513403, "grad_norm": 0.06806226819753647, "learning_rate": 0.01, "loss": 1.9795, "step": 57336 }, { "epoch": 5.888774776625244, "grad_norm": 0.06197969987988472, "learning_rate": 0.01, "loss": 1.973, "step": 57339 }, { "epoch": 5.889082879737085, "grad_norm": 0.10083005577325821, "learning_rate": 0.01, "loss": 1.9745, "step": 57342 }, { "epoch": 5.889390982848926, "grad_norm": 0.04016329348087311, "learning_rate": 0.01, "loss": 1.9907, "step": 57345 }, { "epoch": 5.8896990859607685, "grad_norm": 0.047332972288131714, "learning_rate": 0.01, "loss": 1.9967, "step": 57348 }, { "epoch": 5.89000718907261, "grad_norm": 0.042285602539777756, "learning_rate": 0.01, "loss": 1.9842, "step": 57351 }, { "epoch": 5.890315292184451, "grad_norm": 0.0776951014995575, "learning_rate": 0.01, "loss": 1.9833, "step": 57354 }, { "epoch": 5.890623395296292, "grad_norm": 0.1287294626235962, "learning_rate": 0.01, "loss": 2.0342, "step": 57357 }, { "epoch": 5.890931498408134, "grad_norm": 0.08230342715978622, "learning_rate": 0.01, "loss": 1.9969, "step": 57360 }, { "epoch": 5.891239601519976, "grad_norm": 0.03847470134496689, "learning_rate": 0.01, "loss": 1.965, "step": 57363 }, { "epoch": 5.891547704631817, "grad_norm": 0.07836028188467026, "learning_rate": 0.01, "loss": 1.9971, "step": 57366 }, { "epoch": 5.891855807743658, "grad_norm": 0.03260621055960655, "learning_rate": 0.01, "loss": 2.0024, "step": 57369 }, { "epoch": 5.8921639108554995, "grad_norm": 0.036644842475652695, "learning_rate": 0.01, "loss": 1.9969, "step": 57372 }, { "epoch": 5.892472013967341, "grad_norm": 0.03415127098560333, "learning_rate": 0.01, "loss": 1.9984, "step": 57375 }, { "epoch": 5.892780117079182, "grad_norm": 0.05682433769106865, "learning_rate": 0.01, "loss": 1.9843, "step": 57378 }, { "epoch": 5.893088220191024, "grad_norm": 0.09808420389890671, "learning_rate": 0.01, "loss": 1.9828, "step": 57381 }, { "epoch": 5.8933963233028654, "grad_norm": 0.05321237072348595, "learning_rate": 0.01, "loss": 2.0029, "step": 57384 }, { "epoch": 5.893704426414707, "grad_norm": 0.03606530278921127, "learning_rate": 0.01, "loss": 1.9851, "step": 57387 }, { "epoch": 5.894012529526548, "grad_norm": 0.04051011800765991, "learning_rate": 0.01, "loss": 1.9815, "step": 57390 }, { "epoch": 5.89432063263839, "grad_norm": 0.039596255868673325, "learning_rate": 0.01, "loss": 1.9747, "step": 57393 }, { "epoch": 5.894628735750231, "grad_norm": 0.0865618884563446, "learning_rate": 0.01, "loss": 1.9794, "step": 57396 }, { "epoch": 5.894936838862073, "grad_norm": 0.05438371002674103, "learning_rate": 0.01, "loss": 1.9912, "step": 57399 }, { "epoch": 5.895244941973914, "grad_norm": 0.14930184185504913, "learning_rate": 0.01, "loss": 1.9767, "step": 57402 }, { "epoch": 5.895553045085755, "grad_norm": 0.05614696815609932, "learning_rate": 0.01, "loss": 1.9998, "step": 57405 }, { "epoch": 5.895861148197596, "grad_norm": 0.06839166581630707, "learning_rate": 0.01, "loss": 1.985, "step": 57408 }, { "epoch": 5.896169251309439, "grad_norm": 0.0660998523235321, "learning_rate": 0.01, "loss": 1.9721, "step": 57411 }, { "epoch": 5.89647735442128, "grad_norm": 0.07487353682518005, "learning_rate": 0.01, "loss": 1.996, "step": 57414 }, { "epoch": 5.896785457533121, "grad_norm": 0.05647150054574013, "learning_rate": 0.01, "loss": 2.0115, "step": 57417 }, { "epoch": 5.897093560644962, "grad_norm": 0.04570586606860161, "learning_rate": 0.01, "loss": 1.9749, "step": 57420 }, { "epoch": 5.897401663756804, "grad_norm": 0.10942738503217697, "learning_rate": 0.01, "loss": 1.9793, "step": 57423 }, { "epoch": 5.897709766868646, "grad_norm": 0.08452852070331573, "learning_rate": 0.01, "loss": 2.0137, "step": 57426 }, { "epoch": 5.898017869980487, "grad_norm": 0.07079064846038818, "learning_rate": 0.01, "loss": 2.0166, "step": 57429 }, { "epoch": 5.898325973092328, "grad_norm": 0.09797564148902893, "learning_rate": 0.01, "loss": 1.949, "step": 57432 }, { "epoch": 5.89863407620417, "grad_norm": 0.08445065468549728, "learning_rate": 0.01, "loss": 2.0102, "step": 57435 }, { "epoch": 5.898942179316011, "grad_norm": 0.08496986329555511, "learning_rate": 0.01, "loss": 1.9857, "step": 57438 }, { "epoch": 5.899250282427852, "grad_norm": 0.04799206554889679, "learning_rate": 0.01, "loss": 1.9913, "step": 57441 }, { "epoch": 5.899558385539694, "grad_norm": 0.047865018248558044, "learning_rate": 0.01, "loss": 2.0047, "step": 57444 }, { "epoch": 5.8998664886515355, "grad_norm": 0.040415599942207336, "learning_rate": 0.01, "loss": 2.0027, "step": 57447 }, { "epoch": 5.900174591763377, "grad_norm": 0.05303875729441643, "learning_rate": 0.01, "loss": 1.986, "step": 57450 }, { "epoch": 5.900482694875218, "grad_norm": 0.13320137560367584, "learning_rate": 0.01, "loss": 2.0125, "step": 57453 }, { "epoch": 5.900790797987059, "grad_norm": 0.05218903720378876, "learning_rate": 0.01, "loss": 2.0022, "step": 57456 }, { "epoch": 5.9010989010989015, "grad_norm": 0.06745684146881104, "learning_rate": 0.01, "loss": 1.9737, "step": 57459 }, { "epoch": 5.901407004210743, "grad_norm": 0.04268602281808853, "learning_rate": 0.01, "loss": 1.9863, "step": 57462 }, { "epoch": 5.901715107322584, "grad_norm": 0.034592967480421066, "learning_rate": 0.01, "loss": 1.9977, "step": 57465 }, { "epoch": 5.902023210434425, "grad_norm": 0.11330728232860565, "learning_rate": 0.01, "loss": 1.9778, "step": 57468 }, { "epoch": 5.9023313135462665, "grad_norm": 0.035573434084653854, "learning_rate": 0.01, "loss": 2.0189, "step": 57471 }, { "epoch": 5.902639416658108, "grad_norm": 0.09724743664264679, "learning_rate": 0.01, "loss": 2.0004, "step": 57474 }, { "epoch": 5.90294751976995, "grad_norm": 0.06769842654466629, "learning_rate": 0.01, "loss": 1.9953, "step": 57477 }, { "epoch": 5.903255622881791, "grad_norm": 0.07262661308050156, "learning_rate": 0.01, "loss": 1.972, "step": 57480 }, { "epoch": 5.903563725993632, "grad_norm": 0.08442901074886322, "learning_rate": 0.01, "loss": 1.9921, "step": 57483 }, { "epoch": 5.903871829105474, "grad_norm": 0.07244334369897842, "learning_rate": 0.01, "loss": 1.9969, "step": 57486 }, { "epoch": 5.904179932217316, "grad_norm": 0.0898541584610939, "learning_rate": 0.01, "loss": 1.9972, "step": 57489 }, { "epoch": 5.904488035329157, "grad_norm": 0.043548524379730225, "learning_rate": 0.01, "loss": 2.0112, "step": 57492 }, { "epoch": 5.904796138440998, "grad_norm": 0.035105109214782715, "learning_rate": 0.01, "loss": 1.968, "step": 57495 }, { "epoch": 5.90510424155284, "grad_norm": 0.06654632091522217, "learning_rate": 0.01, "loss": 1.9919, "step": 57498 }, { "epoch": 5.905412344664681, "grad_norm": 0.09614937752485275, "learning_rate": 0.01, "loss": 1.9801, "step": 57501 }, { "epoch": 5.905720447776522, "grad_norm": 0.07232387363910675, "learning_rate": 0.01, "loss": 2.0032, "step": 57504 }, { "epoch": 5.906028550888364, "grad_norm": 0.06795191764831543, "learning_rate": 0.01, "loss": 1.9686, "step": 57507 }, { "epoch": 5.906336654000206, "grad_norm": 0.1627775877714157, "learning_rate": 0.01, "loss": 2.0031, "step": 57510 }, { "epoch": 5.906644757112047, "grad_norm": 0.043800655752420425, "learning_rate": 0.01, "loss": 1.9972, "step": 57513 }, { "epoch": 5.906952860223888, "grad_norm": 0.09019804745912552, "learning_rate": 0.01, "loss": 2.0035, "step": 57516 }, { "epoch": 5.907260963335729, "grad_norm": 0.03738325461745262, "learning_rate": 0.01, "loss": 1.9949, "step": 57519 }, { "epoch": 5.9075690664475715, "grad_norm": 0.05333522707223892, "learning_rate": 0.01, "loss": 1.9712, "step": 57522 }, { "epoch": 5.907877169559413, "grad_norm": 0.03560793399810791, "learning_rate": 0.01, "loss": 1.9871, "step": 57525 }, { "epoch": 5.908185272671254, "grad_norm": 0.045784592628479004, "learning_rate": 0.01, "loss": 1.9699, "step": 57528 }, { "epoch": 5.908493375783095, "grad_norm": 0.05486133694648743, "learning_rate": 0.01, "loss": 1.9918, "step": 57531 }, { "epoch": 5.908801478894937, "grad_norm": 0.06346391886472702, "learning_rate": 0.01, "loss": 1.9864, "step": 57534 }, { "epoch": 5.909109582006778, "grad_norm": 0.06552404165267944, "learning_rate": 0.01, "loss": 2.0023, "step": 57537 }, { "epoch": 5.90941768511862, "grad_norm": 0.07176794856786728, "learning_rate": 0.01, "loss": 1.9855, "step": 57540 }, { "epoch": 5.909725788230461, "grad_norm": 0.05690211430191994, "learning_rate": 0.01, "loss": 1.9842, "step": 57543 }, { "epoch": 5.9100338913423025, "grad_norm": 0.07518581300973892, "learning_rate": 0.01, "loss": 2.0117, "step": 57546 }, { "epoch": 5.910341994454144, "grad_norm": 0.09749139845371246, "learning_rate": 0.01, "loss": 2.0104, "step": 57549 }, { "epoch": 5.910650097565985, "grad_norm": 0.06265246123075485, "learning_rate": 0.01, "loss": 1.9775, "step": 57552 }, { "epoch": 5.910958200677827, "grad_norm": 0.09425773471593857, "learning_rate": 0.01, "loss": 2.0063, "step": 57555 }, { "epoch": 5.911266303789668, "grad_norm": 0.07862033694982529, "learning_rate": 0.01, "loss": 1.9784, "step": 57558 }, { "epoch": 5.91157440690151, "grad_norm": 0.052102018147706985, "learning_rate": 0.01, "loss": 1.9938, "step": 57561 }, { "epoch": 5.911882510013351, "grad_norm": 0.04333946481347084, "learning_rate": 0.01, "loss": 2.0005, "step": 57564 }, { "epoch": 5.912190613125192, "grad_norm": 0.03312124311923981, "learning_rate": 0.01, "loss": 2.0017, "step": 57567 }, { "epoch": 5.912498716237034, "grad_norm": 0.11217939108610153, "learning_rate": 0.01, "loss": 1.9932, "step": 57570 }, { "epoch": 5.912806819348876, "grad_norm": 0.050783589482307434, "learning_rate": 0.01, "loss": 1.9766, "step": 57573 }, { "epoch": 5.913114922460717, "grad_norm": 0.05247243866324425, "learning_rate": 0.01, "loss": 2.0166, "step": 57576 }, { "epoch": 5.913423025572558, "grad_norm": 0.03641197457909584, "learning_rate": 0.01, "loss": 1.9992, "step": 57579 }, { "epoch": 5.913731128684399, "grad_norm": 0.03802553936839104, "learning_rate": 0.01, "loss": 2.0152, "step": 57582 }, { "epoch": 5.914039231796242, "grad_norm": 0.04474913701415062, "learning_rate": 0.01, "loss": 1.9781, "step": 57585 }, { "epoch": 5.914347334908083, "grad_norm": 0.050698988139629364, "learning_rate": 0.01, "loss": 2.0057, "step": 57588 }, { "epoch": 5.914655438019924, "grad_norm": 0.12217648327350616, "learning_rate": 0.01, "loss": 1.9973, "step": 57591 }, { "epoch": 5.914963541131765, "grad_norm": 0.10421942919492722, "learning_rate": 0.01, "loss": 1.9972, "step": 57594 }, { "epoch": 5.915271644243607, "grad_norm": 0.09191686660051346, "learning_rate": 0.01, "loss": 1.9944, "step": 57597 }, { "epoch": 5.915579747355448, "grad_norm": 0.07071512192487717, "learning_rate": 0.01, "loss": 1.9977, "step": 57600 }, { "epoch": 5.91588785046729, "grad_norm": 0.0706767663359642, "learning_rate": 0.01, "loss": 1.9909, "step": 57603 }, { "epoch": 5.916195953579131, "grad_norm": 0.04327024519443512, "learning_rate": 0.01, "loss": 1.9859, "step": 57606 }, { "epoch": 5.916504056690973, "grad_norm": 0.04743794724345207, "learning_rate": 0.01, "loss": 1.9948, "step": 57609 }, { "epoch": 5.916812159802814, "grad_norm": 0.04772097244858742, "learning_rate": 0.01, "loss": 1.9746, "step": 57612 }, { "epoch": 5.917120262914655, "grad_norm": 0.0430108904838562, "learning_rate": 0.01, "loss": 1.9859, "step": 57615 }, { "epoch": 5.917428366026497, "grad_norm": 0.07793119549751282, "learning_rate": 0.01, "loss": 1.9789, "step": 57618 }, { "epoch": 5.9177364691383385, "grad_norm": 0.10654459148645401, "learning_rate": 0.01, "loss": 1.9975, "step": 57621 }, { "epoch": 5.91804457225018, "grad_norm": 0.03443089500069618, "learning_rate": 0.01, "loss": 1.9731, "step": 57624 }, { "epoch": 5.918352675362021, "grad_norm": 0.07121492922306061, "learning_rate": 0.01, "loss": 1.9763, "step": 57627 }, { "epoch": 5.918660778473862, "grad_norm": 0.04436872899532318, "learning_rate": 0.01, "loss": 2.0261, "step": 57630 }, { "epoch": 5.918968881585704, "grad_norm": 0.08435078710317612, "learning_rate": 0.01, "loss": 1.9783, "step": 57633 }, { "epoch": 5.919276984697546, "grad_norm": 0.1054021418094635, "learning_rate": 0.01, "loss": 2.007, "step": 57636 }, { "epoch": 5.919585087809387, "grad_norm": 0.05516025796532631, "learning_rate": 0.01, "loss": 1.9433, "step": 57639 }, { "epoch": 5.919893190921228, "grad_norm": 0.067764513194561, "learning_rate": 0.01, "loss": 2.0029, "step": 57642 }, { "epoch": 5.9202012940330695, "grad_norm": 0.0520036555826664, "learning_rate": 0.01, "loss": 1.9952, "step": 57645 }, { "epoch": 5.920509397144912, "grad_norm": 0.071280837059021, "learning_rate": 0.01, "loss": 1.9881, "step": 57648 }, { "epoch": 5.920817500256753, "grad_norm": 0.04528915137052536, "learning_rate": 0.01, "loss": 1.9862, "step": 57651 }, { "epoch": 5.921125603368594, "grad_norm": 0.0340840145945549, "learning_rate": 0.01, "loss": 1.9829, "step": 57654 }, { "epoch": 5.921433706480435, "grad_norm": 0.033173661679029465, "learning_rate": 0.01, "loss": 1.9705, "step": 57657 }, { "epoch": 5.921741809592277, "grad_norm": 0.13926701247692108, "learning_rate": 0.01, "loss": 1.9926, "step": 57660 }, { "epoch": 5.922049912704118, "grad_norm": 0.07138355076313019, "learning_rate": 0.01, "loss": 1.966, "step": 57663 }, { "epoch": 5.92235801581596, "grad_norm": 0.0728163793683052, "learning_rate": 0.01, "loss": 1.9971, "step": 57666 }, { "epoch": 5.922666118927801, "grad_norm": 0.08973317593336105, "learning_rate": 0.01, "loss": 1.9996, "step": 57669 }, { "epoch": 5.922974222039643, "grad_norm": 0.0479208379983902, "learning_rate": 0.01, "loss": 1.9665, "step": 57672 }, { "epoch": 5.923282325151484, "grad_norm": 0.05372065678238869, "learning_rate": 0.01, "loss": 2.0279, "step": 57675 }, { "epoch": 5.923590428263325, "grad_norm": 0.048417530953884125, "learning_rate": 0.01, "loss": 1.9718, "step": 57678 }, { "epoch": 5.923898531375167, "grad_norm": 0.03113439865410328, "learning_rate": 0.01, "loss": 1.9936, "step": 57681 }, { "epoch": 5.924206634487009, "grad_norm": 0.0359586738049984, "learning_rate": 0.01, "loss": 1.9672, "step": 57684 }, { "epoch": 5.92451473759885, "grad_norm": 0.12019523233175278, "learning_rate": 0.01, "loss": 1.9879, "step": 57687 }, { "epoch": 5.924822840710691, "grad_norm": 0.0717984288930893, "learning_rate": 0.01, "loss": 1.9863, "step": 57690 }, { "epoch": 5.925130943822532, "grad_norm": 0.06465215235948563, "learning_rate": 0.01, "loss": 1.9864, "step": 57693 }, { "epoch": 5.925439046934374, "grad_norm": 0.1053556352853775, "learning_rate": 0.01, "loss": 1.9825, "step": 57696 }, { "epoch": 5.925747150046216, "grad_norm": 0.04565432667732239, "learning_rate": 0.01, "loss": 1.9987, "step": 57699 }, { "epoch": 5.926055253158057, "grad_norm": 0.05233050882816315, "learning_rate": 0.01, "loss": 1.9858, "step": 57702 }, { "epoch": 5.926363356269898, "grad_norm": 0.05413827672600746, "learning_rate": 0.01, "loss": 1.9791, "step": 57705 }, { "epoch": 5.92667145938174, "grad_norm": 0.0694475993514061, "learning_rate": 0.01, "loss": 2.0002, "step": 57708 }, { "epoch": 5.926979562493581, "grad_norm": 0.03973362222313881, "learning_rate": 0.01, "loss": 1.9963, "step": 57711 }, { "epoch": 5.927287665605423, "grad_norm": 0.048481449484825134, "learning_rate": 0.01, "loss": 1.964, "step": 57714 }, { "epoch": 5.927595768717264, "grad_norm": 0.04115751013159752, "learning_rate": 0.01, "loss": 1.9826, "step": 57717 }, { "epoch": 5.9279038718291055, "grad_norm": 0.056814514100551605, "learning_rate": 0.01, "loss": 2.0123, "step": 57720 }, { "epoch": 5.928211974940947, "grad_norm": 0.07510741055011749, "learning_rate": 0.01, "loss": 1.9933, "step": 57723 }, { "epoch": 5.928520078052788, "grad_norm": 0.10000273585319519, "learning_rate": 0.01, "loss": 1.979, "step": 57726 }, { "epoch": 5.928828181164629, "grad_norm": 0.10124478489160538, "learning_rate": 0.01, "loss": 1.9944, "step": 57729 }, { "epoch": 5.929136284276471, "grad_norm": 0.1368652582168579, "learning_rate": 0.01, "loss": 1.9818, "step": 57732 }, { "epoch": 5.929444387388313, "grad_norm": 0.06806531548500061, "learning_rate": 0.01, "loss": 1.9891, "step": 57735 }, { "epoch": 5.929752490500154, "grad_norm": 0.03443896025419235, "learning_rate": 0.01, "loss": 1.9785, "step": 57738 }, { "epoch": 5.930060593611995, "grad_norm": 0.03277003765106201, "learning_rate": 0.01, "loss": 1.9706, "step": 57741 }, { "epoch": 5.930368696723837, "grad_norm": 0.04082879796624184, "learning_rate": 0.01, "loss": 1.9843, "step": 57744 }, { "epoch": 5.930676799835679, "grad_norm": 0.11657889932394028, "learning_rate": 0.01, "loss": 2.0116, "step": 57747 }, { "epoch": 5.93098490294752, "grad_norm": 0.08914856612682343, "learning_rate": 0.01, "loss": 1.9995, "step": 57750 }, { "epoch": 5.931293006059361, "grad_norm": 0.06309764087200165, "learning_rate": 0.01, "loss": 1.9943, "step": 57753 }, { "epoch": 5.931601109171202, "grad_norm": 0.051464032381772995, "learning_rate": 0.01, "loss": 1.9949, "step": 57756 }, { "epoch": 5.931909212283044, "grad_norm": 0.037711381912231445, "learning_rate": 0.01, "loss": 2.0257, "step": 57759 }, { "epoch": 5.932217315394886, "grad_norm": 0.06184235215187073, "learning_rate": 0.01, "loss": 1.9991, "step": 57762 }, { "epoch": 5.932525418506727, "grad_norm": 0.07253403961658478, "learning_rate": 0.01, "loss": 1.9763, "step": 57765 }, { "epoch": 5.932833521618568, "grad_norm": 0.0968317911028862, "learning_rate": 0.01, "loss": 2.0105, "step": 57768 }, { "epoch": 5.93314162473041, "grad_norm": 0.0691918134689331, "learning_rate": 0.01, "loss": 1.9819, "step": 57771 }, { "epoch": 5.933449727842251, "grad_norm": 0.0434565395116806, "learning_rate": 0.01, "loss": 2.0185, "step": 57774 }, { "epoch": 5.933757830954093, "grad_norm": 0.04627044126391411, "learning_rate": 0.01, "loss": 2.0037, "step": 57777 }, { "epoch": 5.934065934065934, "grad_norm": 0.04456416517496109, "learning_rate": 0.01, "loss": 2.0219, "step": 57780 }, { "epoch": 5.934374037177776, "grad_norm": 0.038884878158569336, "learning_rate": 0.01, "loss": 1.9942, "step": 57783 }, { "epoch": 5.934682140289617, "grad_norm": 0.03564108535647392, "learning_rate": 0.01, "loss": 1.9891, "step": 57786 }, { "epoch": 5.934990243401458, "grad_norm": 0.06104537844657898, "learning_rate": 0.01, "loss": 1.9844, "step": 57789 }, { "epoch": 5.935298346513299, "grad_norm": 0.06845796853303909, "learning_rate": 0.01, "loss": 1.9933, "step": 57792 }, { "epoch": 5.9356064496251415, "grad_norm": 0.08483695238828659, "learning_rate": 0.01, "loss": 1.9665, "step": 57795 }, { "epoch": 5.935914552736983, "grad_norm": 0.05989287048578262, "learning_rate": 0.01, "loss": 2.0038, "step": 57798 }, { "epoch": 5.936222655848824, "grad_norm": 0.04331030324101448, "learning_rate": 0.01, "loss": 2.0049, "step": 57801 }, { "epoch": 5.936530758960665, "grad_norm": 0.05447021499276161, "learning_rate": 0.01, "loss": 1.9922, "step": 57804 }, { "epoch": 5.9368388620725066, "grad_norm": 0.10276515781879425, "learning_rate": 0.01, "loss": 1.9818, "step": 57807 }, { "epoch": 5.937146965184349, "grad_norm": 0.041528135538101196, "learning_rate": 0.01, "loss": 1.978, "step": 57810 }, { "epoch": 5.93745506829619, "grad_norm": 0.09760654717683792, "learning_rate": 0.01, "loss": 1.9844, "step": 57813 }, { "epoch": 5.937763171408031, "grad_norm": 0.07332238554954529, "learning_rate": 0.01, "loss": 2.0096, "step": 57816 }, { "epoch": 5.9380712745198725, "grad_norm": 0.07201950252056122, "learning_rate": 0.01, "loss": 1.9674, "step": 57819 }, { "epoch": 5.938379377631714, "grad_norm": 0.09542597830295563, "learning_rate": 0.01, "loss": 1.9897, "step": 57822 }, { "epoch": 5.938687480743555, "grad_norm": 0.08886115252971649, "learning_rate": 0.01, "loss": 2.016, "step": 57825 }, { "epoch": 5.938995583855397, "grad_norm": 0.0604422464966774, "learning_rate": 0.01, "loss": 2.0074, "step": 57828 }, { "epoch": 5.939303686967238, "grad_norm": 0.04549078643321991, "learning_rate": 0.01, "loss": 2.0073, "step": 57831 }, { "epoch": 5.93961179007908, "grad_norm": 0.09901918470859528, "learning_rate": 0.01, "loss": 2.0006, "step": 57834 }, { "epoch": 5.939919893190921, "grad_norm": 0.10208103060722351, "learning_rate": 0.01, "loss": 1.9797, "step": 57837 }, { "epoch": 5.940227996302763, "grad_norm": 0.08842794597148895, "learning_rate": 0.01, "loss": 1.977, "step": 57840 }, { "epoch": 5.940536099414604, "grad_norm": 0.06819964945316315, "learning_rate": 0.01, "loss": 1.9854, "step": 57843 }, { "epoch": 5.940844202526446, "grad_norm": 0.07434326410293579, "learning_rate": 0.01, "loss": 1.9875, "step": 57846 }, { "epoch": 5.941152305638287, "grad_norm": 0.06985678523778915, "learning_rate": 0.01, "loss": 1.9805, "step": 57849 }, { "epoch": 5.941460408750128, "grad_norm": 0.04155593365430832, "learning_rate": 0.01, "loss": 1.9645, "step": 57852 }, { "epoch": 5.941768511861969, "grad_norm": 0.07389968633651733, "learning_rate": 0.01, "loss": 2.0201, "step": 57855 }, { "epoch": 5.942076614973812, "grad_norm": 0.11060785502195358, "learning_rate": 0.01, "loss": 1.9677, "step": 57858 }, { "epoch": 5.942384718085653, "grad_norm": 0.035507991909980774, "learning_rate": 0.01, "loss": 1.9752, "step": 57861 }, { "epoch": 5.942692821197494, "grad_norm": 0.07544751465320587, "learning_rate": 0.01, "loss": 2.0023, "step": 57864 }, { "epoch": 5.943000924309335, "grad_norm": 0.03793545812368393, "learning_rate": 0.01, "loss": 1.998, "step": 57867 }, { "epoch": 5.943309027421177, "grad_norm": 0.07497958838939667, "learning_rate": 0.01, "loss": 1.9943, "step": 57870 }, { "epoch": 5.943617130533019, "grad_norm": 0.0712079107761383, "learning_rate": 0.01, "loss": 2.0135, "step": 57873 }, { "epoch": 5.94392523364486, "grad_norm": 0.03715150058269501, "learning_rate": 0.01, "loss": 1.9586, "step": 57876 }, { "epoch": 5.944233336756701, "grad_norm": 0.0650571882724762, "learning_rate": 0.01, "loss": 1.9873, "step": 57879 }, { "epoch": 5.9445414398685426, "grad_norm": 0.0977357029914856, "learning_rate": 0.01, "loss": 1.994, "step": 57882 }, { "epoch": 5.944849542980384, "grad_norm": 0.06483075022697449, "learning_rate": 0.01, "loss": 1.9779, "step": 57885 }, { "epoch": 5.945157646092225, "grad_norm": 0.09345372766256332, "learning_rate": 0.01, "loss": 1.9878, "step": 57888 }, { "epoch": 5.945465749204067, "grad_norm": 0.06418720632791519, "learning_rate": 0.01, "loss": 1.9899, "step": 57891 }, { "epoch": 5.9457738523159085, "grad_norm": 0.05608039349317551, "learning_rate": 0.01, "loss": 1.9835, "step": 57894 }, { "epoch": 5.94608195542775, "grad_norm": 0.04540601745247841, "learning_rate": 0.01, "loss": 1.9884, "step": 57897 }, { "epoch": 5.946390058539591, "grad_norm": 0.07881978154182434, "learning_rate": 0.01, "loss": 1.9901, "step": 57900 }, { "epoch": 5.946698161651433, "grad_norm": 0.05478779971599579, "learning_rate": 0.01, "loss": 1.9838, "step": 57903 }, { "epoch": 5.947006264763274, "grad_norm": 0.06574473530054092, "learning_rate": 0.01, "loss": 1.9842, "step": 57906 }, { "epoch": 5.947314367875116, "grad_norm": 0.06080929934978485, "learning_rate": 0.01, "loss": 1.9615, "step": 57909 }, { "epoch": 5.947622470986957, "grad_norm": 0.06462790071964264, "learning_rate": 0.01, "loss": 1.9889, "step": 57912 }, { "epoch": 5.947930574098798, "grad_norm": 0.06746607273817062, "learning_rate": 0.01, "loss": 1.9895, "step": 57915 }, { "epoch": 5.9482386772106395, "grad_norm": 0.048096004873514175, "learning_rate": 0.01, "loss": 1.9948, "step": 57918 }, { "epoch": 5.948546780322482, "grad_norm": 0.038138337433338165, "learning_rate": 0.01, "loss": 1.9728, "step": 57921 }, { "epoch": 5.948854883434323, "grad_norm": 0.05787679925560951, "learning_rate": 0.01, "loss": 1.9743, "step": 57924 }, { "epoch": 5.949162986546164, "grad_norm": 0.21429073810577393, "learning_rate": 0.01, "loss": 1.9997, "step": 57927 }, { "epoch": 5.949471089658005, "grad_norm": 0.103017657995224, "learning_rate": 0.01, "loss": 2.0201, "step": 57930 }, { "epoch": 5.949779192769847, "grad_norm": 0.08076374232769012, "learning_rate": 0.01, "loss": 2.0054, "step": 57933 }, { "epoch": 5.950087295881689, "grad_norm": 0.06803920865058899, "learning_rate": 0.01, "loss": 1.9788, "step": 57936 }, { "epoch": 5.95039539899353, "grad_norm": 0.0801796242594719, "learning_rate": 0.01, "loss": 1.994, "step": 57939 }, { "epoch": 5.950703502105371, "grad_norm": 0.0715690553188324, "learning_rate": 0.01, "loss": 1.9848, "step": 57942 }, { "epoch": 5.951011605217213, "grad_norm": 0.051856063306331635, "learning_rate": 0.01, "loss": 1.9924, "step": 57945 }, { "epoch": 5.951319708329054, "grad_norm": 0.03726186603307724, "learning_rate": 0.01, "loss": 2.006, "step": 57948 }, { "epoch": 5.951627811440895, "grad_norm": 0.04276345297694206, "learning_rate": 0.01, "loss": 1.994, "step": 57951 }, { "epoch": 5.951935914552737, "grad_norm": 0.06446687877178192, "learning_rate": 0.01, "loss": 1.9837, "step": 57954 }, { "epoch": 5.952244017664579, "grad_norm": 0.05513986572623253, "learning_rate": 0.01, "loss": 1.9882, "step": 57957 }, { "epoch": 5.95255212077642, "grad_norm": 0.06750188022851944, "learning_rate": 0.01, "loss": 2.0185, "step": 57960 }, { "epoch": 5.952860223888261, "grad_norm": 0.048399876803159714, "learning_rate": 0.01, "loss": 1.9937, "step": 57963 }, { "epoch": 5.953168327000102, "grad_norm": 0.04391476884484291, "learning_rate": 0.01, "loss": 1.9905, "step": 57966 }, { "epoch": 5.9534764301119445, "grad_norm": 0.05262988805770874, "learning_rate": 0.01, "loss": 1.98, "step": 57969 }, { "epoch": 5.953784533223786, "grad_norm": 0.1819307655096054, "learning_rate": 0.01, "loss": 1.9768, "step": 57972 }, { "epoch": 5.954092636335627, "grad_norm": 0.04456859081983566, "learning_rate": 0.01, "loss": 1.9679, "step": 57975 }, { "epoch": 5.954400739447468, "grad_norm": 0.11903820186853409, "learning_rate": 0.01, "loss": 1.9895, "step": 57978 }, { "epoch": 5.9547088425593095, "grad_norm": 0.08669394254684448, "learning_rate": 0.01, "loss": 2.0061, "step": 57981 }, { "epoch": 5.955016945671151, "grad_norm": 0.05538726970553398, "learning_rate": 0.01, "loss": 1.9652, "step": 57984 }, { "epoch": 5.955325048782993, "grad_norm": 0.06540273129940033, "learning_rate": 0.01, "loss": 1.954, "step": 57987 }, { "epoch": 5.955633151894834, "grad_norm": 0.05096886307001114, "learning_rate": 0.01, "loss": 2.0121, "step": 57990 }, { "epoch": 5.9559412550066755, "grad_norm": 0.0463346429169178, "learning_rate": 0.01, "loss": 1.985, "step": 57993 }, { "epoch": 5.956249358118517, "grad_norm": 0.12739206850528717, "learning_rate": 0.01, "loss": 1.9886, "step": 57996 }, { "epoch": 5.956557461230359, "grad_norm": 0.03725777193903923, "learning_rate": 0.01, "loss": 1.9851, "step": 57999 }, { "epoch": 5.9568655643422, "grad_norm": 0.041961897164583206, "learning_rate": 0.01, "loss": 1.9872, "step": 58002 }, { "epoch": 5.957173667454041, "grad_norm": 0.039089135825634, "learning_rate": 0.01, "loss": 1.9865, "step": 58005 }, { "epoch": 5.957481770565883, "grad_norm": 0.03455056622624397, "learning_rate": 0.01, "loss": 1.9644, "step": 58008 }, { "epoch": 5.957789873677724, "grad_norm": 0.10427875816822052, "learning_rate": 0.01, "loss": 1.9879, "step": 58011 }, { "epoch": 5.958097976789565, "grad_norm": 0.11531616002321243, "learning_rate": 0.01, "loss": 1.9947, "step": 58014 }, { "epoch": 5.958406079901407, "grad_norm": 0.05732328072190285, "learning_rate": 0.01, "loss": 1.9666, "step": 58017 }, { "epoch": 5.958714183013249, "grad_norm": 0.033681951463222504, "learning_rate": 0.01, "loss": 1.9839, "step": 58020 }, { "epoch": 5.95902228612509, "grad_norm": 0.03731759265065193, "learning_rate": 0.01, "loss": 2.0003, "step": 58023 }, { "epoch": 5.959330389236931, "grad_norm": 0.03403918072581291, "learning_rate": 0.01, "loss": 2.0195, "step": 58026 }, { "epoch": 5.959638492348772, "grad_norm": 0.10777036845684052, "learning_rate": 0.01, "loss": 1.9861, "step": 58029 }, { "epoch": 5.959946595460615, "grad_norm": 0.03863784670829773, "learning_rate": 0.01, "loss": 1.9806, "step": 58032 }, { "epoch": 5.960254698572456, "grad_norm": 0.11702859401702881, "learning_rate": 0.01, "loss": 2.0041, "step": 58035 }, { "epoch": 5.960562801684297, "grad_norm": 0.04561970755457878, "learning_rate": 0.01, "loss": 1.9808, "step": 58038 }, { "epoch": 5.960870904796138, "grad_norm": 0.07471676170825958, "learning_rate": 0.01, "loss": 1.9927, "step": 58041 }, { "epoch": 5.96117900790798, "grad_norm": 0.06594023108482361, "learning_rate": 0.01, "loss": 1.9952, "step": 58044 }, { "epoch": 5.961487111019821, "grad_norm": 0.10957533866167068, "learning_rate": 0.01, "loss": 1.9701, "step": 58047 }, { "epoch": 5.961795214131663, "grad_norm": 0.06384073197841644, "learning_rate": 0.01, "loss": 1.9951, "step": 58050 }, { "epoch": 5.962103317243504, "grad_norm": 0.04451090097427368, "learning_rate": 0.01, "loss": 1.9979, "step": 58053 }, { "epoch": 5.9624114203553455, "grad_norm": 0.036886006593704224, "learning_rate": 0.01, "loss": 1.982, "step": 58056 }, { "epoch": 5.962719523467187, "grad_norm": 0.06101905182003975, "learning_rate": 0.01, "loss": 1.9839, "step": 58059 }, { "epoch": 5.963027626579028, "grad_norm": 0.037433281540870667, "learning_rate": 0.01, "loss": 1.951, "step": 58062 }, { "epoch": 5.96333572969087, "grad_norm": 0.04497874528169632, "learning_rate": 0.01, "loss": 1.9854, "step": 58065 }, { "epoch": 5.9636438328027115, "grad_norm": 0.10465840995311737, "learning_rate": 0.01, "loss": 1.9674, "step": 58068 }, { "epoch": 5.963951935914553, "grad_norm": 0.08285420387983322, "learning_rate": 0.01, "loss": 1.9748, "step": 58071 }, { "epoch": 5.964260039026394, "grad_norm": 0.0607406347990036, "learning_rate": 0.01, "loss": 1.9738, "step": 58074 }, { "epoch": 5.964568142138235, "grad_norm": 0.07265600562095642, "learning_rate": 0.01, "loss": 1.978, "step": 58077 }, { "epoch": 5.9648762452500765, "grad_norm": 0.06835401058197021, "learning_rate": 0.01, "loss": 2.008, "step": 58080 }, { "epoch": 5.965184348361919, "grad_norm": 0.08795753121376038, "learning_rate": 0.01, "loss": 1.983, "step": 58083 }, { "epoch": 5.96549245147376, "grad_norm": 0.06189946457743645, "learning_rate": 0.01, "loss": 1.9896, "step": 58086 }, { "epoch": 5.965800554585601, "grad_norm": 0.05601467564702034, "learning_rate": 0.01, "loss": 1.9973, "step": 58089 }, { "epoch": 5.9661086576974425, "grad_norm": 0.07447000592947006, "learning_rate": 0.01, "loss": 2.0048, "step": 58092 }, { "epoch": 5.966416760809285, "grad_norm": 0.08373255282640457, "learning_rate": 0.01, "loss": 1.969, "step": 58095 }, { "epoch": 5.966724863921126, "grad_norm": 0.09241122007369995, "learning_rate": 0.01, "loss": 1.9638, "step": 58098 }, { "epoch": 5.967032967032967, "grad_norm": 0.0638846680521965, "learning_rate": 0.01, "loss": 1.9902, "step": 58101 }, { "epoch": 5.967341070144808, "grad_norm": 0.0532197505235672, "learning_rate": 0.01, "loss": 1.9652, "step": 58104 }, { "epoch": 5.96764917325665, "grad_norm": 0.10008466243743896, "learning_rate": 0.01, "loss": 1.9972, "step": 58107 }, { "epoch": 5.967957276368491, "grad_norm": 0.047970160841941833, "learning_rate": 0.01, "loss": 1.9937, "step": 58110 }, { "epoch": 5.968265379480333, "grad_norm": 0.046584948897361755, "learning_rate": 0.01, "loss": 2.0125, "step": 58113 }, { "epoch": 5.968573482592174, "grad_norm": 0.04055574908852577, "learning_rate": 0.01, "loss": 1.966, "step": 58116 }, { "epoch": 5.968881585704016, "grad_norm": 0.11462211608886719, "learning_rate": 0.01, "loss": 1.9865, "step": 58119 }, { "epoch": 5.969189688815857, "grad_norm": 0.07639250159263611, "learning_rate": 0.01, "loss": 1.9919, "step": 58122 }, { "epoch": 5.969497791927698, "grad_norm": 0.08450743556022644, "learning_rate": 0.01, "loss": 2.0176, "step": 58125 }, { "epoch": 5.96980589503954, "grad_norm": 0.06165710464119911, "learning_rate": 0.01, "loss": 1.9772, "step": 58128 }, { "epoch": 5.9701139981513816, "grad_norm": 0.042419277131557465, "learning_rate": 0.01, "loss": 1.9939, "step": 58131 }, { "epoch": 5.970422101263223, "grad_norm": 0.04378311708569527, "learning_rate": 0.01, "loss": 2.0167, "step": 58134 }, { "epoch": 5.970730204375064, "grad_norm": 0.05443147197365761, "learning_rate": 0.01, "loss": 2.0006, "step": 58137 }, { "epoch": 5.971038307486905, "grad_norm": 0.05718831345438957, "learning_rate": 0.01, "loss": 1.991, "step": 58140 }, { "epoch": 5.971346410598747, "grad_norm": 0.0620989128947258, "learning_rate": 0.01, "loss": 1.9865, "step": 58143 }, { "epoch": 5.971654513710589, "grad_norm": 0.11351976543664932, "learning_rate": 0.01, "loss": 1.9966, "step": 58146 }, { "epoch": 5.97196261682243, "grad_norm": 0.05350454896688461, "learning_rate": 0.01, "loss": 1.9868, "step": 58149 }, { "epoch": 5.972270719934271, "grad_norm": 0.08652366697788239, "learning_rate": 0.01, "loss": 2.0034, "step": 58152 }, { "epoch": 5.9725788230461125, "grad_norm": 0.054177410900592804, "learning_rate": 0.01, "loss": 1.9981, "step": 58155 }, { "epoch": 5.972886926157955, "grad_norm": 0.047728173434734344, "learning_rate": 0.01, "loss": 1.9865, "step": 58158 }, { "epoch": 5.973195029269796, "grad_norm": 0.048977095633745193, "learning_rate": 0.01, "loss": 1.9905, "step": 58161 }, { "epoch": 5.973503132381637, "grad_norm": 0.042776644229888916, "learning_rate": 0.01, "loss": 1.9993, "step": 58164 }, { "epoch": 5.9738112354934785, "grad_norm": 0.09739521145820618, "learning_rate": 0.01, "loss": 1.9979, "step": 58167 }, { "epoch": 5.97411933860532, "grad_norm": 0.0636570006608963, "learning_rate": 0.01, "loss": 2.0037, "step": 58170 }, { "epoch": 5.974427441717161, "grad_norm": 0.09212375432252884, "learning_rate": 0.01, "loss": 1.9995, "step": 58173 }, { "epoch": 5.974735544829003, "grad_norm": 0.06589896231889725, "learning_rate": 0.01, "loss": 2.0172, "step": 58176 }, { "epoch": 5.975043647940844, "grad_norm": 0.04317329451441765, "learning_rate": 0.01, "loss": 1.9987, "step": 58179 }, { "epoch": 5.975351751052686, "grad_norm": 0.04963945969939232, "learning_rate": 0.01, "loss": 1.9737, "step": 58182 }, { "epoch": 5.975659854164527, "grad_norm": 0.054626185446977615, "learning_rate": 0.01, "loss": 1.9961, "step": 58185 }, { "epoch": 5.975967957276368, "grad_norm": 0.0750926285982132, "learning_rate": 0.01, "loss": 1.9764, "step": 58188 }, { "epoch": 5.97627606038821, "grad_norm": 0.09490969032049179, "learning_rate": 0.01, "loss": 1.9988, "step": 58191 }, { "epoch": 5.976584163500052, "grad_norm": 0.19652721285820007, "learning_rate": 0.01, "loss": 2.0049, "step": 58194 }, { "epoch": 5.976892266611893, "grad_norm": 0.13692377507686615, "learning_rate": 0.01, "loss": 2.0059, "step": 58197 }, { "epoch": 5.977200369723734, "grad_norm": 0.10185639560222626, "learning_rate": 0.01, "loss": 1.9875, "step": 58200 }, { "epoch": 5.977508472835575, "grad_norm": 0.08201511949300766, "learning_rate": 0.01, "loss": 1.9697, "step": 58203 }, { "epoch": 5.977816575947417, "grad_norm": 0.06355354934930801, "learning_rate": 0.01, "loss": 1.9582, "step": 58206 }, { "epoch": 5.978124679059259, "grad_norm": 0.07959393411874771, "learning_rate": 0.01, "loss": 1.9891, "step": 58209 }, { "epoch": 5.9784327821711, "grad_norm": 0.08127464354038239, "learning_rate": 0.01, "loss": 1.9812, "step": 58212 }, { "epoch": 5.978740885282941, "grad_norm": 0.052127279341220856, "learning_rate": 0.01, "loss": 1.982, "step": 58215 }, { "epoch": 5.979048988394783, "grad_norm": 0.05060075968503952, "learning_rate": 0.01, "loss": 1.9739, "step": 58218 }, { "epoch": 5.979357091506624, "grad_norm": 0.0653509721159935, "learning_rate": 0.01, "loss": 1.9847, "step": 58221 }, { "epoch": 5.979665194618466, "grad_norm": 0.04588339477777481, "learning_rate": 0.01, "loss": 1.9795, "step": 58224 }, { "epoch": 5.979973297730307, "grad_norm": 0.027341017499566078, "learning_rate": 0.01, "loss": 1.9736, "step": 58227 }, { "epoch": 5.9802814008421485, "grad_norm": 0.039760638028383255, "learning_rate": 0.01, "loss": 1.9857, "step": 58230 }, { "epoch": 5.98058950395399, "grad_norm": 0.07517565786838531, "learning_rate": 0.01, "loss": 1.9947, "step": 58233 }, { "epoch": 5.980897607065831, "grad_norm": 0.07364608347415924, "learning_rate": 0.01, "loss": 1.9973, "step": 58236 }, { "epoch": 5.981205710177672, "grad_norm": 0.057517606765031815, "learning_rate": 0.01, "loss": 1.9934, "step": 58239 }, { "epoch": 5.9815138132895145, "grad_norm": 0.032561108469963074, "learning_rate": 0.01, "loss": 2.0029, "step": 58242 }, { "epoch": 5.981821916401356, "grad_norm": 0.08549618721008301, "learning_rate": 0.01, "loss": 1.9769, "step": 58245 }, { "epoch": 5.982130019513197, "grad_norm": 0.17904026806354523, "learning_rate": 0.01, "loss": 2.0021, "step": 58248 }, { "epoch": 5.982438122625038, "grad_norm": 0.10775711387395859, "learning_rate": 0.01, "loss": 1.9909, "step": 58251 }, { "epoch": 5.98274622573688, "grad_norm": 0.07369556277990341, "learning_rate": 0.01, "loss": 1.9942, "step": 58254 }, { "epoch": 5.983054328848722, "grad_norm": 0.059407759457826614, "learning_rate": 0.01, "loss": 1.9739, "step": 58257 }, { "epoch": 5.983362431960563, "grad_norm": 0.05741313472390175, "learning_rate": 0.01, "loss": 2.0013, "step": 58260 }, { "epoch": 5.983670535072404, "grad_norm": 0.07792560756206512, "learning_rate": 0.01, "loss": 2.0021, "step": 58263 }, { "epoch": 5.9839786381842455, "grad_norm": 0.058633413165807724, "learning_rate": 0.01, "loss": 1.9919, "step": 58266 }, { "epoch": 5.984286741296087, "grad_norm": 0.04207305610179901, "learning_rate": 0.01, "loss": 1.9993, "step": 58269 }, { "epoch": 5.984594844407929, "grad_norm": 0.039717648178339005, "learning_rate": 0.01, "loss": 2.0046, "step": 58272 }, { "epoch": 5.98490294751977, "grad_norm": 0.03505009412765503, "learning_rate": 0.01, "loss": 1.9855, "step": 58275 }, { "epoch": 5.985211050631611, "grad_norm": 0.03421800956130028, "learning_rate": 0.01, "loss": 2.0031, "step": 58278 }, { "epoch": 5.985519153743453, "grad_norm": 0.11582440882921219, "learning_rate": 0.01, "loss": 1.9791, "step": 58281 }, { "epoch": 5.985827256855294, "grad_norm": 0.058088093996047974, "learning_rate": 0.01, "loss": 2.0229, "step": 58284 }, { "epoch": 5.986135359967136, "grad_norm": 0.08388987928628922, "learning_rate": 0.01, "loss": 2.0025, "step": 58287 }, { "epoch": 5.986443463078977, "grad_norm": 0.07332730293273926, "learning_rate": 0.01, "loss": 1.9936, "step": 58290 }, { "epoch": 5.986751566190819, "grad_norm": 0.03843742609024048, "learning_rate": 0.01, "loss": 1.9989, "step": 58293 }, { "epoch": 5.98705966930266, "grad_norm": 0.07863465696573257, "learning_rate": 0.01, "loss": 1.9932, "step": 58296 }, { "epoch": 5.987367772414501, "grad_norm": 0.08023706078529358, "learning_rate": 0.01, "loss": 1.9803, "step": 58299 }, { "epoch": 5.987675875526342, "grad_norm": 0.037095215171575546, "learning_rate": 0.01, "loss": 1.9701, "step": 58302 }, { "epoch": 5.9879839786381845, "grad_norm": 0.04752547666430473, "learning_rate": 0.01, "loss": 1.9858, "step": 58305 }, { "epoch": 5.988292081750026, "grad_norm": 0.053975239396095276, "learning_rate": 0.01, "loss": 1.9971, "step": 58308 }, { "epoch": 5.988600184861867, "grad_norm": 0.08200733363628387, "learning_rate": 0.01, "loss": 1.991, "step": 58311 }, { "epoch": 5.988908287973708, "grad_norm": 0.056134093552827835, "learning_rate": 0.01, "loss": 2.0093, "step": 58314 }, { "epoch": 5.98921639108555, "grad_norm": 0.04401000216603279, "learning_rate": 0.01, "loss": 1.9762, "step": 58317 }, { "epoch": 5.989524494197392, "grad_norm": 0.06809469312429428, "learning_rate": 0.01, "loss": 2.0004, "step": 58320 }, { "epoch": 5.989832597309233, "grad_norm": 0.06399431079626083, "learning_rate": 0.01, "loss": 2.0206, "step": 58323 }, { "epoch": 5.990140700421074, "grad_norm": 0.1221606507897377, "learning_rate": 0.01, "loss": 1.9819, "step": 58326 }, { "epoch": 5.9904488035329155, "grad_norm": 0.054819442331790924, "learning_rate": 0.01, "loss": 1.9657, "step": 58329 }, { "epoch": 5.990756906644757, "grad_norm": 0.053588200360536575, "learning_rate": 0.01, "loss": 1.9871, "step": 58332 }, { "epoch": 5.991065009756598, "grad_norm": 0.060072384774684906, "learning_rate": 0.01, "loss": 1.9893, "step": 58335 }, { "epoch": 5.99137311286844, "grad_norm": 0.09491874277591705, "learning_rate": 0.01, "loss": 1.9721, "step": 58338 }, { "epoch": 5.9916812159802815, "grad_norm": 0.054036714136600494, "learning_rate": 0.01, "loss": 1.9843, "step": 58341 }, { "epoch": 5.991989319092123, "grad_norm": 0.04162592813372612, "learning_rate": 0.01, "loss": 1.9927, "step": 58344 }, { "epoch": 5.992297422203964, "grad_norm": 0.05602291598916054, "learning_rate": 0.01, "loss": 1.9965, "step": 58347 }, { "epoch": 5.992605525315806, "grad_norm": 0.038415491580963135, "learning_rate": 0.01, "loss": 1.9956, "step": 58350 }, { "epoch": 5.992913628427647, "grad_norm": 0.07123242318630219, "learning_rate": 0.01, "loss": 1.9662, "step": 58353 }, { "epoch": 5.993221731539489, "grad_norm": 0.06751185655593872, "learning_rate": 0.01, "loss": 1.9769, "step": 58356 }, { "epoch": 5.99352983465133, "grad_norm": 0.07169979065656662, "learning_rate": 0.01, "loss": 1.9675, "step": 58359 }, { "epoch": 5.993837937763171, "grad_norm": 0.13775870203971863, "learning_rate": 0.01, "loss": 2.0067, "step": 58362 }, { "epoch": 5.9941460408750125, "grad_norm": 0.08033827692270279, "learning_rate": 0.01, "loss": 2.0378, "step": 58365 }, { "epoch": 5.994454143986855, "grad_norm": 0.07793736457824707, "learning_rate": 0.01, "loss": 1.9651, "step": 58368 }, { "epoch": 5.994762247098696, "grad_norm": 0.06328029185533524, "learning_rate": 0.01, "loss": 1.9878, "step": 58371 }, { "epoch": 5.995070350210537, "grad_norm": 0.05584852769970894, "learning_rate": 0.01, "loss": 1.9806, "step": 58374 }, { "epoch": 5.995378453322378, "grad_norm": 0.04354814067482948, "learning_rate": 0.01, "loss": 1.9829, "step": 58377 }, { "epoch": 5.99568655643422, "grad_norm": 0.05028558522462845, "learning_rate": 0.01, "loss": 1.9582, "step": 58380 }, { "epoch": 5.995994659546062, "grad_norm": 0.053422022610902786, "learning_rate": 0.01, "loss": 1.997, "step": 58383 }, { "epoch": 5.996302762657903, "grad_norm": 0.03941678628325462, "learning_rate": 0.01, "loss": 1.9952, "step": 58386 }, { "epoch": 5.996610865769744, "grad_norm": 0.03764568641781807, "learning_rate": 0.01, "loss": 1.9905, "step": 58389 }, { "epoch": 5.996918968881586, "grad_norm": 0.044926904141902924, "learning_rate": 0.01, "loss": 1.9786, "step": 58392 }, { "epoch": 5.997227071993427, "grad_norm": 0.046521514654159546, "learning_rate": 0.01, "loss": 1.9825, "step": 58395 }, { "epoch": 5.997535175105268, "grad_norm": 0.12329057604074478, "learning_rate": 0.01, "loss": 1.9898, "step": 58398 }, { "epoch": 5.99784327821711, "grad_norm": 0.045972906053066254, "learning_rate": 0.01, "loss": 1.9735, "step": 58401 }, { "epoch": 5.9981513813289515, "grad_norm": 0.1293979287147522, "learning_rate": 0.01, "loss": 1.9704, "step": 58404 }, { "epoch": 5.998459484440793, "grad_norm": 0.06918042153120041, "learning_rate": 0.01, "loss": 2.0118, "step": 58407 }, { "epoch": 5.998767587552634, "grad_norm": 0.04279787465929985, "learning_rate": 0.01, "loss": 2.0002, "step": 58410 }, { "epoch": 5.999075690664475, "grad_norm": 0.09146596491336823, "learning_rate": 0.01, "loss": 1.9658, "step": 58413 }, { "epoch": 5.9993837937763175, "grad_norm": 0.06269899010658264, "learning_rate": 0.01, "loss": 1.9947, "step": 58416 }, { "epoch": 5.999691896888159, "grad_norm": 0.04407677426934242, "learning_rate": 0.01, "loss": 1.9891, "step": 58419 }, { "epoch": 6.0, "grad_norm": 0.04100476950407028, "learning_rate": 0.01, "loss": 2.015, "step": 58422 }, { "epoch": 5.999691928527419, "grad_norm": 0.05454897880554199, "learning_rate": 0.01, "loss": 1.9814, "step": 58425 }, { "epoch": 6.0, "grad_norm": 0.12488140910863876, "learning_rate": 0.01, "loss": 2.0163, "step": 58428 }, { "epoch": 6.000308071472582, "grad_norm": 0.08063201606273651, "learning_rate": 0.01, "loss": 1.9852, "step": 58431 }, { "epoch": 6.000616142945163, "grad_norm": 0.08693194389343262, "learning_rate": 0.01, "loss": 1.9877, "step": 58434 }, { "epoch": 6.000924214417745, "grad_norm": 0.048637568950653076, "learning_rate": 0.01, "loss": 2.0074, "step": 58437 }, { "epoch": 6.001232285890326, "grad_norm": 0.047587309032678604, "learning_rate": 0.01, "loss": 1.998, "step": 58440 }, { "epoch": 6.0015403573629085, "grad_norm": 0.04123762995004654, "learning_rate": 0.01, "loss": 1.9947, "step": 58443 }, { "epoch": 6.00184842883549, "grad_norm": 0.038054220378398895, "learning_rate": 0.01, "loss": 1.9784, "step": 58446 }, { "epoch": 6.002156500308072, "grad_norm": 0.07207085937261581, "learning_rate": 0.01, "loss": 1.9842, "step": 58449 }, { "epoch": 6.002464571780653, "grad_norm": 0.09471093118190765, "learning_rate": 0.01, "loss": 1.9824, "step": 58452 }, { "epoch": 6.002772643253235, "grad_norm": 0.050923608243465424, "learning_rate": 0.01, "loss": 1.9679, "step": 58455 }, { "epoch": 6.003080714725816, "grad_norm": 0.09074869751930237, "learning_rate": 0.01, "loss": 1.9862, "step": 58458 }, { "epoch": 6.003388786198398, "grad_norm": 0.12536507844924927, "learning_rate": 0.01, "loss": 1.9766, "step": 58461 }, { "epoch": 6.003696857670979, "grad_norm": 0.04264422506093979, "learning_rate": 0.01, "loss": 1.9975, "step": 58464 }, { "epoch": 6.004004929143561, "grad_norm": 0.03600556403398514, "learning_rate": 0.01, "loss": 1.9703, "step": 58467 }, { "epoch": 6.004313000616143, "grad_norm": 0.030723579227924347, "learning_rate": 0.01, "loss": 1.975, "step": 58470 }, { "epoch": 6.0046210720887245, "grad_norm": 0.042884331196546555, "learning_rate": 0.01, "loss": 1.994, "step": 58473 }, { "epoch": 6.004929143561307, "grad_norm": 0.07880793511867523, "learning_rate": 0.01, "loss": 1.9798, "step": 58476 }, { "epoch": 6.005237215033888, "grad_norm": 0.08945189416408539, "learning_rate": 0.01, "loss": 1.9724, "step": 58479 }, { "epoch": 6.00554528650647, "grad_norm": 0.07887895405292511, "learning_rate": 0.01, "loss": 1.9982, "step": 58482 }, { "epoch": 6.005853357979051, "grad_norm": 0.13316544890403748, "learning_rate": 0.01, "loss": 2.0046, "step": 58485 }, { "epoch": 6.006161429451633, "grad_norm": 0.06402456760406494, "learning_rate": 0.01, "loss": 1.9868, "step": 58488 }, { "epoch": 6.006469500924214, "grad_norm": 0.03985295444726944, "learning_rate": 0.01, "loss": 2.0051, "step": 58491 }, { "epoch": 6.006777572396796, "grad_norm": 0.03584379702806473, "learning_rate": 0.01, "loss": 1.9806, "step": 58494 }, { "epoch": 6.007085643869377, "grad_norm": 0.03731578588485718, "learning_rate": 0.01, "loss": 1.9965, "step": 58497 }, { "epoch": 6.007393715341959, "grad_norm": 0.04280545935034752, "learning_rate": 0.01, "loss": 2.0052, "step": 58500 }, { "epoch": 6.007701786814541, "grad_norm": 0.037182942032814026, "learning_rate": 0.01, "loss": 1.9659, "step": 58503 }, { "epoch": 6.008009858287123, "grad_norm": 0.04180056229233742, "learning_rate": 0.01, "loss": 1.9746, "step": 58506 }, { "epoch": 6.008317929759705, "grad_norm": 0.05659356340765953, "learning_rate": 0.01, "loss": 2.0131, "step": 58509 }, { "epoch": 6.008626001232286, "grad_norm": 0.04323246702551842, "learning_rate": 0.01, "loss": 1.9737, "step": 58512 }, { "epoch": 6.008934072704868, "grad_norm": 0.15815724432468414, "learning_rate": 0.01, "loss": 1.9927, "step": 58515 }, { "epoch": 6.009242144177449, "grad_norm": 0.041637711226940155, "learning_rate": 0.01, "loss": 1.9983, "step": 58518 }, { "epoch": 6.009550215650031, "grad_norm": 0.09015413373708725, "learning_rate": 0.01, "loss": 1.9835, "step": 58521 }, { "epoch": 6.009858287122612, "grad_norm": 0.10606445372104645, "learning_rate": 0.01, "loss": 1.9907, "step": 58524 }, { "epoch": 6.010166358595194, "grad_norm": 0.06839009374380112, "learning_rate": 0.01, "loss": 2.0005, "step": 58527 }, { "epoch": 6.0104744300677755, "grad_norm": 0.051956940442323685, "learning_rate": 0.01, "loss": 1.967, "step": 58530 }, { "epoch": 6.0107825015403575, "grad_norm": 0.034211624413728714, "learning_rate": 0.01, "loss": 1.9935, "step": 58533 }, { "epoch": 6.011090573012939, "grad_norm": 0.04351538419723511, "learning_rate": 0.01, "loss": 1.9849, "step": 58536 }, { "epoch": 6.011398644485521, "grad_norm": 0.07221609354019165, "learning_rate": 0.01, "loss": 1.9845, "step": 58539 }, { "epoch": 6.011706715958102, "grad_norm": 0.11168903857469559, "learning_rate": 0.01, "loss": 1.991, "step": 58542 }, { "epoch": 6.012014787430684, "grad_norm": 0.06332577764987946, "learning_rate": 0.01, "loss": 1.9906, "step": 58545 }, { "epoch": 6.012322858903265, "grad_norm": 0.0567488819360733, "learning_rate": 0.01, "loss": 1.9833, "step": 58548 }, { "epoch": 6.012630930375847, "grad_norm": 0.04636390134692192, "learning_rate": 0.01, "loss": 1.9829, "step": 58551 }, { "epoch": 6.012939001848429, "grad_norm": 0.040273915976285934, "learning_rate": 0.01, "loss": 1.9852, "step": 58554 }, { "epoch": 6.01324707332101, "grad_norm": 0.09343009442090988, "learning_rate": 0.01, "loss": 1.9805, "step": 58557 }, { "epoch": 6.013555144793592, "grad_norm": 0.07643181830644608, "learning_rate": 0.01, "loss": 1.9507, "step": 58560 }, { "epoch": 6.013863216266174, "grad_norm": 0.1327466368675232, "learning_rate": 0.01, "loss": 2.0052, "step": 58563 }, { "epoch": 6.014171287738756, "grad_norm": 0.1656189262866974, "learning_rate": 0.01, "loss": 1.9879, "step": 58566 }, { "epoch": 6.014479359211337, "grad_norm": 0.07180788367986679, "learning_rate": 0.01, "loss": 1.9798, "step": 58569 }, { "epoch": 6.014787430683919, "grad_norm": 0.04216361045837402, "learning_rate": 0.01, "loss": 1.9812, "step": 58572 }, { "epoch": 6.0150955021565, "grad_norm": 0.04374970495700836, "learning_rate": 0.01, "loss": 1.9683, "step": 58575 }, { "epoch": 6.015403573629082, "grad_norm": 0.038325972855091095, "learning_rate": 0.01, "loss": 1.9749, "step": 58578 }, { "epoch": 6.015711645101663, "grad_norm": 0.03523325175046921, "learning_rate": 0.01, "loss": 1.9839, "step": 58581 }, { "epoch": 6.016019716574245, "grad_norm": 0.03328186646103859, "learning_rate": 0.01, "loss": 1.9689, "step": 58584 }, { "epoch": 6.016327788046826, "grad_norm": 0.09052954614162445, "learning_rate": 0.01, "loss": 1.9815, "step": 58587 }, { "epoch": 6.0166358595194085, "grad_norm": 0.08605097979307175, "learning_rate": 0.01, "loss": 2.0121, "step": 58590 }, { "epoch": 6.0169439309919905, "grad_norm": 0.05200384184718132, "learning_rate": 0.01, "loss": 1.9738, "step": 58593 }, { "epoch": 6.017252002464572, "grad_norm": 0.07615986466407776, "learning_rate": 0.01, "loss": 1.9905, "step": 58596 }, { "epoch": 6.017560073937154, "grad_norm": 0.043676577508449554, "learning_rate": 0.01, "loss": 1.9933, "step": 58599 }, { "epoch": 6.017868145409735, "grad_norm": 0.058520250022411346, "learning_rate": 0.01, "loss": 1.9768, "step": 58602 }, { "epoch": 6.018176216882317, "grad_norm": 0.07133082300424576, "learning_rate": 0.01, "loss": 1.9723, "step": 58605 }, { "epoch": 6.018484288354898, "grad_norm": 0.08711257576942444, "learning_rate": 0.01, "loss": 1.9774, "step": 58608 }, { "epoch": 6.01879235982748, "grad_norm": 0.0483718067407608, "learning_rate": 0.01, "loss": 1.932, "step": 58611 }, { "epoch": 6.019100431300061, "grad_norm": 0.05014586076140404, "learning_rate": 0.01, "loss": 1.9768, "step": 58614 }, { "epoch": 6.019408502772643, "grad_norm": 0.10905557870864868, "learning_rate": 0.01, "loss": 1.9667, "step": 58617 }, { "epoch": 6.0197165742452245, "grad_norm": 0.03817577287554741, "learning_rate": 0.01, "loss": 1.9943, "step": 58620 }, { "epoch": 6.020024645717807, "grad_norm": 0.0783611536026001, "learning_rate": 0.01, "loss": 1.9599, "step": 58623 }, { "epoch": 6.020332717190388, "grad_norm": 0.0832376778125763, "learning_rate": 0.01, "loss": 1.9884, "step": 58626 }, { "epoch": 6.02064078866297, "grad_norm": 0.05190072953701019, "learning_rate": 0.01, "loss": 1.9639, "step": 58629 }, { "epoch": 6.020948860135552, "grad_norm": 0.09829907864332199, "learning_rate": 0.01, "loss": 2.0106, "step": 58632 }, { "epoch": 6.021256931608133, "grad_norm": 0.08857165277004242, "learning_rate": 0.01, "loss": 2.0049, "step": 58635 }, { "epoch": 6.021565003080715, "grad_norm": 0.07858851552009583, "learning_rate": 0.01, "loss": 1.9861, "step": 58638 }, { "epoch": 6.021873074553296, "grad_norm": 0.04164360463619232, "learning_rate": 0.01, "loss": 2.0072, "step": 58641 }, { "epoch": 6.022181146025878, "grad_norm": 0.037627752870321274, "learning_rate": 0.01, "loss": 1.9653, "step": 58644 }, { "epoch": 6.0224892174984594, "grad_norm": 0.03442845493555069, "learning_rate": 0.01, "loss": 1.9934, "step": 58647 }, { "epoch": 6.0227972889710415, "grad_norm": 0.04693920910358429, "learning_rate": 0.01, "loss": 2.0012, "step": 58650 }, { "epoch": 6.023105360443623, "grad_norm": 0.07563184201717377, "learning_rate": 0.01, "loss": 1.9913, "step": 58653 }, { "epoch": 6.023413431916205, "grad_norm": 0.08100397139787674, "learning_rate": 0.01, "loss": 1.9822, "step": 58656 }, { "epoch": 6.023721503388786, "grad_norm": 0.12132440507411957, "learning_rate": 0.01, "loss": 1.969, "step": 58659 }, { "epoch": 6.024029574861368, "grad_norm": 0.04983309283852577, "learning_rate": 0.01, "loss": 1.9661, "step": 58662 }, { "epoch": 6.024337646333949, "grad_norm": 0.06472596526145935, "learning_rate": 0.01, "loss": 2.0084, "step": 58665 }, { "epoch": 6.024645717806531, "grad_norm": 0.034389570355415344, "learning_rate": 0.01, "loss": 1.9762, "step": 58668 }, { "epoch": 6.024953789279113, "grad_norm": 0.10626552999019623, "learning_rate": 0.01, "loss": 2.0083, "step": 58671 }, { "epoch": 6.025261860751694, "grad_norm": 0.0675535798072815, "learning_rate": 0.01, "loss": 1.9948, "step": 58674 }, { "epoch": 6.025569932224276, "grad_norm": 0.04010651633143425, "learning_rate": 0.01, "loss": 1.969, "step": 58677 }, { "epoch": 6.0258780036968576, "grad_norm": 0.034451622515916824, "learning_rate": 0.01, "loss": 1.9716, "step": 58680 }, { "epoch": 6.02618607516944, "grad_norm": 0.06580517441034317, "learning_rate": 0.01, "loss": 1.9973, "step": 58683 }, { "epoch": 6.026494146642021, "grad_norm": 0.05853043496608734, "learning_rate": 0.01, "loss": 1.9855, "step": 58686 }, { "epoch": 6.026802218114603, "grad_norm": 0.04377220198512077, "learning_rate": 0.01, "loss": 1.9655, "step": 58689 }, { "epoch": 6.027110289587184, "grad_norm": 0.03288479149341583, "learning_rate": 0.01, "loss": 1.978, "step": 58692 }, { "epoch": 6.027418361059766, "grad_norm": 0.04060712829232216, "learning_rate": 0.01, "loss": 1.9982, "step": 58695 }, { "epoch": 6.027726432532347, "grad_norm": 0.11332166939973831, "learning_rate": 0.01, "loss": 1.9685, "step": 58698 }, { "epoch": 6.028034504004929, "grad_norm": 0.13282176852226257, "learning_rate": 0.01, "loss": 1.9528, "step": 58701 }, { "epoch": 6.02834257547751, "grad_norm": 0.08038832992315292, "learning_rate": 0.01, "loss": 2.0108, "step": 58704 }, { "epoch": 6.0286506469500925, "grad_norm": 0.05667036771774292, "learning_rate": 0.01, "loss": 1.9945, "step": 58707 }, { "epoch": 6.0289587184226745, "grad_norm": 0.06444176286458969, "learning_rate": 0.01, "loss": 1.972, "step": 58710 }, { "epoch": 6.029266789895256, "grad_norm": 0.08207248896360397, "learning_rate": 0.01, "loss": 1.9731, "step": 58713 }, { "epoch": 6.029574861367838, "grad_norm": 0.057919155806303024, "learning_rate": 0.01, "loss": 1.9799, "step": 58716 }, { "epoch": 6.029882932840419, "grad_norm": 0.0630873367190361, "learning_rate": 0.01, "loss": 2.004, "step": 58719 }, { "epoch": 6.030191004313001, "grad_norm": 0.14096465706825256, "learning_rate": 0.01, "loss": 2.0029, "step": 58722 }, { "epoch": 6.030499075785582, "grad_norm": 0.06392169743776321, "learning_rate": 0.01, "loss": 1.9605, "step": 58725 }, { "epoch": 6.030807147258164, "grad_norm": 0.051594968885183334, "learning_rate": 0.01, "loss": 1.9709, "step": 58728 }, { "epoch": 6.031115218730745, "grad_norm": 0.05645729973912239, "learning_rate": 0.01, "loss": 1.977, "step": 58731 }, { "epoch": 6.031423290203327, "grad_norm": 0.056138813495635986, "learning_rate": 0.01, "loss": 2.016, "step": 58734 }, { "epoch": 6.0317313616759085, "grad_norm": 0.08171719312667847, "learning_rate": 0.01, "loss": 1.9866, "step": 58737 }, { "epoch": 6.032039433148491, "grad_norm": 0.0954081118106842, "learning_rate": 0.01, "loss": 1.9761, "step": 58740 }, { "epoch": 6.032347504621072, "grad_norm": 0.09134764224290848, "learning_rate": 0.01, "loss": 1.9718, "step": 58743 }, { "epoch": 6.032655576093654, "grad_norm": 0.05880044028162956, "learning_rate": 0.01, "loss": 1.9819, "step": 58746 }, { "epoch": 6.032963647566235, "grad_norm": 0.06801950186491013, "learning_rate": 0.01, "loss": 2.0078, "step": 58749 }, { "epoch": 6.033271719038817, "grad_norm": 0.038541264832019806, "learning_rate": 0.01, "loss": 1.9595, "step": 58752 }, { "epoch": 6.033579790511399, "grad_norm": 0.03735314682126045, "learning_rate": 0.01, "loss": 1.9851, "step": 58755 }, { "epoch": 6.03388786198398, "grad_norm": 0.10068950057029724, "learning_rate": 0.01, "loss": 2.0018, "step": 58758 }, { "epoch": 6.034195933456562, "grad_norm": 0.04904381185770035, "learning_rate": 0.01, "loss": 1.9996, "step": 58761 }, { "epoch": 6.034504004929143, "grad_norm": 0.05660933256149292, "learning_rate": 0.01, "loss": 2.0051, "step": 58764 }, { "epoch": 6.0348120764017255, "grad_norm": 0.0632736012339592, "learning_rate": 0.01, "loss": 2.0073, "step": 58767 }, { "epoch": 6.035120147874307, "grad_norm": 0.06228591129183769, "learning_rate": 0.01, "loss": 1.9989, "step": 58770 }, { "epoch": 6.035428219346889, "grad_norm": 0.03460165485739708, "learning_rate": 0.01, "loss": 1.9976, "step": 58773 }, { "epoch": 6.03573629081947, "grad_norm": 0.04163900017738342, "learning_rate": 0.01, "loss": 1.983, "step": 58776 }, { "epoch": 6.036044362292052, "grad_norm": 0.07276707142591476, "learning_rate": 0.01, "loss": 1.9823, "step": 58779 }, { "epoch": 6.036352433764633, "grad_norm": 0.07738452404737473, "learning_rate": 0.01, "loss": 1.996, "step": 58782 }, { "epoch": 6.036660505237215, "grad_norm": 0.1065252274274826, "learning_rate": 0.01, "loss": 2.0061, "step": 58785 }, { "epoch": 6.036968576709796, "grad_norm": 0.05357460305094719, "learning_rate": 0.01, "loss": 1.9918, "step": 58788 }, { "epoch": 6.037276648182378, "grad_norm": 0.03857807815074921, "learning_rate": 0.01, "loss": 1.9788, "step": 58791 }, { "epoch": 6.03758471965496, "grad_norm": 0.029968131333589554, "learning_rate": 0.01, "loss": 1.9904, "step": 58794 }, { "epoch": 6.0378927911275415, "grad_norm": 0.0720914974808693, "learning_rate": 0.01, "loss": 1.995, "step": 58797 }, { "epoch": 6.038200862600124, "grad_norm": 0.12587358057498932, "learning_rate": 0.01, "loss": 1.9925, "step": 58800 }, { "epoch": 6.038508934072705, "grad_norm": 0.07049199193716049, "learning_rate": 0.01, "loss": 1.9816, "step": 58803 }, { "epoch": 6.038817005545287, "grad_norm": 0.05485573783516884, "learning_rate": 0.01, "loss": 2.0002, "step": 58806 }, { "epoch": 6.039125077017868, "grad_norm": 0.05481892451643944, "learning_rate": 0.01, "loss": 1.9901, "step": 58809 }, { "epoch": 6.03943314849045, "grad_norm": 0.07906284928321838, "learning_rate": 0.01, "loss": 1.9584, "step": 58812 }, { "epoch": 6.039741219963031, "grad_norm": 0.10519934445619583, "learning_rate": 0.01, "loss": 1.9589, "step": 58815 }, { "epoch": 6.040049291435613, "grad_norm": 0.06294261664152145, "learning_rate": 0.01, "loss": 2.0014, "step": 58818 }, { "epoch": 6.040357362908194, "grad_norm": 0.0904410257935524, "learning_rate": 0.01, "loss": 1.9626, "step": 58821 }, { "epoch": 6.040665434380776, "grad_norm": 0.053544074296951294, "learning_rate": 0.01, "loss": 1.9882, "step": 58824 }, { "epoch": 6.040973505853358, "grad_norm": 0.07926991581916809, "learning_rate": 0.01, "loss": 1.968, "step": 58827 }, { "epoch": 6.04128157732594, "grad_norm": 0.09197907149791718, "learning_rate": 0.01, "loss": 1.9756, "step": 58830 }, { "epoch": 6.041589648798522, "grad_norm": 0.05957088991999626, "learning_rate": 0.01, "loss": 1.9689, "step": 58833 }, { "epoch": 6.041897720271103, "grad_norm": 0.1431286334991455, "learning_rate": 0.01, "loss": 1.9854, "step": 58836 }, { "epoch": 6.042205791743685, "grad_norm": 0.06428467482328415, "learning_rate": 0.01, "loss": 1.9761, "step": 58839 }, { "epoch": 6.042513863216266, "grad_norm": 0.07960224896669388, "learning_rate": 0.01, "loss": 1.9728, "step": 58842 }, { "epoch": 6.042821934688848, "grad_norm": 0.040846168994903564, "learning_rate": 0.01, "loss": 1.9875, "step": 58845 }, { "epoch": 6.043130006161429, "grad_norm": 0.04430864006280899, "learning_rate": 0.01, "loss": 1.9945, "step": 58848 }, { "epoch": 6.043438077634011, "grad_norm": 0.04534313082695007, "learning_rate": 0.01, "loss": 1.9728, "step": 58851 }, { "epoch": 6.0437461491065925, "grad_norm": 0.04214495047926903, "learning_rate": 0.01, "loss": 1.9798, "step": 58854 }, { "epoch": 6.0440542205791745, "grad_norm": 0.036912716925144196, "learning_rate": 0.01, "loss": 2.0197, "step": 58857 }, { "epoch": 6.044362292051756, "grad_norm": 0.05208496004343033, "learning_rate": 0.01, "loss": 1.9674, "step": 58860 }, { "epoch": 6.044670363524338, "grad_norm": 0.10304973274469376, "learning_rate": 0.01, "loss": 1.9975, "step": 58863 }, { "epoch": 6.044978434996919, "grad_norm": 0.14296580851078033, "learning_rate": 0.01, "loss": 1.9819, "step": 58866 }, { "epoch": 6.045286506469501, "grad_norm": 0.037938348948955536, "learning_rate": 0.01, "loss": 1.9863, "step": 58869 }, { "epoch": 6.045594577942083, "grad_norm": 0.054850075393915176, "learning_rate": 0.01, "loss": 1.9933, "step": 58872 }, { "epoch": 6.045902649414664, "grad_norm": 0.0785508081316948, "learning_rate": 0.01, "loss": 1.974, "step": 58875 }, { "epoch": 6.046210720887246, "grad_norm": 0.07267174124717712, "learning_rate": 0.01, "loss": 1.9835, "step": 58878 }, { "epoch": 6.046518792359827, "grad_norm": 0.04367669299244881, "learning_rate": 0.01, "loss": 1.9942, "step": 58881 }, { "epoch": 6.046826863832409, "grad_norm": 0.046264924108982086, "learning_rate": 0.01, "loss": 2.0044, "step": 58884 }, { "epoch": 6.047134935304991, "grad_norm": 0.08904755860567093, "learning_rate": 0.01, "loss": 2.0053, "step": 58887 }, { "epoch": 6.047443006777573, "grad_norm": 0.06151941046118736, "learning_rate": 0.01, "loss": 1.9807, "step": 58890 }, { "epoch": 6.047751078250154, "grad_norm": 0.09928801655769348, "learning_rate": 0.01, "loss": 1.9916, "step": 58893 }, { "epoch": 6.048059149722736, "grad_norm": 0.04402837157249451, "learning_rate": 0.01, "loss": 1.9925, "step": 58896 }, { "epoch": 6.048367221195317, "grad_norm": 0.08484535664319992, "learning_rate": 0.01, "loss": 1.9957, "step": 58899 }, { "epoch": 6.048675292667899, "grad_norm": 0.07044728100299835, "learning_rate": 0.01, "loss": 1.9752, "step": 58902 }, { "epoch": 6.04898336414048, "grad_norm": 0.05778975039720535, "learning_rate": 0.01, "loss": 1.999, "step": 58905 }, { "epoch": 6.049291435613062, "grad_norm": 0.08870556205511093, "learning_rate": 0.01, "loss": 1.9794, "step": 58908 }, { "epoch": 6.049599507085643, "grad_norm": 0.06904757022857666, "learning_rate": 0.01, "loss": 1.9988, "step": 58911 }, { "epoch": 6.0499075785582255, "grad_norm": 0.07772015035152435, "learning_rate": 0.01, "loss": 1.9439, "step": 58914 }, { "epoch": 6.0502156500308075, "grad_norm": 0.05567057430744171, "learning_rate": 0.01, "loss": 1.9855, "step": 58917 }, { "epoch": 6.050523721503389, "grad_norm": 0.06966744363307953, "learning_rate": 0.01, "loss": 1.9801, "step": 58920 }, { "epoch": 6.050831792975971, "grad_norm": 0.05111026018857956, "learning_rate": 0.01, "loss": 1.9818, "step": 58923 }, { "epoch": 6.051139864448552, "grad_norm": 0.07328280061483383, "learning_rate": 0.01, "loss": 2.0044, "step": 58926 }, { "epoch": 6.051447935921134, "grad_norm": 0.06918063014745712, "learning_rate": 0.01, "loss": 1.9504, "step": 58929 }, { "epoch": 6.051756007393715, "grad_norm": 0.09920765459537506, "learning_rate": 0.01, "loss": 2.0058, "step": 58932 }, { "epoch": 6.052064078866297, "grad_norm": 0.057603057473897934, "learning_rate": 0.01, "loss": 1.9888, "step": 58935 }, { "epoch": 6.052372150338878, "grad_norm": 0.0861181914806366, "learning_rate": 0.01, "loss": 2.0107, "step": 58938 }, { "epoch": 6.05268022181146, "grad_norm": 0.1477275937795639, "learning_rate": 0.01, "loss": 1.9598, "step": 58941 }, { "epoch": 6.0529882932840415, "grad_norm": 0.04588627070188522, "learning_rate": 0.01, "loss": 1.9911, "step": 58944 }, { "epoch": 6.053296364756624, "grad_norm": 0.09523765742778778, "learning_rate": 0.01, "loss": 1.9673, "step": 58947 }, { "epoch": 6.053604436229205, "grad_norm": 0.05901789665222168, "learning_rate": 0.01, "loss": 2.0197, "step": 58950 }, { "epoch": 6.053912507701787, "grad_norm": 0.0464630164206028, "learning_rate": 0.01, "loss": 1.964, "step": 58953 }, { "epoch": 6.054220579174369, "grad_norm": 0.06684694439172745, "learning_rate": 0.01, "loss": 1.9787, "step": 58956 }, { "epoch": 6.05452865064695, "grad_norm": 0.062367189675569534, "learning_rate": 0.01, "loss": 1.9693, "step": 58959 }, { "epoch": 6.054836722119532, "grad_norm": 0.050568412989377975, "learning_rate": 0.01, "loss": 1.9858, "step": 58962 }, { "epoch": 6.055144793592113, "grad_norm": 0.05513317883014679, "learning_rate": 0.01, "loss": 1.9839, "step": 58965 }, { "epoch": 6.055452865064695, "grad_norm": 0.05048702657222748, "learning_rate": 0.01, "loss": 1.9873, "step": 58968 }, { "epoch": 6.055760936537276, "grad_norm": 0.04989680275321007, "learning_rate": 0.01, "loss": 1.971, "step": 58971 }, { "epoch": 6.0560690080098585, "grad_norm": 0.03208424523472786, "learning_rate": 0.01, "loss": 2.002, "step": 58974 }, { "epoch": 6.05637707948244, "grad_norm": 0.11022038012742996, "learning_rate": 0.01, "loss": 1.9974, "step": 58977 }, { "epoch": 6.056685150955022, "grad_norm": 0.10186657309532166, "learning_rate": 0.01, "loss": 1.9808, "step": 58980 }, { "epoch": 6.056993222427603, "grad_norm": 0.04729219898581505, "learning_rate": 0.01, "loss": 1.9879, "step": 58983 }, { "epoch": 6.057301293900185, "grad_norm": 0.08434822410345078, "learning_rate": 0.01, "loss": 2.0063, "step": 58986 }, { "epoch": 6.057609365372766, "grad_norm": 0.05614599958062172, "learning_rate": 0.01, "loss": 2.009, "step": 58989 }, { "epoch": 6.057917436845348, "grad_norm": 0.06520257890224457, "learning_rate": 0.01, "loss": 1.9684, "step": 58992 }, { "epoch": 6.05822550831793, "grad_norm": 0.035195183008909225, "learning_rate": 0.01, "loss": 1.9842, "step": 58995 }, { "epoch": 6.058533579790511, "grad_norm": 0.08378977328538895, "learning_rate": 0.01, "loss": 1.9709, "step": 58998 }, { "epoch": 6.058841651263093, "grad_norm": 0.05082874745130539, "learning_rate": 0.01, "loss": 2.0167, "step": 59001 }, { "epoch": 6.0591497227356745, "grad_norm": 0.10246657580137253, "learning_rate": 0.01, "loss": 1.9932, "step": 59004 }, { "epoch": 6.059457794208257, "grad_norm": 0.0681087076663971, "learning_rate": 0.01, "loss": 1.9932, "step": 59007 }, { "epoch": 6.059765865680838, "grad_norm": 0.10320194065570831, "learning_rate": 0.01, "loss": 1.9569, "step": 59010 }, { "epoch": 6.06007393715342, "grad_norm": 0.04554738476872444, "learning_rate": 0.01, "loss": 2.0199, "step": 59013 }, { "epoch": 6.060382008626001, "grad_norm": 0.11251513659954071, "learning_rate": 0.01, "loss": 1.9776, "step": 59016 }, { "epoch": 6.060690080098583, "grad_norm": 0.042101431638002396, "learning_rate": 0.01, "loss": 1.9971, "step": 59019 }, { "epoch": 6.060998151571164, "grad_norm": 0.05242328718304634, "learning_rate": 0.01, "loss": 1.977, "step": 59022 }, { "epoch": 6.061306223043746, "grad_norm": 0.07696748524904251, "learning_rate": 0.01, "loss": 1.9847, "step": 59025 }, { "epoch": 6.061614294516327, "grad_norm": 0.07448645681142807, "learning_rate": 0.01, "loss": 1.9781, "step": 59028 }, { "epoch": 6.061922365988909, "grad_norm": 0.05332387611269951, "learning_rate": 0.01, "loss": 1.9886, "step": 59031 }, { "epoch": 6.0622304374614915, "grad_norm": 0.046122957020998, "learning_rate": 0.01, "loss": 2.0017, "step": 59034 }, { "epoch": 6.062538508934073, "grad_norm": 0.06084294244647026, "learning_rate": 0.01, "loss": 1.9973, "step": 59037 }, { "epoch": 6.062846580406655, "grad_norm": 0.09956135600805283, "learning_rate": 0.01, "loss": 1.9777, "step": 59040 }, { "epoch": 6.063154651879236, "grad_norm": 0.05223178490996361, "learning_rate": 0.01, "loss": 1.9828, "step": 59043 }, { "epoch": 6.063462723351818, "grad_norm": 0.08285564929246902, "learning_rate": 0.01, "loss": 1.9842, "step": 59046 }, { "epoch": 6.063770794824399, "grad_norm": 0.05346845090389252, "learning_rate": 0.01, "loss": 1.9875, "step": 59049 }, { "epoch": 6.064078866296981, "grad_norm": 0.08269158750772476, "learning_rate": 0.01, "loss": 1.9905, "step": 59052 }, { "epoch": 6.064386937769562, "grad_norm": 0.07285866141319275, "learning_rate": 0.01, "loss": 1.9683, "step": 59055 }, { "epoch": 6.064695009242144, "grad_norm": 0.07736973464488983, "learning_rate": 0.01, "loss": 1.9928, "step": 59058 }, { "epoch": 6.0650030807147255, "grad_norm": 0.0628119483590126, "learning_rate": 0.01, "loss": 2.0, "step": 59061 }, { "epoch": 6.0653111521873075, "grad_norm": 0.044494885951280594, "learning_rate": 0.01, "loss": 2.0048, "step": 59064 }, { "epoch": 6.065619223659889, "grad_norm": 0.1270086020231247, "learning_rate": 0.01, "loss": 1.9853, "step": 59067 }, { "epoch": 6.065927295132471, "grad_norm": 0.045628707855939865, "learning_rate": 0.01, "loss": 1.9862, "step": 59070 }, { "epoch": 6.066235366605053, "grad_norm": 0.07165969908237457, "learning_rate": 0.01, "loss": 1.9738, "step": 59073 }, { "epoch": 6.066543438077634, "grad_norm": 0.047646794468164444, "learning_rate": 0.01, "loss": 1.9962, "step": 59076 }, { "epoch": 6.066851509550216, "grad_norm": 0.05064648762345314, "learning_rate": 0.01, "loss": 1.9851, "step": 59079 }, { "epoch": 6.067159581022797, "grad_norm": 0.07075773924589157, "learning_rate": 0.01, "loss": 1.9746, "step": 59082 }, { "epoch": 6.067467652495379, "grad_norm": 0.0734126940369606, "learning_rate": 0.01, "loss": 2.0036, "step": 59085 }, { "epoch": 6.06777572396796, "grad_norm": 0.07513909041881561, "learning_rate": 0.01, "loss": 2.0011, "step": 59088 }, { "epoch": 6.068083795440542, "grad_norm": 0.04252862557768822, "learning_rate": 0.01, "loss": 1.9909, "step": 59091 }, { "epoch": 6.068391866913124, "grad_norm": 0.06020990386605263, "learning_rate": 0.01, "loss": 1.9744, "step": 59094 }, { "epoch": 6.068699938385706, "grad_norm": 0.048400718718767166, "learning_rate": 0.01, "loss": 1.9986, "step": 59097 }, { "epoch": 6.069008009858287, "grad_norm": 0.10299364477396011, "learning_rate": 0.01, "loss": 1.9918, "step": 59100 }, { "epoch": 6.069316081330869, "grad_norm": 0.07474274933338165, "learning_rate": 0.01, "loss": 2.0238, "step": 59103 }, { "epoch": 6.06962415280345, "grad_norm": 0.069256491959095, "learning_rate": 0.01, "loss": 1.9933, "step": 59106 }, { "epoch": 6.069932224276032, "grad_norm": 0.07016000151634216, "learning_rate": 0.01, "loss": 1.9659, "step": 59109 }, { "epoch": 6.070240295748613, "grad_norm": 0.10577663034200668, "learning_rate": 0.01, "loss": 1.9724, "step": 59112 }, { "epoch": 6.070548367221195, "grad_norm": 0.10170361399650574, "learning_rate": 0.01, "loss": 1.9839, "step": 59115 }, { "epoch": 6.070856438693777, "grad_norm": 0.03669926896691322, "learning_rate": 0.01, "loss": 1.9588, "step": 59118 }, { "epoch": 6.0711645101663585, "grad_norm": 0.04145694151520729, "learning_rate": 0.01, "loss": 1.9565, "step": 59121 }, { "epoch": 6.0714725816389405, "grad_norm": 0.052021101117134094, "learning_rate": 0.01, "loss": 1.9919, "step": 59124 }, { "epoch": 6.071780653111522, "grad_norm": 0.10580527782440186, "learning_rate": 0.01, "loss": 1.9736, "step": 59127 }, { "epoch": 6.072088724584104, "grad_norm": 0.05877790227532387, "learning_rate": 0.01, "loss": 2.0073, "step": 59130 }, { "epoch": 6.072396796056685, "grad_norm": 0.047968316823244095, "learning_rate": 0.01, "loss": 1.9982, "step": 59133 }, { "epoch": 6.072704867529267, "grad_norm": 0.05399814620614052, "learning_rate": 0.01, "loss": 1.9785, "step": 59136 }, { "epoch": 6.073012939001848, "grad_norm": 0.0343160405755043, "learning_rate": 0.01, "loss": 1.9685, "step": 59139 }, { "epoch": 6.07332101047443, "grad_norm": 0.032716646790504456, "learning_rate": 0.01, "loss": 1.989, "step": 59142 }, { "epoch": 6.073629081947011, "grad_norm": 0.09958682209253311, "learning_rate": 0.01, "loss": 1.9854, "step": 59145 }, { "epoch": 6.073937153419593, "grad_norm": 0.14549368619918823, "learning_rate": 0.01, "loss": 1.9769, "step": 59148 }, { "epoch": 6.0742452248921746, "grad_norm": 0.12808917462825775, "learning_rate": 0.01, "loss": 2.0228, "step": 59151 }, { "epoch": 6.074553296364757, "grad_norm": 0.06274955719709396, "learning_rate": 0.01, "loss": 1.9703, "step": 59154 }, { "epoch": 6.074861367837339, "grad_norm": 0.06658618897199631, "learning_rate": 0.01, "loss": 1.9678, "step": 59157 }, { "epoch": 6.07516943930992, "grad_norm": 0.07997333258390427, "learning_rate": 0.01, "loss": 2.0145, "step": 59160 }, { "epoch": 6.075477510782502, "grad_norm": 0.040492795407772064, "learning_rate": 0.01, "loss": 1.9726, "step": 59163 }, { "epoch": 6.075785582255083, "grad_norm": 0.03949406370520592, "learning_rate": 0.01, "loss": 1.9907, "step": 59166 }, { "epoch": 6.076093653727665, "grad_norm": 0.0330752469599247, "learning_rate": 0.01, "loss": 1.9876, "step": 59169 }, { "epoch": 6.076401725200246, "grad_norm": 0.03734998777508736, "learning_rate": 0.01, "loss": 1.9643, "step": 59172 }, { "epoch": 6.076709796672828, "grad_norm": 0.04145140200853348, "learning_rate": 0.01, "loss": 1.9748, "step": 59175 }, { "epoch": 6.0770178681454095, "grad_norm": 0.04934060946106911, "learning_rate": 0.01, "loss": 1.9791, "step": 59178 }, { "epoch": 6.0773259396179915, "grad_norm": 0.09385284036397934, "learning_rate": 0.01, "loss": 2.0134, "step": 59181 }, { "epoch": 6.077634011090573, "grad_norm": 0.14405569434165955, "learning_rate": 0.01, "loss": 1.9802, "step": 59184 }, { "epoch": 6.077942082563155, "grad_norm": 0.12107555568218231, "learning_rate": 0.01, "loss": 1.9945, "step": 59187 }, { "epoch": 6.078250154035736, "grad_norm": 0.05842713639140129, "learning_rate": 0.01, "loss": 1.9649, "step": 59190 }, { "epoch": 6.078558225508318, "grad_norm": 0.049650490283966064, "learning_rate": 0.01, "loss": 1.9795, "step": 59193 }, { "epoch": 6.0788662969809, "grad_norm": 0.06532774120569229, "learning_rate": 0.01, "loss": 2.0231, "step": 59196 }, { "epoch": 6.079174368453481, "grad_norm": 0.03809603303670883, "learning_rate": 0.01, "loss": 1.9844, "step": 59199 }, { "epoch": 6.079482439926063, "grad_norm": 0.07845079898834229, "learning_rate": 0.01, "loss": 1.9974, "step": 59202 }, { "epoch": 6.079790511398644, "grad_norm": 0.15672674775123596, "learning_rate": 0.01, "loss": 1.997, "step": 59205 }, { "epoch": 6.080098582871226, "grad_norm": 0.11329050362110138, "learning_rate": 0.01, "loss": 2.0003, "step": 59208 }, { "epoch": 6.080406654343808, "grad_norm": 0.08986864238977432, "learning_rate": 0.01, "loss": 1.9638, "step": 59211 }, { "epoch": 6.08071472581639, "grad_norm": 0.040678489953279495, "learning_rate": 0.01, "loss": 1.9791, "step": 59214 }, { "epoch": 6.081022797288971, "grad_norm": 0.09467331320047379, "learning_rate": 0.01, "loss": 2.0112, "step": 59217 }, { "epoch": 6.081330868761553, "grad_norm": 0.0797513797879219, "learning_rate": 0.01, "loss": 1.9946, "step": 59220 }, { "epoch": 6.081638940234134, "grad_norm": 0.04497670754790306, "learning_rate": 0.01, "loss": 1.9816, "step": 59223 }, { "epoch": 6.081947011706716, "grad_norm": 0.08867593109607697, "learning_rate": 0.01, "loss": 1.9627, "step": 59226 }, { "epoch": 6.082255083179297, "grad_norm": 0.1013980433344841, "learning_rate": 0.01, "loss": 1.9837, "step": 59229 }, { "epoch": 6.082563154651879, "grad_norm": 0.05733179301023483, "learning_rate": 0.01, "loss": 1.9862, "step": 59232 }, { "epoch": 6.082871226124461, "grad_norm": 0.09696310758590698, "learning_rate": 0.01, "loss": 1.9978, "step": 59235 }, { "epoch": 6.0831792975970425, "grad_norm": 0.07615906745195389, "learning_rate": 0.01, "loss": 2.0072, "step": 59238 }, { "epoch": 6.0834873690696245, "grad_norm": 0.08034536242485046, "learning_rate": 0.01, "loss": 1.9871, "step": 59241 }, { "epoch": 6.083795440542206, "grad_norm": 0.04975961148738861, "learning_rate": 0.01, "loss": 1.9946, "step": 59244 }, { "epoch": 6.084103512014788, "grad_norm": 0.09680989384651184, "learning_rate": 0.01, "loss": 2.0012, "step": 59247 }, { "epoch": 6.084411583487369, "grad_norm": 0.06167134270071983, "learning_rate": 0.01, "loss": 1.9781, "step": 59250 }, { "epoch": 6.084719654959951, "grad_norm": 0.08627012372016907, "learning_rate": 0.01, "loss": 2.0036, "step": 59253 }, { "epoch": 6.085027726432532, "grad_norm": 0.053778212517499924, "learning_rate": 0.01, "loss": 2.0024, "step": 59256 }, { "epoch": 6.085335797905114, "grad_norm": 0.10851852595806122, "learning_rate": 0.01, "loss": 2.0025, "step": 59259 }, { "epoch": 6.085643869377695, "grad_norm": 0.08398763090372086, "learning_rate": 0.01, "loss": 1.9835, "step": 59262 }, { "epoch": 6.085951940850277, "grad_norm": 0.09030786901712418, "learning_rate": 0.01, "loss": 2.009, "step": 59265 }, { "epoch": 6.0862600123228585, "grad_norm": 0.08578209578990936, "learning_rate": 0.01, "loss": 1.9648, "step": 59268 }, { "epoch": 6.086568083795441, "grad_norm": 0.06893924623727798, "learning_rate": 0.01, "loss": 1.9847, "step": 59271 }, { "epoch": 6.086876155268023, "grad_norm": 0.044449809938669205, "learning_rate": 0.01, "loss": 1.9844, "step": 59274 }, { "epoch": 6.087184226740604, "grad_norm": 0.03283555805683136, "learning_rate": 0.01, "loss": 1.9807, "step": 59277 }, { "epoch": 6.087492298213186, "grad_norm": 0.05007379502058029, "learning_rate": 0.01, "loss": 1.994, "step": 59280 }, { "epoch": 6.087800369685767, "grad_norm": 0.05644026771187782, "learning_rate": 0.01, "loss": 1.9968, "step": 59283 }, { "epoch": 6.088108441158349, "grad_norm": 0.06911731511354446, "learning_rate": 0.01, "loss": 1.9543, "step": 59286 }, { "epoch": 6.08841651263093, "grad_norm": 0.06676710397005081, "learning_rate": 0.01, "loss": 1.9876, "step": 59289 }, { "epoch": 6.088724584103512, "grad_norm": 0.03128058835864067, "learning_rate": 0.01, "loss": 2.0105, "step": 59292 }, { "epoch": 6.089032655576093, "grad_norm": 0.05113793537020683, "learning_rate": 0.01, "loss": 1.966, "step": 59295 }, { "epoch": 6.0893407270486755, "grad_norm": 0.10867103934288025, "learning_rate": 0.01, "loss": 2.0168, "step": 59298 }, { "epoch": 6.089648798521257, "grad_norm": 0.10439618676900864, "learning_rate": 0.01, "loss": 1.9903, "step": 59301 }, { "epoch": 6.089956869993839, "grad_norm": 0.06279850751161575, "learning_rate": 0.01, "loss": 1.9788, "step": 59304 }, { "epoch": 6.09026494146642, "grad_norm": 0.07557038217782974, "learning_rate": 0.01, "loss": 1.9879, "step": 59307 }, { "epoch": 6.090573012939002, "grad_norm": 0.06895217299461365, "learning_rate": 0.01, "loss": 1.9832, "step": 59310 }, { "epoch": 6.090881084411583, "grad_norm": 0.058836501091718674, "learning_rate": 0.01, "loss": 1.976, "step": 59313 }, { "epoch": 6.091189155884165, "grad_norm": 0.10100754350423813, "learning_rate": 0.01, "loss": 1.9963, "step": 59316 }, { "epoch": 6.091497227356747, "grad_norm": 0.03705455735325813, "learning_rate": 0.01, "loss": 1.9814, "step": 59319 }, { "epoch": 6.091805298829328, "grad_norm": 0.09770546853542328, "learning_rate": 0.01, "loss": 1.9778, "step": 59322 }, { "epoch": 6.09211337030191, "grad_norm": 0.07368703931570053, "learning_rate": 0.01, "loss": 1.9476, "step": 59325 }, { "epoch": 6.0924214417744915, "grad_norm": 0.07088816910982132, "learning_rate": 0.01, "loss": 2.0124, "step": 59328 }, { "epoch": 6.092729513247074, "grad_norm": 0.05165349692106247, "learning_rate": 0.01, "loss": 1.9781, "step": 59331 }, { "epoch": 6.093037584719655, "grad_norm": 0.08270641416311264, "learning_rate": 0.01, "loss": 1.991, "step": 59334 }, { "epoch": 6.093345656192237, "grad_norm": 0.07178719341754913, "learning_rate": 0.01, "loss": 2.0069, "step": 59337 }, { "epoch": 6.093653727664818, "grad_norm": 0.07591982930898666, "learning_rate": 0.01, "loss": 1.9647, "step": 59340 }, { "epoch": 6.0939617991374, "grad_norm": 0.08362317830324173, "learning_rate": 0.01, "loss": 1.9717, "step": 59343 }, { "epoch": 6.094269870609981, "grad_norm": 0.04838939011096954, "learning_rate": 0.01, "loss": 1.9897, "step": 59346 }, { "epoch": 6.094577942082563, "grad_norm": 0.049368396401405334, "learning_rate": 0.01, "loss": 1.9877, "step": 59349 }, { "epoch": 6.094886013555144, "grad_norm": 0.05581490695476532, "learning_rate": 0.01, "loss": 1.9736, "step": 59352 }, { "epoch": 6.095194085027726, "grad_norm": 0.04965098947286606, "learning_rate": 0.01, "loss": 1.9675, "step": 59355 }, { "epoch": 6.0955021565003085, "grad_norm": 0.03643220290541649, "learning_rate": 0.01, "loss": 2.0234, "step": 59358 }, { "epoch": 6.09581022797289, "grad_norm": 0.08060404658317566, "learning_rate": 0.01, "loss": 1.987, "step": 59361 }, { "epoch": 6.096118299445472, "grad_norm": 0.08821488916873932, "learning_rate": 0.01, "loss": 1.9602, "step": 59364 }, { "epoch": 6.096426370918053, "grad_norm": 0.04556925222277641, "learning_rate": 0.01, "loss": 2.0006, "step": 59367 }, { "epoch": 6.096734442390635, "grad_norm": 0.07568595558404922, "learning_rate": 0.01, "loss": 1.989, "step": 59370 }, { "epoch": 6.097042513863216, "grad_norm": 0.06441906094551086, "learning_rate": 0.01, "loss": 1.9778, "step": 59373 }, { "epoch": 6.097350585335798, "grad_norm": 0.04911012202501297, "learning_rate": 0.01, "loss": 2.0094, "step": 59376 }, { "epoch": 6.097658656808379, "grad_norm": 0.10038016736507416, "learning_rate": 0.01, "loss": 1.97, "step": 59379 }, { "epoch": 6.097966728280961, "grad_norm": 0.08682964742183685, "learning_rate": 0.01, "loss": 1.9936, "step": 59382 }, { "epoch": 6.0982747997535425, "grad_norm": 0.07904212176799774, "learning_rate": 0.01, "loss": 1.9821, "step": 59385 }, { "epoch": 6.0985828712261245, "grad_norm": 0.07314693182706833, "learning_rate": 0.01, "loss": 1.9913, "step": 59388 }, { "epoch": 6.098890942698706, "grad_norm": 0.04866940528154373, "learning_rate": 0.01, "loss": 1.998, "step": 59391 }, { "epoch": 6.099199014171288, "grad_norm": 0.040898289531469345, "learning_rate": 0.01, "loss": 1.9953, "step": 59394 }, { "epoch": 6.09950708564387, "grad_norm": 0.04597887024283409, "learning_rate": 0.01, "loss": 2.0, "step": 59397 }, { "epoch": 6.099815157116451, "grad_norm": 0.04504725709557533, "learning_rate": 0.01, "loss": 2.0008, "step": 59400 }, { "epoch": 6.100123228589033, "grad_norm": 0.032154761254787445, "learning_rate": 0.01, "loss": 1.9804, "step": 59403 }, { "epoch": 6.100431300061614, "grad_norm": 0.06787566095590591, "learning_rate": 0.01, "loss": 1.999, "step": 59406 }, { "epoch": 6.100739371534196, "grad_norm": 0.07191510498523712, "learning_rate": 0.01, "loss": 1.9984, "step": 59409 }, { "epoch": 6.101047443006777, "grad_norm": 0.08066849410533905, "learning_rate": 0.01, "loss": 1.9812, "step": 59412 }, { "epoch": 6.101355514479359, "grad_norm": 0.05546758696436882, "learning_rate": 0.01, "loss": 1.9777, "step": 59415 }, { "epoch": 6.101663585951941, "grad_norm": 0.059205375611782074, "learning_rate": 0.01, "loss": 1.9817, "step": 59418 }, { "epoch": 6.101971657424523, "grad_norm": 0.03468778356909752, "learning_rate": 0.01, "loss": 1.9937, "step": 59421 }, { "epoch": 6.102279728897104, "grad_norm": 0.06100203096866608, "learning_rate": 0.01, "loss": 2.0237, "step": 59424 }, { "epoch": 6.102587800369686, "grad_norm": 0.04229642450809479, "learning_rate": 0.01, "loss": 1.969, "step": 59427 }, { "epoch": 6.102895871842267, "grad_norm": 0.09364788234233856, "learning_rate": 0.01, "loss": 1.9774, "step": 59430 }, { "epoch": 6.103203943314849, "grad_norm": 0.0706750899553299, "learning_rate": 0.01, "loss": 1.9873, "step": 59433 }, { "epoch": 6.103512014787431, "grad_norm": 0.1148315817117691, "learning_rate": 0.01, "loss": 1.9675, "step": 59436 }, { "epoch": 6.103820086260012, "grad_norm": 0.057128921151161194, "learning_rate": 0.01, "loss": 1.9544, "step": 59439 }, { "epoch": 6.104128157732594, "grad_norm": 0.054126545786857605, "learning_rate": 0.01, "loss": 2.0055, "step": 59442 }, { "epoch": 6.1044362292051755, "grad_norm": 0.050927091389894485, "learning_rate": 0.01, "loss": 1.9675, "step": 59445 }, { "epoch": 6.1047443006777575, "grad_norm": 0.05484678968787193, "learning_rate": 0.01, "loss": 1.9928, "step": 59448 }, { "epoch": 6.105052372150339, "grad_norm": 0.06659942865371704, "learning_rate": 0.01, "loss": 1.9842, "step": 59451 }, { "epoch": 6.105360443622921, "grad_norm": 0.06512758880853653, "learning_rate": 0.01, "loss": 1.9836, "step": 59454 }, { "epoch": 6.105668515095502, "grad_norm": 0.04978008568286896, "learning_rate": 0.01, "loss": 1.9708, "step": 59457 }, { "epoch": 6.105976586568084, "grad_norm": 0.03648846223950386, "learning_rate": 0.01, "loss": 1.9725, "step": 59460 }, { "epoch": 6.106284658040665, "grad_norm": 0.049807704985141754, "learning_rate": 0.01, "loss": 1.9768, "step": 59463 }, { "epoch": 6.106592729513247, "grad_norm": 0.04505782201886177, "learning_rate": 0.01, "loss": 1.9688, "step": 59466 }, { "epoch": 6.106900800985828, "grad_norm": 0.05675986409187317, "learning_rate": 0.01, "loss": 1.9758, "step": 59469 }, { "epoch": 6.10720887245841, "grad_norm": 0.06363193690776825, "learning_rate": 0.01, "loss": 1.9841, "step": 59472 }, { "epoch": 6.107516943930992, "grad_norm": 0.038001857697963715, "learning_rate": 0.01, "loss": 1.9912, "step": 59475 }, { "epoch": 6.107825015403574, "grad_norm": 0.034627631306648254, "learning_rate": 0.01, "loss": 1.976, "step": 59478 }, { "epoch": 6.108133086876156, "grad_norm": 0.0814884752035141, "learning_rate": 0.01, "loss": 1.9742, "step": 59481 }, { "epoch": 6.108441158348737, "grad_norm": 0.0862545445561409, "learning_rate": 0.01, "loss": 1.9793, "step": 59484 }, { "epoch": 6.108749229821319, "grad_norm": 0.05416722968220711, "learning_rate": 0.01, "loss": 1.9794, "step": 59487 }, { "epoch": 6.1090573012939, "grad_norm": 0.04082367196679115, "learning_rate": 0.01, "loss": 1.9933, "step": 59490 }, { "epoch": 6.109365372766482, "grad_norm": 0.0981779396533966, "learning_rate": 0.01, "loss": 1.9745, "step": 59493 }, { "epoch": 6.109673444239063, "grad_norm": 0.03428742289543152, "learning_rate": 0.01, "loss": 1.9762, "step": 59496 }, { "epoch": 6.109981515711645, "grad_norm": 0.07878245413303375, "learning_rate": 0.01, "loss": 1.9798, "step": 59499 }, { "epoch": 6.110289587184226, "grad_norm": 0.1018940880894661, "learning_rate": 0.01, "loss": 1.9923, "step": 59502 }, { "epoch": 6.1105976586568085, "grad_norm": 0.03978511691093445, "learning_rate": 0.01, "loss": 1.9941, "step": 59505 }, { "epoch": 6.11090573012939, "grad_norm": 0.05915730074048042, "learning_rate": 0.01, "loss": 1.9899, "step": 59508 }, { "epoch": 6.111213801601972, "grad_norm": 0.0455721952021122, "learning_rate": 0.01, "loss": 1.9844, "step": 59511 }, { "epoch": 6.111521873074553, "grad_norm": 0.050054021179676056, "learning_rate": 0.01, "loss": 1.9907, "step": 59514 }, { "epoch": 6.111829944547135, "grad_norm": 0.0369170643389225, "learning_rate": 0.01, "loss": 1.9817, "step": 59517 }, { "epoch": 6.112138016019717, "grad_norm": 0.044435255229473114, "learning_rate": 0.01, "loss": 2.0025, "step": 59520 }, { "epoch": 6.112446087492298, "grad_norm": 0.04968203604221344, "learning_rate": 0.01, "loss": 1.9816, "step": 59523 }, { "epoch": 6.11275415896488, "grad_norm": 0.04474148899316788, "learning_rate": 0.01, "loss": 1.9734, "step": 59526 }, { "epoch": 6.113062230437461, "grad_norm": 0.03932742401957512, "learning_rate": 0.01, "loss": 1.9926, "step": 59529 }, { "epoch": 6.113370301910043, "grad_norm": 0.10069919377565384, "learning_rate": 0.01, "loss": 2.0154, "step": 59532 }, { "epoch": 6.1136783733826245, "grad_norm": 0.08742933720350266, "learning_rate": 0.01, "loss": 1.9891, "step": 59535 }, { "epoch": 6.113986444855207, "grad_norm": 0.15486708283424377, "learning_rate": 0.01, "loss": 1.9888, "step": 59538 }, { "epoch": 6.114294516327788, "grad_norm": 0.05859851837158203, "learning_rate": 0.01, "loss": 2.01, "step": 59541 }, { "epoch": 6.11460258780037, "grad_norm": 0.04623505100607872, "learning_rate": 0.01, "loss": 2.0049, "step": 59544 }, { "epoch": 6.114910659272951, "grad_norm": 0.037663958966732025, "learning_rate": 0.01, "loss": 1.9699, "step": 59547 }, { "epoch": 6.115218730745533, "grad_norm": 0.03428487107157707, "learning_rate": 0.01, "loss": 2.0076, "step": 59550 }, { "epoch": 6.115526802218114, "grad_norm": 0.03549535945057869, "learning_rate": 0.01, "loss": 1.984, "step": 59553 }, { "epoch": 6.115834873690696, "grad_norm": 0.0357876755297184, "learning_rate": 0.01, "loss": 1.9636, "step": 59556 }, { "epoch": 6.116142945163278, "grad_norm": 0.05952336639165878, "learning_rate": 0.01, "loss": 1.9522, "step": 59559 }, { "epoch": 6.116451016635859, "grad_norm": 0.06661629676818848, "learning_rate": 0.01, "loss": 1.9885, "step": 59562 }, { "epoch": 6.1167590881084415, "grad_norm": 0.037326935678720474, "learning_rate": 0.01, "loss": 1.959, "step": 59565 }, { "epoch": 6.117067159581023, "grad_norm": 0.08297394216060638, "learning_rate": 0.01, "loss": 1.9881, "step": 59568 }, { "epoch": 6.117375231053605, "grad_norm": 0.08954669535160065, "learning_rate": 0.01, "loss": 1.9726, "step": 59571 }, { "epoch": 6.117683302526186, "grad_norm": 0.06428781896829605, "learning_rate": 0.01, "loss": 1.9688, "step": 59574 }, { "epoch": 6.117991373998768, "grad_norm": 0.0963866114616394, "learning_rate": 0.01, "loss": 2.0015, "step": 59577 }, { "epoch": 6.118299445471349, "grad_norm": 0.07301589101552963, "learning_rate": 0.01, "loss": 2.0128, "step": 59580 }, { "epoch": 6.118607516943931, "grad_norm": 0.05073026567697525, "learning_rate": 0.01, "loss": 1.9731, "step": 59583 }, { "epoch": 6.118915588416512, "grad_norm": 0.04244513809680939, "learning_rate": 0.01, "loss": 1.9694, "step": 59586 }, { "epoch": 6.119223659889094, "grad_norm": 0.0322258435189724, "learning_rate": 0.01, "loss": 1.9584, "step": 59589 }, { "epoch": 6.1195317313616755, "grad_norm": 0.040786877274513245, "learning_rate": 0.01, "loss": 1.9797, "step": 59592 }, { "epoch": 6.1198398028342575, "grad_norm": 0.1062447801232338, "learning_rate": 0.01, "loss": 1.9888, "step": 59595 }, { "epoch": 6.12014787430684, "grad_norm": 0.07283103466033936, "learning_rate": 0.01, "loss": 1.9989, "step": 59598 }, { "epoch": 6.120455945779421, "grad_norm": 0.04273154214024544, "learning_rate": 0.01, "loss": 1.9896, "step": 59601 }, { "epoch": 6.120764017252003, "grad_norm": 0.0657401755452156, "learning_rate": 0.01, "loss": 1.9798, "step": 59604 }, { "epoch": 6.121072088724584, "grad_norm": 0.03795840963721275, "learning_rate": 0.01, "loss": 1.9599, "step": 59607 }, { "epoch": 6.121380160197166, "grad_norm": 0.03355659544467926, "learning_rate": 0.01, "loss": 2.0068, "step": 59610 }, { "epoch": 6.121688231669747, "grad_norm": 0.13272467255592346, "learning_rate": 0.01, "loss": 1.9945, "step": 59613 }, { "epoch": 6.121996303142329, "grad_norm": 0.07595691084861755, "learning_rate": 0.01, "loss": 1.979, "step": 59616 }, { "epoch": 6.12230437461491, "grad_norm": 0.0829743966460228, "learning_rate": 0.01, "loss": 1.988, "step": 59619 }, { "epoch": 6.122612446087492, "grad_norm": 0.05950823426246643, "learning_rate": 0.01, "loss": 1.986, "step": 59622 }, { "epoch": 6.122920517560074, "grad_norm": 0.06740408390760422, "learning_rate": 0.01, "loss": 1.9864, "step": 59625 }, { "epoch": 6.123228589032656, "grad_norm": 0.03415486961603165, "learning_rate": 0.01, "loss": 1.9805, "step": 59628 }, { "epoch": 6.123536660505237, "grad_norm": 0.06124688684940338, "learning_rate": 0.01, "loss": 1.9978, "step": 59631 }, { "epoch": 6.123844731977819, "grad_norm": 0.06151755899190903, "learning_rate": 0.01, "loss": 1.9895, "step": 59634 }, { "epoch": 6.124152803450401, "grad_norm": 0.12702985107898712, "learning_rate": 0.01, "loss": 2.0098, "step": 59637 }, { "epoch": 6.124460874922982, "grad_norm": 0.08845409005880356, "learning_rate": 0.01, "loss": 1.9834, "step": 59640 }, { "epoch": 6.124768946395564, "grad_norm": 0.061156027019023895, "learning_rate": 0.01, "loss": 1.9725, "step": 59643 }, { "epoch": 6.125077017868145, "grad_norm": 0.050899405032396317, "learning_rate": 0.01, "loss": 1.9905, "step": 59646 }, { "epoch": 6.125385089340727, "grad_norm": 0.066854327917099, "learning_rate": 0.01, "loss": 1.9638, "step": 59649 }, { "epoch": 6.1256931608133085, "grad_norm": 0.06697458028793335, "learning_rate": 0.01, "loss": 1.9864, "step": 59652 }, { "epoch": 6.1260012322858906, "grad_norm": 0.05130474641919136, "learning_rate": 0.01, "loss": 1.996, "step": 59655 }, { "epoch": 6.126309303758472, "grad_norm": 0.04654531553387642, "learning_rate": 0.01, "loss": 1.9818, "step": 59658 }, { "epoch": 6.126617375231054, "grad_norm": 0.036213308572769165, "learning_rate": 0.01, "loss": 1.9758, "step": 59661 }, { "epoch": 6.126925446703635, "grad_norm": 0.052255984395742416, "learning_rate": 0.01, "loss": 1.9706, "step": 59664 }, { "epoch": 6.127233518176217, "grad_norm": 0.07511945068836212, "learning_rate": 0.01, "loss": 1.9924, "step": 59667 }, { "epoch": 6.127541589648798, "grad_norm": 0.09632497280836105, "learning_rate": 0.01, "loss": 1.978, "step": 59670 }, { "epoch": 6.12784966112138, "grad_norm": 0.059645794332027435, "learning_rate": 0.01, "loss": 1.9908, "step": 59673 }, { "epoch": 6.128157732593962, "grad_norm": 0.08466697484254837, "learning_rate": 0.01, "loss": 1.9728, "step": 59676 }, { "epoch": 6.128465804066543, "grad_norm": 0.06881053000688553, "learning_rate": 0.01, "loss": 1.9875, "step": 59679 }, { "epoch": 6.1287738755391254, "grad_norm": 0.04392458125948906, "learning_rate": 0.01, "loss": 1.9733, "step": 59682 }, { "epoch": 6.129081947011707, "grad_norm": 0.046918466687202454, "learning_rate": 0.01, "loss": 2.006, "step": 59685 }, { "epoch": 6.129390018484289, "grad_norm": 0.13943582773208618, "learning_rate": 0.01, "loss": 1.944, "step": 59688 }, { "epoch": 6.12969808995687, "grad_norm": 0.032962556928396225, "learning_rate": 0.01, "loss": 2.0044, "step": 59691 }, { "epoch": 6.130006161429452, "grad_norm": 0.04881718382239342, "learning_rate": 0.01, "loss": 1.9738, "step": 59694 }, { "epoch": 6.130314232902033, "grad_norm": 0.049873992800712585, "learning_rate": 0.01, "loss": 1.9694, "step": 59697 }, { "epoch": 6.130622304374615, "grad_norm": 0.11788579076528549, "learning_rate": 0.01, "loss": 1.9892, "step": 59700 }, { "epoch": 6.130930375847196, "grad_norm": 0.06456585973501205, "learning_rate": 0.01, "loss": 1.9675, "step": 59703 }, { "epoch": 6.131238447319778, "grad_norm": 0.04507961496710777, "learning_rate": 0.01, "loss": 1.9788, "step": 59706 }, { "epoch": 6.1315465187923595, "grad_norm": 0.04093556106090546, "learning_rate": 0.01, "loss": 1.992, "step": 59709 }, { "epoch": 6.1318545902649415, "grad_norm": 0.034374650567770004, "learning_rate": 0.01, "loss": 1.9818, "step": 59712 }, { "epoch": 6.132162661737523, "grad_norm": 0.09779324382543564, "learning_rate": 0.01, "loss": 1.9681, "step": 59715 }, { "epoch": 6.132470733210105, "grad_norm": 0.03912382200360298, "learning_rate": 0.01, "loss": 1.9946, "step": 59718 }, { "epoch": 6.132778804682687, "grad_norm": 0.046537820249795914, "learning_rate": 0.01, "loss": 1.9881, "step": 59721 }, { "epoch": 6.133086876155268, "grad_norm": 0.04848983883857727, "learning_rate": 0.01, "loss": 1.9756, "step": 59724 }, { "epoch": 6.13339494762785, "grad_norm": 0.03337480127811432, "learning_rate": 0.01, "loss": 1.9633, "step": 59727 }, { "epoch": 6.133703019100431, "grad_norm": 0.06988492608070374, "learning_rate": 0.01, "loss": 1.9742, "step": 59730 }, { "epoch": 6.134011090573013, "grad_norm": 0.1184145137667656, "learning_rate": 0.01, "loss": 1.9723, "step": 59733 }, { "epoch": 6.134319162045594, "grad_norm": 0.1065261960029602, "learning_rate": 0.01, "loss": 1.9516, "step": 59736 }, { "epoch": 6.134627233518176, "grad_norm": 0.03989608585834503, "learning_rate": 0.01, "loss": 1.9807, "step": 59739 }, { "epoch": 6.134935304990758, "grad_norm": 0.05950331315398216, "learning_rate": 0.01, "loss": 1.9715, "step": 59742 }, { "epoch": 6.13524337646334, "grad_norm": 0.044078536331653595, "learning_rate": 0.01, "loss": 1.9683, "step": 59745 }, { "epoch": 6.135551447935921, "grad_norm": 0.06196853518486023, "learning_rate": 0.01, "loss": 1.9875, "step": 59748 }, { "epoch": 6.135859519408503, "grad_norm": 0.05450844019651413, "learning_rate": 0.01, "loss": 1.9894, "step": 59751 }, { "epoch": 6.136167590881084, "grad_norm": 0.05260879918932915, "learning_rate": 0.01, "loss": 1.9602, "step": 59754 }, { "epoch": 6.136475662353666, "grad_norm": 0.04335511103272438, "learning_rate": 0.01, "loss": 1.9801, "step": 59757 }, { "epoch": 6.136783733826248, "grad_norm": 0.05219843611121178, "learning_rate": 0.01, "loss": 2.0052, "step": 59760 }, { "epoch": 6.137091805298829, "grad_norm": 0.06641986966133118, "learning_rate": 0.01, "loss": 2.021, "step": 59763 }, { "epoch": 6.137399876771411, "grad_norm": 0.10496216267347336, "learning_rate": 0.01, "loss": 2.001, "step": 59766 }, { "epoch": 6.1377079482439925, "grad_norm": 0.1305655688047409, "learning_rate": 0.01, "loss": 1.972, "step": 59769 }, { "epoch": 6.1380160197165745, "grad_norm": 0.06683500856161118, "learning_rate": 0.01, "loss": 1.9883, "step": 59772 }, { "epoch": 6.138324091189156, "grad_norm": 0.06884671747684479, "learning_rate": 0.01, "loss": 1.975, "step": 59775 }, { "epoch": 6.138632162661738, "grad_norm": 0.04899144545197487, "learning_rate": 0.01, "loss": 2.0069, "step": 59778 }, { "epoch": 6.138940234134319, "grad_norm": 0.061058346182107925, "learning_rate": 0.01, "loss": 1.9815, "step": 59781 }, { "epoch": 6.139248305606901, "grad_norm": 0.03729567304253578, "learning_rate": 0.01, "loss": 1.9593, "step": 59784 }, { "epoch": 6.139556377079482, "grad_norm": 0.1232062354683876, "learning_rate": 0.01, "loss": 2.0011, "step": 59787 }, { "epoch": 6.139864448552064, "grad_norm": 0.07398378103971481, "learning_rate": 0.01, "loss": 1.9824, "step": 59790 }, { "epoch": 6.140172520024645, "grad_norm": 0.06523609161376953, "learning_rate": 0.01, "loss": 1.9623, "step": 59793 }, { "epoch": 6.140480591497227, "grad_norm": 0.04748447984457016, "learning_rate": 0.01, "loss": 1.9731, "step": 59796 }, { "epoch": 6.140788662969809, "grad_norm": 0.10762014240026474, "learning_rate": 0.01, "loss": 2.0012, "step": 59799 }, { "epoch": 6.141096734442391, "grad_norm": 0.05152611434459686, "learning_rate": 0.01, "loss": 1.9775, "step": 59802 }, { "epoch": 6.141404805914973, "grad_norm": 0.03151378408074379, "learning_rate": 0.01, "loss": 1.9896, "step": 59805 }, { "epoch": 6.141712877387554, "grad_norm": 0.045448195189237595, "learning_rate": 0.01, "loss": 1.9635, "step": 59808 }, { "epoch": 6.142020948860136, "grad_norm": 0.03704848885536194, "learning_rate": 0.01, "loss": 1.9596, "step": 59811 }, { "epoch": 6.142329020332717, "grad_norm": 0.0592067614197731, "learning_rate": 0.01, "loss": 1.9417, "step": 59814 }, { "epoch": 6.142637091805299, "grad_norm": 0.11523483693599701, "learning_rate": 0.01, "loss": 1.9801, "step": 59817 }, { "epoch": 6.14294516327788, "grad_norm": 0.14963838458061218, "learning_rate": 0.01, "loss": 1.9822, "step": 59820 }, { "epoch": 6.143253234750462, "grad_norm": 0.08962684869766235, "learning_rate": 0.01, "loss": 1.9894, "step": 59823 }, { "epoch": 6.143561306223043, "grad_norm": 0.06388752162456512, "learning_rate": 0.01, "loss": 1.9935, "step": 59826 }, { "epoch": 6.1438693776956255, "grad_norm": 0.06632399559020996, "learning_rate": 0.01, "loss": 1.9724, "step": 59829 }, { "epoch": 6.144177449168207, "grad_norm": 0.08517513424158096, "learning_rate": 0.01, "loss": 1.988, "step": 59832 }, { "epoch": 6.144485520640789, "grad_norm": 0.05397310480475426, "learning_rate": 0.01, "loss": 1.9894, "step": 59835 }, { "epoch": 6.144793592113371, "grad_norm": 0.04470159485936165, "learning_rate": 0.01, "loss": 1.962, "step": 59838 }, { "epoch": 6.145101663585952, "grad_norm": 0.048731885850429535, "learning_rate": 0.01, "loss": 2.0055, "step": 59841 }, { "epoch": 6.145409735058534, "grad_norm": 0.13363894820213318, "learning_rate": 0.01, "loss": 1.9862, "step": 59844 }, { "epoch": 6.145717806531115, "grad_norm": 0.04807397723197937, "learning_rate": 0.01, "loss": 1.9891, "step": 59847 }, { "epoch": 6.146025878003697, "grad_norm": 0.05982908979058266, "learning_rate": 0.01, "loss": 1.993, "step": 59850 }, { "epoch": 6.146333949476278, "grad_norm": 0.0730501338839531, "learning_rate": 0.01, "loss": 1.9865, "step": 59853 }, { "epoch": 6.14664202094886, "grad_norm": 0.04355951398611069, "learning_rate": 0.01, "loss": 1.9638, "step": 59856 }, { "epoch": 6.1469500924214415, "grad_norm": 0.10257246345281601, "learning_rate": 0.01, "loss": 1.9685, "step": 59859 }, { "epoch": 6.147258163894024, "grad_norm": 0.0933048278093338, "learning_rate": 0.01, "loss": 1.9677, "step": 59862 }, { "epoch": 6.147566235366605, "grad_norm": 0.03633146733045578, "learning_rate": 0.01, "loss": 1.9681, "step": 59865 }, { "epoch": 6.147874306839187, "grad_norm": 0.07809139788150787, "learning_rate": 0.01, "loss": 1.9746, "step": 59868 }, { "epoch": 6.148182378311768, "grad_norm": 0.06432943791151047, "learning_rate": 0.01, "loss": 1.9611, "step": 59871 }, { "epoch": 6.14849044978435, "grad_norm": 0.04523482546210289, "learning_rate": 0.01, "loss": 1.9928, "step": 59874 }, { "epoch": 6.148798521256932, "grad_norm": 0.04328586533665657, "learning_rate": 0.01, "loss": 1.978, "step": 59877 }, { "epoch": 6.149106592729513, "grad_norm": 0.04336148872971535, "learning_rate": 0.01, "loss": 1.9849, "step": 59880 }, { "epoch": 6.149414664202095, "grad_norm": 0.11612840741872787, "learning_rate": 0.01, "loss": 1.9663, "step": 59883 }, { "epoch": 6.149722735674676, "grad_norm": 0.040377210825681686, "learning_rate": 0.01, "loss": 1.9995, "step": 59886 }, { "epoch": 6.1500308071472585, "grad_norm": 0.05374842882156372, "learning_rate": 0.01, "loss": 1.9995, "step": 59889 }, { "epoch": 6.15033887861984, "grad_norm": 0.042278483510017395, "learning_rate": 0.01, "loss": 1.9693, "step": 59892 }, { "epoch": 6.150646950092422, "grad_norm": 0.06057736650109291, "learning_rate": 0.01, "loss": 1.9718, "step": 59895 }, { "epoch": 6.150955021565003, "grad_norm": 0.07426716387271881, "learning_rate": 0.01, "loss": 1.9868, "step": 59898 }, { "epoch": 6.151263093037585, "grad_norm": 0.06555135548114777, "learning_rate": 0.01, "loss": 2.0028, "step": 59901 }, { "epoch": 6.151571164510166, "grad_norm": 0.031795963644981384, "learning_rate": 0.01, "loss": 1.9818, "step": 59904 }, { "epoch": 6.151879235982748, "grad_norm": 0.0928960070014, "learning_rate": 0.01, "loss": 1.9872, "step": 59907 }, { "epoch": 6.152187307455329, "grad_norm": 0.05229020118713379, "learning_rate": 0.01, "loss": 1.9801, "step": 59910 }, { "epoch": 6.152495378927911, "grad_norm": 0.05363921821117401, "learning_rate": 0.01, "loss": 1.9655, "step": 59913 }, { "epoch": 6.1528034504004925, "grad_norm": 0.05340690538287163, "learning_rate": 0.01, "loss": 1.9615, "step": 59916 }, { "epoch": 6.1531115218730745, "grad_norm": 0.04054640233516693, "learning_rate": 0.01, "loss": 1.9815, "step": 59919 }, { "epoch": 6.153419593345657, "grad_norm": 0.05909667909145355, "learning_rate": 0.01, "loss": 1.9926, "step": 59922 }, { "epoch": 6.153727664818238, "grad_norm": 0.04531288146972656, "learning_rate": 0.01, "loss": 1.9673, "step": 59925 }, { "epoch": 6.15403573629082, "grad_norm": 0.041250940412282944, "learning_rate": 0.01, "loss": 1.9777, "step": 59928 }, { "epoch": 6.154343807763401, "grad_norm": 0.042447373270988464, "learning_rate": 0.01, "loss": 1.9769, "step": 59931 }, { "epoch": 6.154651879235983, "grad_norm": 0.06417303532361984, "learning_rate": 0.01, "loss": 1.973, "step": 59934 }, { "epoch": 6.154959950708564, "grad_norm": 0.12572647631168365, "learning_rate": 0.01, "loss": 1.971, "step": 59937 }, { "epoch": 6.155268022181146, "grad_norm": 0.06711404025554657, "learning_rate": 0.01, "loss": 1.9693, "step": 59940 }, { "epoch": 6.155576093653727, "grad_norm": 0.09349125623703003, "learning_rate": 0.01, "loss": 1.9971, "step": 59943 }, { "epoch": 6.155884165126309, "grad_norm": 0.046295300126075745, "learning_rate": 0.01, "loss": 1.984, "step": 59946 }, { "epoch": 6.156192236598891, "grad_norm": 0.08443499356508255, "learning_rate": 0.01, "loss": 1.9803, "step": 59949 }, { "epoch": 6.156500308071473, "grad_norm": 0.048439864069223404, "learning_rate": 0.01, "loss": 2.0002, "step": 59952 }, { "epoch": 6.156808379544054, "grad_norm": 0.10180726647377014, "learning_rate": 0.01, "loss": 1.9799, "step": 59955 }, { "epoch": 6.157116451016636, "grad_norm": 0.03144579380750656, "learning_rate": 0.01, "loss": 2.0125, "step": 59958 }, { "epoch": 6.157424522489218, "grad_norm": 0.0712040439248085, "learning_rate": 0.01, "loss": 1.982, "step": 59961 }, { "epoch": 6.157732593961799, "grad_norm": 0.054967526346445084, "learning_rate": 0.01, "loss": 2.0012, "step": 59964 }, { "epoch": 6.158040665434381, "grad_norm": 0.04168044403195381, "learning_rate": 0.01, "loss": 1.9635, "step": 59967 }, { "epoch": 6.158348736906962, "grad_norm": 0.05962495878338814, "learning_rate": 0.01, "loss": 1.9924, "step": 59970 }, { "epoch": 6.158656808379544, "grad_norm": 0.11579214036464691, "learning_rate": 0.01, "loss": 2.0038, "step": 59973 }, { "epoch": 6.1589648798521255, "grad_norm": 0.03569880872964859, "learning_rate": 0.01, "loss": 1.9566, "step": 59976 }, { "epoch": 6.1592729513247075, "grad_norm": 0.09522563964128494, "learning_rate": 0.01, "loss": 2.0097, "step": 59979 }, { "epoch": 6.159581022797289, "grad_norm": 0.07732608169317245, "learning_rate": 0.01, "loss": 1.9811, "step": 59982 }, { "epoch": 6.159889094269871, "grad_norm": 0.057550933212041855, "learning_rate": 0.01, "loss": 1.9875, "step": 59985 }, { "epoch": 6.160197165742452, "grad_norm": 0.0942176878452301, "learning_rate": 0.01, "loss": 1.9938, "step": 59988 }, { "epoch": 6.160505237215034, "grad_norm": 0.0959196537733078, "learning_rate": 0.01, "loss": 1.9759, "step": 59991 }, { "epoch": 6.160813308687615, "grad_norm": 0.07278607040643692, "learning_rate": 0.01, "loss": 2.0074, "step": 59994 }, { "epoch": 6.161121380160197, "grad_norm": 0.05867347866296768, "learning_rate": 0.01, "loss": 1.9827, "step": 59997 }, { "epoch": 6.161429451632779, "grad_norm": 0.03255612030625343, "learning_rate": 0.01, "loss": 1.9843, "step": 60000 }, { "epoch": 6.16173752310536, "grad_norm": 0.056017834693193436, "learning_rate": 0.01, "loss": 1.9827, "step": 60003 }, { "epoch": 6.162045594577942, "grad_norm": 0.0943060889840126, "learning_rate": 0.01, "loss": 1.9873, "step": 60006 }, { "epoch": 6.162353666050524, "grad_norm": 0.08472935855388641, "learning_rate": 0.01, "loss": 2.004, "step": 60009 }, { "epoch": 6.162661737523106, "grad_norm": 0.053891006857156754, "learning_rate": 0.01, "loss": 1.9926, "step": 60012 }, { "epoch": 6.162969808995687, "grad_norm": 0.042883194983005524, "learning_rate": 0.01, "loss": 1.9827, "step": 60015 }, { "epoch": 6.163277880468269, "grad_norm": 0.05750950425863266, "learning_rate": 0.01, "loss": 1.9844, "step": 60018 }, { "epoch": 6.16358595194085, "grad_norm": 0.07917001843452454, "learning_rate": 0.01, "loss": 1.9833, "step": 60021 }, { "epoch": 6.163894023413432, "grad_norm": 0.06416572630405426, "learning_rate": 0.01, "loss": 1.9474, "step": 60024 }, { "epoch": 6.164202094886013, "grad_norm": 0.06587263941764832, "learning_rate": 0.01, "loss": 1.9872, "step": 60027 }, { "epoch": 6.164510166358595, "grad_norm": 0.09511373192071915, "learning_rate": 0.01, "loss": 2.0006, "step": 60030 }, { "epoch": 6.164818237831176, "grad_norm": 0.11690583825111389, "learning_rate": 0.01, "loss": 1.9966, "step": 60033 }, { "epoch": 6.1651263093037585, "grad_norm": 0.05916045978665352, "learning_rate": 0.01, "loss": 1.9648, "step": 60036 }, { "epoch": 6.1654343807763405, "grad_norm": 0.07679677754640579, "learning_rate": 0.01, "loss": 1.9894, "step": 60039 }, { "epoch": 6.165742452248922, "grad_norm": 0.05868346244096756, "learning_rate": 0.01, "loss": 2.0044, "step": 60042 }, { "epoch": 6.166050523721504, "grad_norm": 0.04336436092853546, "learning_rate": 0.01, "loss": 1.9956, "step": 60045 }, { "epoch": 6.166358595194085, "grad_norm": 0.053842004388570786, "learning_rate": 0.01, "loss": 1.9794, "step": 60048 }, { "epoch": 6.166666666666667, "grad_norm": 0.05783376097679138, "learning_rate": 0.01, "loss": 1.972, "step": 60051 }, { "epoch": 6.166974738139248, "grad_norm": 0.12786927819252014, "learning_rate": 0.01, "loss": 1.9734, "step": 60054 }, { "epoch": 6.16728280961183, "grad_norm": 0.10869529098272324, "learning_rate": 0.01, "loss": 2.0226, "step": 60057 }, { "epoch": 6.167590881084411, "grad_norm": 0.09978947043418884, "learning_rate": 0.01, "loss": 1.9633, "step": 60060 }, { "epoch": 6.167898952556993, "grad_norm": 0.06388422101736069, "learning_rate": 0.01, "loss": 1.9816, "step": 60063 }, { "epoch": 6.1682070240295745, "grad_norm": 0.05348409339785576, "learning_rate": 0.01, "loss": 1.9873, "step": 60066 }, { "epoch": 6.168515095502157, "grad_norm": 0.047684285789728165, "learning_rate": 0.01, "loss": 1.9827, "step": 60069 }, { "epoch": 6.168823166974738, "grad_norm": 0.031933482736349106, "learning_rate": 0.01, "loss": 1.9905, "step": 60072 }, { "epoch": 6.16913123844732, "grad_norm": 0.06409978866577148, "learning_rate": 0.01, "loss": 1.9719, "step": 60075 }, { "epoch": 6.169439309919902, "grad_norm": 0.07085554301738739, "learning_rate": 0.01, "loss": 1.9824, "step": 60078 }, { "epoch": 6.169747381392483, "grad_norm": 0.04958326742053032, "learning_rate": 0.01, "loss": 1.9736, "step": 60081 }, { "epoch": 6.170055452865065, "grad_norm": 0.037209220230579376, "learning_rate": 0.01, "loss": 1.9834, "step": 60084 }, { "epoch": 6.170363524337646, "grad_norm": 0.044104501605033875, "learning_rate": 0.01, "loss": 2.0076, "step": 60087 }, { "epoch": 6.170671595810228, "grad_norm": 0.06651319563388824, "learning_rate": 0.01, "loss": 1.9807, "step": 60090 }, { "epoch": 6.1709796672828094, "grad_norm": 0.15120342373847961, "learning_rate": 0.01, "loss": 1.9816, "step": 60093 }, { "epoch": 6.1712877387553915, "grad_norm": 0.17715202271938324, "learning_rate": 0.01, "loss": 2.015, "step": 60096 }, { "epoch": 6.171595810227973, "grad_norm": 0.07036132365465164, "learning_rate": 0.01, "loss": 2.0109, "step": 60099 }, { "epoch": 6.171903881700555, "grad_norm": 0.05515659227967262, "learning_rate": 0.01, "loss": 1.9692, "step": 60102 }, { "epoch": 6.172211953173136, "grad_norm": 0.054766733199357986, "learning_rate": 0.01, "loss": 1.9973, "step": 60105 }, { "epoch": 6.172520024645718, "grad_norm": 0.06652894616127014, "learning_rate": 0.01, "loss": 1.9999, "step": 60108 }, { "epoch": 6.172828096118299, "grad_norm": 0.05079234391450882, "learning_rate": 0.01, "loss": 1.9829, "step": 60111 }, { "epoch": 6.173136167590881, "grad_norm": 0.04592439532279968, "learning_rate": 0.01, "loss": 1.9934, "step": 60114 }, { "epoch": 6.173444239063462, "grad_norm": 0.03100336715579033, "learning_rate": 0.01, "loss": 1.9765, "step": 60117 }, { "epoch": 6.173752310536044, "grad_norm": 0.03455538675189018, "learning_rate": 0.01, "loss": 1.9661, "step": 60120 }, { "epoch": 6.174060382008626, "grad_norm": 0.04403119906783104, "learning_rate": 0.01, "loss": 1.9687, "step": 60123 }, { "epoch": 6.1743684534812076, "grad_norm": 0.1155620738863945, "learning_rate": 0.01, "loss": 2.0095, "step": 60126 }, { "epoch": 6.17467652495379, "grad_norm": 0.06684102863073349, "learning_rate": 0.01, "loss": 1.9644, "step": 60129 }, { "epoch": 6.174984596426371, "grad_norm": 0.04837580397725105, "learning_rate": 0.01, "loss": 1.9848, "step": 60132 }, { "epoch": 6.175292667898953, "grad_norm": 0.11638902872800827, "learning_rate": 0.01, "loss": 2.0013, "step": 60135 }, { "epoch": 6.175600739371534, "grad_norm": 0.06774953007698059, "learning_rate": 0.01, "loss": 1.9734, "step": 60138 }, { "epoch": 6.175908810844116, "grad_norm": 0.05518386512994766, "learning_rate": 0.01, "loss": 1.9865, "step": 60141 }, { "epoch": 6.176216882316697, "grad_norm": 0.03791549429297447, "learning_rate": 0.01, "loss": 1.9825, "step": 60144 }, { "epoch": 6.176524953789279, "grad_norm": 0.0599411316215992, "learning_rate": 0.01, "loss": 1.9722, "step": 60147 }, { "epoch": 6.17683302526186, "grad_norm": 0.08578135818243027, "learning_rate": 0.01, "loss": 1.9894, "step": 60150 }, { "epoch": 6.1771410967344424, "grad_norm": 0.04651505872607231, "learning_rate": 0.01, "loss": 1.9996, "step": 60153 }, { "epoch": 6.177449168207024, "grad_norm": 0.09119927883148193, "learning_rate": 0.01, "loss": 2.0062, "step": 60156 }, { "epoch": 6.177757239679606, "grad_norm": 0.12429966032505035, "learning_rate": 0.01, "loss": 1.9935, "step": 60159 }, { "epoch": 6.178065311152188, "grad_norm": 0.0350540392100811, "learning_rate": 0.01, "loss": 1.9885, "step": 60162 }, { "epoch": 6.178373382624769, "grad_norm": 0.06904268264770508, "learning_rate": 0.01, "loss": 1.9439, "step": 60165 }, { "epoch": 6.178681454097351, "grad_norm": 0.06134537607431412, "learning_rate": 0.01, "loss": 1.9955, "step": 60168 }, { "epoch": 6.178989525569932, "grad_norm": 0.07607719302177429, "learning_rate": 0.01, "loss": 2.0021, "step": 60171 }, { "epoch": 6.179297597042514, "grad_norm": 0.1023942083120346, "learning_rate": 0.01, "loss": 1.9637, "step": 60174 }, { "epoch": 6.179605668515095, "grad_norm": 0.04248664528131485, "learning_rate": 0.01, "loss": 1.9515, "step": 60177 }, { "epoch": 6.179913739987677, "grad_norm": 0.036357518285512924, "learning_rate": 0.01, "loss": 1.9509, "step": 60180 }, { "epoch": 6.1802218114602585, "grad_norm": 0.04085879400372505, "learning_rate": 0.01, "loss": 2.0125, "step": 60183 }, { "epoch": 6.180529882932841, "grad_norm": 0.05432041734457016, "learning_rate": 0.01, "loss": 1.9783, "step": 60186 }, { "epoch": 6.180837954405422, "grad_norm": 0.0373106487095356, "learning_rate": 0.01, "loss": 1.9559, "step": 60189 }, { "epoch": 6.181146025878004, "grad_norm": 0.13600775599479675, "learning_rate": 0.01, "loss": 2.0022, "step": 60192 }, { "epoch": 6.181454097350585, "grad_norm": 0.10217521339654922, "learning_rate": 0.01, "loss": 1.9617, "step": 60195 }, { "epoch": 6.181762168823167, "grad_norm": 0.11078688502311707, "learning_rate": 0.01, "loss": 1.9419, "step": 60198 }, { "epoch": 6.182070240295749, "grad_norm": 0.11216065287590027, "learning_rate": 0.01, "loss": 1.9614, "step": 60201 }, { "epoch": 6.18237831176833, "grad_norm": 0.07498262822628021, "learning_rate": 0.01, "loss": 2.004, "step": 60204 }, { "epoch": 6.182686383240912, "grad_norm": 0.0481102392077446, "learning_rate": 0.01, "loss": 1.9932, "step": 60207 }, { "epoch": 6.182994454713493, "grad_norm": 0.06563237309455872, "learning_rate": 0.01, "loss": 1.9979, "step": 60210 }, { "epoch": 6.1833025261860755, "grad_norm": 0.065886951982975, "learning_rate": 0.01, "loss": 1.9777, "step": 60213 }, { "epoch": 6.183610597658657, "grad_norm": 0.06081174686551094, "learning_rate": 0.01, "loss": 2.0077, "step": 60216 }, { "epoch": 6.183918669131239, "grad_norm": 0.040813907980918884, "learning_rate": 0.01, "loss": 1.9749, "step": 60219 }, { "epoch": 6.18422674060382, "grad_norm": 0.08460288494825363, "learning_rate": 0.01, "loss": 1.9912, "step": 60222 }, { "epoch": 6.184534812076402, "grad_norm": 0.1325504183769226, "learning_rate": 0.01, "loss": 1.9751, "step": 60225 }, { "epoch": 6.184842883548983, "grad_norm": 0.047543518245220184, "learning_rate": 0.01, "loss": 1.9975, "step": 60228 }, { "epoch": 6.185150955021565, "grad_norm": 0.09578222036361694, "learning_rate": 0.01, "loss": 1.9674, "step": 60231 }, { "epoch": 6.185459026494146, "grad_norm": 0.08167849481105804, "learning_rate": 0.01, "loss": 1.9771, "step": 60234 }, { "epoch": 6.185767097966728, "grad_norm": 0.04946918785572052, "learning_rate": 0.01, "loss": 1.9927, "step": 60237 }, { "epoch": 6.1860751694393095, "grad_norm": 0.08602156490087509, "learning_rate": 0.01, "loss": 1.9754, "step": 60240 }, { "epoch": 6.1863832409118915, "grad_norm": 0.07393812388181686, "learning_rate": 0.01, "loss": 1.974, "step": 60243 }, { "epoch": 6.186691312384474, "grad_norm": 0.04238250479102135, "learning_rate": 0.01, "loss": 1.9755, "step": 60246 }, { "epoch": 6.186999383857055, "grad_norm": 0.05332363396883011, "learning_rate": 0.01, "loss": 1.9832, "step": 60249 }, { "epoch": 6.187307455329637, "grad_norm": 0.048810552805662155, "learning_rate": 0.01, "loss": 1.9795, "step": 60252 }, { "epoch": 6.187615526802218, "grad_norm": 0.04000997915863991, "learning_rate": 0.01, "loss": 1.9742, "step": 60255 }, { "epoch": 6.1879235982748, "grad_norm": 0.07165291905403137, "learning_rate": 0.01, "loss": 2.0003, "step": 60258 }, { "epoch": 6.188231669747381, "grad_norm": 0.04843559488654137, "learning_rate": 0.01, "loss": 1.9686, "step": 60261 }, { "epoch": 6.188539741219963, "grad_norm": 0.048593342304229736, "learning_rate": 0.01, "loss": 1.9987, "step": 60264 }, { "epoch": 6.188847812692544, "grad_norm": 0.05250190943479538, "learning_rate": 0.01, "loss": 1.9647, "step": 60267 }, { "epoch": 6.189155884165126, "grad_norm": 0.09123876690864563, "learning_rate": 0.01, "loss": 1.9791, "step": 60270 }, { "epoch": 6.189463955637708, "grad_norm": 0.11671872437000275, "learning_rate": 0.01, "loss": 1.9752, "step": 60273 }, { "epoch": 6.18977202711029, "grad_norm": 0.048420678824186325, "learning_rate": 0.01, "loss": 1.9576, "step": 60276 }, { "epoch": 6.190080098582872, "grad_norm": 0.11455632746219635, "learning_rate": 0.01, "loss": 1.9589, "step": 60279 }, { "epoch": 6.190388170055453, "grad_norm": 0.08793000131845474, "learning_rate": 0.01, "loss": 1.9864, "step": 60282 }, { "epoch": 6.190696241528035, "grad_norm": 0.0460558645427227, "learning_rate": 0.01, "loss": 1.9712, "step": 60285 }, { "epoch": 6.191004313000616, "grad_norm": 0.0427497997879982, "learning_rate": 0.01, "loss": 1.9849, "step": 60288 }, { "epoch": 6.191312384473198, "grad_norm": 0.04136178269982338, "learning_rate": 0.01, "loss": 2.0002, "step": 60291 }, { "epoch": 6.191620455945779, "grad_norm": 0.06737589836120605, "learning_rate": 0.01, "loss": 1.9883, "step": 60294 }, { "epoch": 6.191928527418361, "grad_norm": 0.05001933127641678, "learning_rate": 0.01, "loss": 1.9759, "step": 60297 }, { "epoch": 6.1922365988909425, "grad_norm": 0.13523389399051666, "learning_rate": 0.01, "loss": 1.9871, "step": 60300 }, { "epoch": 6.1925446703635245, "grad_norm": 0.09118752926588058, "learning_rate": 0.01, "loss": 1.9865, "step": 60303 }, { "epoch": 6.192852741836106, "grad_norm": 0.061937130987644196, "learning_rate": 0.01, "loss": 1.9935, "step": 60306 }, { "epoch": 6.193160813308688, "grad_norm": 0.09417203068733215, "learning_rate": 0.01, "loss": 1.9688, "step": 60309 }, { "epoch": 6.193468884781269, "grad_norm": 0.1443474441766739, "learning_rate": 0.01, "loss": 1.9744, "step": 60312 }, { "epoch": 6.193776956253851, "grad_norm": 0.12889619171619415, "learning_rate": 0.01, "loss": 1.9847, "step": 60315 }, { "epoch": 6.194085027726432, "grad_norm": 0.08010325580835342, "learning_rate": 0.01, "loss": 1.9624, "step": 60318 }, { "epoch": 6.194393099199014, "grad_norm": 0.0863533690571785, "learning_rate": 0.01, "loss": 2.0098, "step": 60321 }, { "epoch": 6.194701170671596, "grad_norm": 0.0709909126162529, "learning_rate": 0.01, "loss": 1.9902, "step": 60324 }, { "epoch": 6.195009242144177, "grad_norm": 0.04236047342419624, "learning_rate": 0.01, "loss": 2.0178, "step": 60327 }, { "epoch": 6.195317313616759, "grad_norm": 0.06927076727151871, "learning_rate": 0.01, "loss": 2.0021, "step": 60330 }, { "epoch": 6.195625385089341, "grad_norm": 0.07250656932592392, "learning_rate": 0.01, "loss": 1.9854, "step": 60333 }, { "epoch": 6.195933456561923, "grad_norm": 0.06588046252727509, "learning_rate": 0.01, "loss": 1.9782, "step": 60336 }, { "epoch": 6.196241528034504, "grad_norm": 0.07093591243028641, "learning_rate": 0.01, "loss": 1.9851, "step": 60339 }, { "epoch": 6.196549599507086, "grad_norm": 0.10864640772342682, "learning_rate": 0.01, "loss": 1.979, "step": 60342 }, { "epoch": 6.196857670979667, "grad_norm": 0.12769301235675812, "learning_rate": 0.01, "loss": 1.9879, "step": 60345 }, { "epoch": 6.197165742452249, "grad_norm": 0.03568786382675171, "learning_rate": 0.01, "loss": 1.983, "step": 60348 }, { "epoch": 6.19747381392483, "grad_norm": 0.08632007986307144, "learning_rate": 0.01, "loss": 1.9598, "step": 60351 }, { "epoch": 6.197781885397412, "grad_norm": 0.047804027795791626, "learning_rate": 0.01, "loss": 1.995, "step": 60354 }, { "epoch": 6.198089956869993, "grad_norm": 0.08226168155670166, "learning_rate": 0.01, "loss": 1.979, "step": 60357 }, { "epoch": 6.1983980283425755, "grad_norm": 0.06796471774578094, "learning_rate": 0.01, "loss": 1.9944, "step": 60360 }, { "epoch": 6.1987060998151575, "grad_norm": 0.05324438586831093, "learning_rate": 0.01, "loss": 1.9974, "step": 60363 }, { "epoch": 6.199014171287739, "grad_norm": 0.05389709025621414, "learning_rate": 0.01, "loss": 1.9995, "step": 60366 }, { "epoch": 6.199322242760321, "grad_norm": 0.04287771135568619, "learning_rate": 0.01, "loss": 1.9836, "step": 60369 }, { "epoch": 6.199630314232902, "grad_norm": 0.06289256364107132, "learning_rate": 0.01, "loss": 1.9918, "step": 60372 }, { "epoch": 6.199938385705484, "grad_norm": 0.04109280928969383, "learning_rate": 0.01, "loss": 1.9742, "step": 60375 }, { "epoch": 6.200246457178065, "grad_norm": 0.13262084126472473, "learning_rate": 0.01, "loss": 1.9935, "step": 60378 }, { "epoch": 6.200554528650647, "grad_norm": 0.05487149953842163, "learning_rate": 0.01, "loss": 1.9792, "step": 60381 }, { "epoch": 6.200862600123228, "grad_norm": 0.08410999178886414, "learning_rate": 0.01, "loss": 1.9633, "step": 60384 }, { "epoch": 6.20117067159581, "grad_norm": 0.09868727624416351, "learning_rate": 0.01, "loss": 1.9959, "step": 60387 }, { "epoch": 6.2014787430683915, "grad_norm": 0.09402374923229218, "learning_rate": 0.01, "loss": 1.9723, "step": 60390 }, { "epoch": 6.201786814540974, "grad_norm": 0.09398900717496872, "learning_rate": 0.01, "loss": 1.968, "step": 60393 }, { "epoch": 6.202094886013555, "grad_norm": 0.07040904462337494, "learning_rate": 0.01, "loss": 1.9725, "step": 60396 }, { "epoch": 6.202402957486137, "grad_norm": 0.09048958867788315, "learning_rate": 0.01, "loss": 1.9861, "step": 60399 }, { "epoch": 6.202711028958719, "grad_norm": 0.08249911665916443, "learning_rate": 0.01, "loss": 1.9723, "step": 60402 }, { "epoch": 6.2030191004313, "grad_norm": 0.1375819742679596, "learning_rate": 0.01, "loss": 1.9723, "step": 60405 }, { "epoch": 6.203327171903882, "grad_norm": 0.12005623430013657, "learning_rate": 0.01, "loss": 1.9528, "step": 60408 }, { "epoch": 6.203635243376463, "grad_norm": 0.08359742164611816, "learning_rate": 0.01, "loss": 1.9895, "step": 60411 }, { "epoch": 6.203943314849045, "grad_norm": 0.08574346452951431, "learning_rate": 0.01, "loss": 1.9863, "step": 60414 }, { "epoch": 6.204251386321626, "grad_norm": 0.04601282626390457, "learning_rate": 0.01, "loss": 1.9718, "step": 60417 }, { "epoch": 6.2045594577942085, "grad_norm": 0.04463723674416542, "learning_rate": 0.01, "loss": 1.965, "step": 60420 }, { "epoch": 6.20486752926679, "grad_norm": 0.05424230918288231, "learning_rate": 0.01, "loss": 2.0251, "step": 60423 }, { "epoch": 6.205175600739372, "grad_norm": 0.0748244971036911, "learning_rate": 0.01, "loss": 1.9804, "step": 60426 }, { "epoch": 6.205483672211953, "grad_norm": 0.05594291165471077, "learning_rate": 0.01, "loss": 2.0024, "step": 60429 }, { "epoch": 6.205791743684535, "grad_norm": 0.03947063907980919, "learning_rate": 0.01, "loss": 1.9796, "step": 60432 }, { "epoch": 6.206099815157116, "grad_norm": 0.037693090736866, "learning_rate": 0.01, "loss": 1.9803, "step": 60435 }, { "epoch": 6.206407886629698, "grad_norm": 0.09147128462791443, "learning_rate": 0.01, "loss": 1.955, "step": 60438 }, { "epoch": 6.206715958102279, "grad_norm": 0.03525812551379204, "learning_rate": 0.01, "loss": 1.9847, "step": 60441 }, { "epoch": 6.207024029574861, "grad_norm": 0.0658460482954979, "learning_rate": 0.01, "loss": 1.9751, "step": 60444 }, { "epoch": 6.207332101047443, "grad_norm": 0.10629269480705261, "learning_rate": 0.01, "loss": 1.9983, "step": 60447 }, { "epoch": 6.2076401725200245, "grad_norm": 0.07760986685752869, "learning_rate": 0.01, "loss": 1.9858, "step": 60450 }, { "epoch": 6.207948243992607, "grad_norm": 0.0387595109641552, "learning_rate": 0.01, "loss": 1.9586, "step": 60453 }, { "epoch": 6.208256315465188, "grad_norm": 0.04037671163678169, "learning_rate": 0.01, "loss": 2.0052, "step": 60456 }, { "epoch": 6.20856438693777, "grad_norm": 0.049021679908037186, "learning_rate": 0.01, "loss": 1.9937, "step": 60459 }, { "epoch": 6.208872458410351, "grad_norm": 0.04115486890077591, "learning_rate": 0.01, "loss": 2.0025, "step": 60462 }, { "epoch": 6.209180529882933, "grad_norm": 0.06775692850351334, "learning_rate": 0.01, "loss": 1.9843, "step": 60465 }, { "epoch": 6.209488601355514, "grad_norm": 0.04244436323642731, "learning_rate": 0.01, "loss": 1.9982, "step": 60468 }, { "epoch": 6.209796672828096, "grad_norm": 0.046518050134181976, "learning_rate": 0.01, "loss": 1.9756, "step": 60471 }, { "epoch": 6.210104744300677, "grad_norm": 0.058811433613300323, "learning_rate": 0.01, "loss": 1.9948, "step": 60474 }, { "epoch": 6.210412815773259, "grad_norm": 0.08617477864027023, "learning_rate": 0.01, "loss": 1.9902, "step": 60477 }, { "epoch": 6.2107208872458415, "grad_norm": 0.1398414820432663, "learning_rate": 0.01, "loss": 1.9772, "step": 60480 }, { "epoch": 6.211028958718423, "grad_norm": 0.04411006346344948, "learning_rate": 0.01, "loss": 2.0152, "step": 60483 }, { "epoch": 6.211337030191005, "grad_norm": 0.05274950712919235, "learning_rate": 0.01, "loss": 1.9776, "step": 60486 }, { "epoch": 6.211645101663586, "grad_norm": 0.03863611817359924, "learning_rate": 0.01, "loss": 1.9826, "step": 60489 }, { "epoch": 6.211953173136168, "grad_norm": 0.04664864018559456, "learning_rate": 0.01, "loss": 1.9876, "step": 60492 }, { "epoch": 6.212261244608749, "grad_norm": 0.03843964263796806, "learning_rate": 0.01, "loss": 1.9772, "step": 60495 }, { "epoch": 6.212569316081331, "grad_norm": 0.02867240644991398, "learning_rate": 0.01, "loss": 1.9705, "step": 60498 }, { "epoch": 6.212877387553912, "grad_norm": 0.03859257698059082, "learning_rate": 0.01, "loss": 1.9635, "step": 60501 }, { "epoch": 6.213185459026494, "grad_norm": 0.04639716073870659, "learning_rate": 0.01, "loss": 2.0111, "step": 60504 }, { "epoch": 6.2134935304990755, "grad_norm": 0.05346795171499252, "learning_rate": 0.01, "loss": 1.9781, "step": 60507 }, { "epoch": 6.2138016019716575, "grad_norm": 0.09336802363395691, "learning_rate": 0.01, "loss": 2.0141, "step": 60510 }, { "epoch": 6.214109673444239, "grad_norm": 0.10028377920389175, "learning_rate": 0.01, "loss": 1.9978, "step": 60513 }, { "epoch": 6.214417744916821, "grad_norm": 0.04280233010649681, "learning_rate": 0.01, "loss": 2.0057, "step": 60516 }, { "epoch": 6.214725816389402, "grad_norm": 0.07395056635141373, "learning_rate": 0.01, "loss": 2.0094, "step": 60519 }, { "epoch": 6.215033887861984, "grad_norm": 0.08924415707588196, "learning_rate": 0.01, "loss": 1.9956, "step": 60522 }, { "epoch": 6.215341959334566, "grad_norm": 0.06368307024240494, "learning_rate": 0.01, "loss": 1.9676, "step": 60525 }, { "epoch": 6.215650030807147, "grad_norm": 0.07443444430828094, "learning_rate": 0.01, "loss": 2.0003, "step": 60528 }, { "epoch": 6.215958102279729, "grad_norm": 0.06193174049258232, "learning_rate": 0.01, "loss": 1.9948, "step": 60531 }, { "epoch": 6.21626617375231, "grad_norm": 0.05765648931264877, "learning_rate": 0.01, "loss": 1.9736, "step": 60534 }, { "epoch": 6.216574245224892, "grad_norm": 0.07838603854179382, "learning_rate": 0.01, "loss": 1.9695, "step": 60537 }, { "epoch": 6.216882316697474, "grad_norm": 0.0865786075592041, "learning_rate": 0.01, "loss": 1.9764, "step": 60540 }, { "epoch": 6.217190388170056, "grad_norm": 0.06937874108552933, "learning_rate": 0.01, "loss": 1.9574, "step": 60543 }, { "epoch": 6.217498459642637, "grad_norm": 0.04611770063638687, "learning_rate": 0.01, "loss": 1.9854, "step": 60546 }, { "epoch": 6.217806531115219, "grad_norm": 0.10261213779449463, "learning_rate": 0.01, "loss": 1.9841, "step": 60549 }, { "epoch": 6.2181146025878, "grad_norm": 0.06451880186796188, "learning_rate": 0.01, "loss": 1.9798, "step": 60552 }, { "epoch": 6.218422674060382, "grad_norm": 0.06459330767393112, "learning_rate": 0.01, "loss": 1.9723, "step": 60555 }, { "epoch": 6.218730745532963, "grad_norm": 0.06935889273881912, "learning_rate": 0.01, "loss": 1.9817, "step": 60558 }, { "epoch": 6.219038817005545, "grad_norm": 0.07407599687576294, "learning_rate": 0.01, "loss": 2.0052, "step": 60561 }, { "epoch": 6.219346888478127, "grad_norm": 0.09501077234745026, "learning_rate": 0.01, "loss": 1.9886, "step": 60564 }, { "epoch": 6.2196549599507085, "grad_norm": 0.0854761153459549, "learning_rate": 0.01, "loss": 1.9674, "step": 60567 }, { "epoch": 6.2199630314232905, "grad_norm": 0.052260614931583405, "learning_rate": 0.01, "loss": 1.9851, "step": 60570 }, { "epoch": 6.220271102895872, "grad_norm": 0.03612583130598068, "learning_rate": 0.01, "loss": 1.9695, "step": 60573 }, { "epoch": 6.220579174368454, "grad_norm": 0.04795749485492706, "learning_rate": 0.01, "loss": 1.9709, "step": 60576 }, { "epoch": 6.220887245841035, "grad_norm": 0.055898912250995636, "learning_rate": 0.01, "loss": 1.9725, "step": 60579 }, { "epoch": 6.221195317313617, "grad_norm": 0.056687548756599426, "learning_rate": 0.01, "loss": 1.9871, "step": 60582 }, { "epoch": 6.221503388786198, "grad_norm": 0.052614741027355194, "learning_rate": 0.01, "loss": 1.9953, "step": 60585 }, { "epoch": 6.22181146025878, "grad_norm": 0.08860262483358383, "learning_rate": 0.01, "loss": 1.9751, "step": 60588 }, { "epoch": 6.222119531731361, "grad_norm": 0.10606038570404053, "learning_rate": 0.01, "loss": 1.9732, "step": 60591 }, { "epoch": 6.222427603203943, "grad_norm": 0.05913405120372772, "learning_rate": 0.01, "loss": 1.9739, "step": 60594 }, { "epoch": 6.2227356746765246, "grad_norm": 0.05625823140144348, "learning_rate": 0.01, "loss": 1.9921, "step": 60597 }, { "epoch": 6.223043746149107, "grad_norm": 0.10503005236387253, "learning_rate": 0.01, "loss": 2.0125, "step": 60600 }, { "epoch": 6.223351817621689, "grad_norm": 0.06931479275226593, "learning_rate": 0.01, "loss": 1.979, "step": 60603 }, { "epoch": 6.22365988909427, "grad_norm": 0.07577981054782867, "learning_rate": 0.01, "loss": 1.9817, "step": 60606 }, { "epoch": 6.223967960566852, "grad_norm": 0.04593481123447418, "learning_rate": 0.01, "loss": 1.9587, "step": 60609 }, { "epoch": 6.224276032039433, "grad_norm": 0.040800996124744415, "learning_rate": 0.01, "loss": 1.976, "step": 60612 }, { "epoch": 6.224584103512015, "grad_norm": 0.035023946315050125, "learning_rate": 0.01, "loss": 1.9943, "step": 60615 }, { "epoch": 6.224892174984596, "grad_norm": 0.10616744309663773, "learning_rate": 0.01, "loss": 1.9685, "step": 60618 }, { "epoch": 6.225200246457178, "grad_norm": 0.06860567629337311, "learning_rate": 0.01, "loss": 1.9822, "step": 60621 }, { "epoch": 6.2255083179297594, "grad_norm": 0.06851069629192352, "learning_rate": 0.01, "loss": 1.9878, "step": 60624 }, { "epoch": 6.2258163894023415, "grad_norm": 0.06767469644546509, "learning_rate": 0.01, "loss": 1.9739, "step": 60627 }, { "epoch": 6.226124460874923, "grad_norm": 0.06542889773845673, "learning_rate": 0.01, "loss": 1.9793, "step": 60630 }, { "epoch": 6.226432532347505, "grad_norm": 0.0505717433989048, "learning_rate": 0.01, "loss": 1.9912, "step": 60633 }, { "epoch": 6.226740603820086, "grad_norm": 0.04307612031698227, "learning_rate": 0.01, "loss": 1.9811, "step": 60636 }, { "epoch": 6.227048675292668, "grad_norm": 0.12549397349357605, "learning_rate": 0.01, "loss": 1.9679, "step": 60639 }, { "epoch": 6.227356746765249, "grad_norm": 0.10073670744895935, "learning_rate": 0.01, "loss": 1.9931, "step": 60642 }, { "epoch": 6.227664818237831, "grad_norm": 0.042758628726005554, "learning_rate": 0.01, "loss": 1.9888, "step": 60645 }, { "epoch": 6.227972889710413, "grad_norm": 0.0379779078066349, "learning_rate": 0.01, "loss": 1.9946, "step": 60648 }, { "epoch": 6.228280961182994, "grad_norm": 0.05374911054968834, "learning_rate": 0.01, "loss": 1.9951, "step": 60651 }, { "epoch": 6.228589032655576, "grad_norm": 0.0438760407269001, "learning_rate": 0.01, "loss": 1.9998, "step": 60654 }, { "epoch": 6.228897104128158, "grad_norm": 0.05077781528234482, "learning_rate": 0.01, "loss": 2.0091, "step": 60657 }, { "epoch": 6.22920517560074, "grad_norm": 0.05097432807087898, "learning_rate": 0.01, "loss": 1.9708, "step": 60660 }, { "epoch": 6.229513247073321, "grad_norm": 0.061216987669467926, "learning_rate": 0.01, "loss": 1.9734, "step": 60663 }, { "epoch": 6.229821318545903, "grad_norm": 0.056796032935380936, "learning_rate": 0.01, "loss": 1.9859, "step": 60666 }, { "epoch": 6.230129390018484, "grad_norm": 0.04003208130598068, "learning_rate": 0.01, "loss": 1.9785, "step": 60669 }, { "epoch": 6.230437461491066, "grad_norm": 0.07271415740251541, "learning_rate": 0.01, "loss": 1.9655, "step": 60672 }, { "epoch": 6.230745532963647, "grad_norm": 0.07599439471960068, "learning_rate": 0.01, "loss": 1.9656, "step": 60675 }, { "epoch": 6.231053604436229, "grad_norm": 0.40395355224609375, "learning_rate": 0.01, "loss": 1.9841, "step": 60678 }, { "epoch": 6.231361675908811, "grad_norm": 0.10945183783769608, "learning_rate": 0.01, "loss": 2.0014, "step": 60681 }, { "epoch": 6.2316697473813925, "grad_norm": 0.10694218426942825, "learning_rate": 0.01, "loss": 1.9744, "step": 60684 }, { "epoch": 6.2319778188539745, "grad_norm": 0.05942070484161377, "learning_rate": 0.01, "loss": 2.0023, "step": 60687 }, { "epoch": 6.232285890326556, "grad_norm": 0.039438262581825256, "learning_rate": 0.01, "loss": 1.9747, "step": 60690 }, { "epoch": 6.232593961799138, "grad_norm": 0.042470790445804596, "learning_rate": 0.01, "loss": 1.9772, "step": 60693 }, { "epoch": 6.232902033271719, "grad_norm": 0.05736652761697769, "learning_rate": 0.01, "loss": 1.9556, "step": 60696 }, { "epoch": 6.233210104744301, "grad_norm": 0.03797580674290657, "learning_rate": 0.01, "loss": 1.9456, "step": 60699 }, { "epoch": 6.233518176216882, "grad_norm": 0.042780667543411255, "learning_rate": 0.01, "loss": 2.0001, "step": 60702 }, { "epoch": 6.233826247689464, "grad_norm": 0.04742565006017685, "learning_rate": 0.01, "loss": 1.9853, "step": 60705 }, { "epoch": 6.234134319162045, "grad_norm": 0.10806404799222946, "learning_rate": 0.01, "loss": 1.9911, "step": 60708 }, { "epoch": 6.234442390634627, "grad_norm": 0.09392410516738892, "learning_rate": 0.01, "loss": 1.9927, "step": 60711 }, { "epoch": 6.2347504621072085, "grad_norm": 0.06125201657414436, "learning_rate": 0.01, "loss": 1.996, "step": 60714 }, { "epoch": 6.235058533579791, "grad_norm": 0.08396472036838531, "learning_rate": 0.01, "loss": 1.9971, "step": 60717 }, { "epoch": 6.235366605052372, "grad_norm": 0.05900922417640686, "learning_rate": 0.01, "loss": 1.9767, "step": 60720 }, { "epoch": 6.235674676524954, "grad_norm": 0.10509319603443146, "learning_rate": 0.01, "loss": 2.003, "step": 60723 }, { "epoch": 6.235982747997536, "grad_norm": 0.13138900697231293, "learning_rate": 0.01, "loss": 1.9611, "step": 60726 }, { "epoch": 6.236290819470117, "grad_norm": 0.05676103010773659, "learning_rate": 0.01, "loss": 1.984, "step": 60729 }, { "epoch": 6.236598890942699, "grad_norm": 0.04155607149004936, "learning_rate": 0.01, "loss": 1.9766, "step": 60732 }, { "epoch": 6.23690696241528, "grad_norm": 0.03284106031060219, "learning_rate": 0.01, "loss": 1.9932, "step": 60735 }, { "epoch": 6.237215033887862, "grad_norm": 0.03673785179853439, "learning_rate": 0.01, "loss": 1.9973, "step": 60738 }, { "epoch": 6.237523105360443, "grad_norm": 0.04972091317176819, "learning_rate": 0.01, "loss": 1.9757, "step": 60741 }, { "epoch": 6.2378311768330255, "grad_norm": 0.06821681559085846, "learning_rate": 0.01, "loss": 1.987, "step": 60744 }, { "epoch": 6.238139248305607, "grad_norm": 0.14047065377235413, "learning_rate": 0.01, "loss": 1.9633, "step": 60747 }, { "epoch": 6.238447319778189, "grad_norm": 0.09423762559890747, "learning_rate": 0.01, "loss": 1.9788, "step": 60750 }, { "epoch": 6.23875539125077, "grad_norm": 0.08889120817184448, "learning_rate": 0.01, "loss": 1.9857, "step": 60753 }, { "epoch": 6.239063462723352, "grad_norm": 0.07696396857500076, "learning_rate": 0.01, "loss": 1.9761, "step": 60756 }, { "epoch": 6.239371534195933, "grad_norm": 0.13203272223472595, "learning_rate": 0.01, "loss": 1.9983, "step": 60759 }, { "epoch": 6.239679605668515, "grad_norm": 0.07637298852205276, "learning_rate": 0.01, "loss": 1.9643, "step": 60762 }, { "epoch": 6.239987677141097, "grad_norm": 0.035956770181655884, "learning_rate": 0.01, "loss": 1.9915, "step": 60765 }, { "epoch": 6.240295748613678, "grad_norm": 0.051339924335479736, "learning_rate": 0.01, "loss": 2.0075, "step": 60768 }, { "epoch": 6.24060382008626, "grad_norm": 0.05336092412471771, "learning_rate": 0.01, "loss": 1.9703, "step": 60771 }, { "epoch": 6.2409118915588415, "grad_norm": 0.061884574592113495, "learning_rate": 0.01, "loss": 1.9873, "step": 60774 }, { "epoch": 6.241219963031424, "grad_norm": 0.0763407051563263, "learning_rate": 0.01, "loss": 1.9792, "step": 60777 }, { "epoch": 6.241528034504005, "grad_norm": 0.06376196444034576, "learning_rate": 0.01, "loss": 1.9804, "step": 60780 }, { "epoch": 6.241836105976587, "grad_norm": 0.04951758310198784, "learning_rate": 0.01, "loss": 1.9824, "step": 60783 }, { "epoch": 6.242144177449168, "grad_norm": 0.03801729902625084, "learning_rate": 0.01, "loss": 1.9851, "step": 60786 }, { "epoch": 6.24245224892175, "grad_norm": 0.05235125496983528, "learning_rate": 0.01, "loss": 1.9879, "step": 60789 }, { "epoch": 6.242760320394331, "grad_norm": 0.09875135123729706, "learning_rate": 0.01, "loss": 1.9864, "step": 60792 }, { "epoch": 6.243068391866913, "grad_norm": 0.07917656749486923, "learning_rate": 0.01, "loss": 1.9791, "step": 60795 }, { "epoch": 6.243376463339494, "grad_norm": 0.09031876921653748, "learning_rate": 0.01, "loss": 1.9829, "step": 60798 }, { "epoch": 6.243684534812076, "grad_norm": 0.03974262252449989, "learning_rate": 0.01, "loss": 2.0007, "step": 60801 }, { "epoch": 6.2439926062846585, "grad_norm": 0.10321113467216492, "learning_rate": 0.01, "loss": 1.997, "step": 60804 }, { "epoch": 6.24430067775724, "grad_norm": 0.041257478296756744, "learning_rate": 0.01, "loss": 1.978, "step": 60807 }, { "epoch": 6.244608749229822, "grad_norm": 0.054699040949344635, "learning_rate": 0.01, "loss": 1.9478, "step": 60810 }, { "epoch": 6.244916820702403, "grad_norm": 0.06867776066064835, "learning_rate": 0.01, "loss": 1.9855, "step": 60813 }, { "epoch": 6.245224892174985, "grad_norm": 0.04628169536590576, "learning_rate": 0.01, "loss": 1.9868, "step": 60816 }, { "epoch": 6.245532963647566, "grad_norm": 0.04721110686659813, "learning_rate": 0.01, "loss": 1.981, "step": 60819 }, { "epoch": 6.245841035120148, "grad_norm": 0.1265910118818283, "learning_rate": 0.01, "loss": 1.9804, "step": 60822 }, { "epoch": 6.246149106592729, "grad_norm": 0.09225190430879593, "learning_rate": 0.01, "loss": 1.9928, "step": 60825 }, { "epoch": 6.246457178065311, "grad_norm": 0.06333420425653458, "learning_rate": 0.01, "loss": 1.9758, "step": 60828 }, { "epoch": 6.2467652495378925, "grad_norm": 0.06048038601875305, "learning_rate": 0.01, "loss": 1.9889, "step": 60831 }, { "epoch": 6.2470733210104745, "grad_norm": 0.032611507922410965, "learning_rate": 0.01, "loss": 1.9895, "step": 60834 }, { "epoch": 6.247381392483056, "grad_norm": 0.038066837936639786, "learning_rate": 0.01, "loss": 1.9914, "step": 60837 }, { "epoch": 6.247689463955638, "grad_norm": 0.0952901542186737, "learning_rate": 0.01, "loss": 1.979, "step": 60840 }, { "epoch": 6.247997535428219, "grad_norm": 0.051439084112644196, "learning_rate": 0.01, "loss": 1.9403, "step": 60843 }, { "epoch": 6.248305606900801, "grad_norm": 0.12137739360332489, "learning_rate": 0.01, "loss": 1.9708, "step": 60846 }, { "epoch": 6.248613678373383, "grad_norm": 0.04107406362891197, "learning_rate": 0.01, "loss": 1.9545, "step": 60849 }, { "epoch": 6.248921749845964, "grad_norm": 0.053470056504011154, "learning_rate": 0.01, "loss": 1.9892, "step": 60852 }, { "epoch": 6.249229821318546, "grad_norm": 0.10296319425106049, "learning_rate": 0.01, "loss": 2.0136, "step": 60855 }, { "epoch": 6.249537892791127, "grad_norm": 0.05375886335968971, "learning_rate": 0.01, "loss": 1.9957, "step": 60858 }, { "epoch": 6.249845964263709, "grad_norm": 0.04653402417898178, "learning_rate": 0.01, "loss": 1.9702, "step": 60861 }, { "epoch": 6.250154035736291, "grad_norm": 0.04048166796565056, "learning_rate": 0.01, "loss": 1.9928, "step": 60864 }, { "epoch": 6.250462107208873, "grad_norm": 0.04350602254271507, "learning_rate": 0.01, "loss": 1.9798, "step": 60867 }, { "epoch": 6.250770178681454, "grad_norm": 0.06153450533747673, "learning_rate": 0.01, "loss": 1.9806, "step": 60870 }, { "epoch": 6.251078250154036, "grad_norm": 0.05672883987426758, "learning_rate": 0.01, "loss": 1.9943, "step": 60873 }, { "epoch": 6.251386321626617, "grad_norm": 0.04813450202345848, "learning_rate": 0.01, "loss": 1.9887, "step": 60876 }, { "epoch": 6.251694393099199, "grad_norm": 0.03838599845767021, "learning_rate": 0.01, "loss": 2.0072, "step": 60879 }, { "epoch": 6.252002464571781, "grad_norm": 0.04607195779681206, "learning_rate": 0.01, "loss": 1.97, "step": 60882 }, { "epoch": 6.252310536044362, "grad_norm": 0.06564761698246002, "learning_rate": 0.01, "loss": 1.9766, "step": 60885 }, { "epoch": 6.252618607516944, "grad_norm": 0.0656748041510582, "learning_rate": 0.01, "loss": 1.9651, "step": 60888 }, { "epoch": 6.2529266789895255, "grad_norm": 0.09336625039577484, "learning_rate": 0.01, "loss": 1.9875, "step": 60891 }, { "epoch": 6.2532347504621075, "grad_norm": 0.08100081235170364, "learning_rate": 0.01, "loss": 1.9658, "step": 60894 }, { "epoch": 6.253542821934689, "grad_norm": 0.06392189860343933, "learning_rate": 0.01, "loss": 1.975, "step": 60897 }, { "epoch": 6.253850893407271, "grad_norm": 0.11884801834821701, "learning_rate": 0.01, "loss": 1.9931, "step": 60900 }, { "epoch": 6.254158964879852, "grad_norm": 0.08046597987413406, "learning_rate": 0.01, "loss": 1.9856, "step": 60903 }, { "epoch": 6.254467036352434, "grad_norm": 0.03278736025094986, "learning_rate": 0.01, "loss": 1.966, "step": 60906 }, { "epoch": 6.254775107825015, "grad_norm": 0.043066561222076416, "learning_rate": 0.01, "loss": 1.9956, "step": 60909 }, { "epoch": 6.255083179297597, "grad_norm": 0.08042460680007935, "learning_rate": 0.01, "loss": 1.9978, "step": 60912 }, { "epoch": 6.255391250770178, "grad_norm": 0.05132247880101204, "learning_rate": 0.01, "loss": 1.9898, "step": 60915 }, { "epoch": 6.25569932224276, "grad_norm": 0.04719965532422066, "learning_rate": 0.01, "loss": 1.9818, "step": 60918 }, { "epoch": 6.2560073937153415, "grad_norm": 0.08247886598110199, "learning_rate": 0.01, "loss": 1.9532, "step": 60921 }, { "epoch": 6.256315465187924, "grad_norm": 0.0856601819396019, "learning_rate": 0.01, "loss": 2.0051, "step": 60924 }, { "epoch": 6.256623536660506, "grad_norm": 0.06547616422176361, "learning_rate": 0.01, "loss": 1.9799, "step": 60927 }, { "epoch": 6.256931608133087, "grad_norm": 0.04583069309592247, "learning_rate": 0.01, "loss": 1.9743, "step": 60930 }, { "epoch": 6.257239679605669, "grad_norm": 0.1286034882068634, "learning_rate": 0.01, "loss": 2.0131, "step": 60933 }, { "epoch": 6.25754775107825, "grad_norm": 0.07549592107534409, "learning_rate": 0.01, "loss": 1.9873, "step": 60936 }, { "epoch": 6.257855822550832, "grad_norm": 0.06334639340639114, "learning_rate": 0.01, "loss": 1.9611, "step": 60939 }, { "epoch": 6.258163894023413, "grad_norm": 0.050806958228349686, "learning_rate": 0.01, "loss": 1.9642, "step": 60942 }, { "epoch": 6.258471965495995, "grad_norm": 0.03487928584218025, "learning_rate": 0.01, "loss": 1.9887, "step": 60945 }, { "epoch": 6.258780036968576, "grad_norm": 0.03992629423737526, "learning_rate": 0.01, "loss": 1.9632, "step": 60948 }, { "epoch": 6.2590881084411585, "grad_norm": 0.06908503919839859, "learning_rate": 0.01, "loss": 1.9727, "step": 60951 }, { "epoch": 6.25939617991374, "grad_norm": 0.10641307383775711, "learning_rate": 0.01, "loss": 1.9681, "step": 60954 }, { "epoch": 6.259704251386322, "grad_norm": 0.05809535086154938, "learning_rate": 0.01, "loss": 1.966, "step": 60957 }, { "epoch": 6.260012322858903, "grad_norm": 0.1139754205942154, "learning_rate": 0.01, "loss": 1.9958, "step": 60960 }, { "epoch": 6.260320394331485, "grad_norm": 0.1501069813966751, "learning_rate": 0.01, "loss": 1.9962, "step": 60963 }, { "epoch": 6.260628465804067, "grad_norm": 0.10279611498117447, "learning_rate": 0.01, "loss": 1.9849, "step": 60966 }, { "epoch": 6.260936537276648, "grad_norm": 0.04817177355289459, "learning_rate": 0.01, "loss": 1.9908, "step": 60969 }, { "epoch": 6.26124460874923, "grad_norm": 0.036743566393852234, "learning_rate": 0.01, "loss": 1.9561, "step": 60972 }, { "epoch": 6.261552680221811, "grad_norm": 0.041490793228149414, "learning_rate": 0.01, "loss": 1.9838, "step": 60975 }, { "epoch": 6.261860751694393, "grad_norm": 0.034102845937013626, "learning_rate": 0.01, "loss": 1.9866, "step": 60978 }, { "epoch": 6.2621688231669745, "grad_norm": 0.04929140582680702, "learning_rate": 0.01, "loss": 1.9571, "step": 60981 }, { "epoch": 6.262476894639557, "grad_norm": 0.1550266444683075, "learning_rate": 0.01, "loss": 1.9877, "step": 60984 }, { "epoch": 6.262784966112138, "grad_norm": 0.1096281111240387, "learning_rate": 0.01, "loss": 1.9832, "step": 60987 }, { "epoch": 6.26309303758472, "grad_norm": 0.057930875569581985, "learning_rate": 0.01, "loss": 1.9795, "step": 60990 }, { "epoch": 6.263401109057301, "grad_norm": 0.04393153637647629, "learning_rate": 0.01, "loss": 1.9468, "step": 60993 }, { "epoch": 6.263709180529883, "grad_norm": 0.039023157209157944, "learning_rate": 0.01, "loss": 1.9739, "step": 60996 }, { "epoch": 6.264017252002464, "grad_norm": 0.05385271832346916, "learning_rate": 0.01, "loss": 1.9809, "step": 60999 }, { "epoch": 6.264325323475046, "grad_norm": 0.11487401276826859, "learning_rate": 0.01, "loss": 1.9636, "step": 61002 }, { "epoch": 6.264633394947628, "grad_norm": 0.12395527213811874, "learning_rate": 0.01, "loss": 1.9657, "step": 61005 }, { "epoch": 6.264941466420209, "grad_norm": 0.05898820608854294, "learning_rate": 0.01, "loss": 1.9912, "step": 61008 }, { "epoch": 6.2652495378927915, "grad_norm": 0.06743215769529343, "learning_rate": 0.01, "loss": 1.9511, "step": 61011 }, { "epoch": 6.265557609365373, "grad_norm": 0.04019298404455185, "learning_rate": 0.01, "loss": 1.9813, "step": 61014 }, { "epoch": 6.265865680837955, "grad_norm": 0.053353726863861084, "learning_rate": 0.01, "loss": 1.9869, "step": 61017 }, { "epoch": 6.266173752310536, "grad_norm": 0.05660318583250046, "learning_rate": 0.01, "loss": 1.9907, "step": 61020 }, { "epoch": 6.266481823783118, "grad_norm": 0.08507215976715088, "learning_rate": 0.01, "loss": 2.0229, "step": 61023 }, { "epoch": 6.266789895255699, "grad_norm": 0.10339481383562088, "learning_rate": 0.01, "loss": 1.9907, "step": 61026 }, { "epoch": 6.267097966728281, "grad_norm": 0.06297086924314499, "learning_rate": 0.01, "loss": 1.9701, "step": 61029 }, { "epoch": 6.267406038200862, "grad_norm": 0.03891312703490257, "learning_rate": 0.01, "loss": 1.9829, "step": 61032 }, { "epoch": 6.267714109673444, "grad_norm": 0.05895547196269035, "learning_rate": 0.01, "loss": 1.984, "step": 61035 }, { "epoch": 6.2680221811460255, "grad_norm": 0.058469437062740326, "learning_rate": 0.01, "loss": 1.9852, "step": 61038 }, { "epoch": 6.2683302526186075, "grad_norm": 0.04402352124452591, "learning_rate": 0.01, "loss": 1.9456, "step": 61041 }, { "epoch": 6.268638324091189, "grad_norm": 0.07434606552124023, "learning_rate": 0.01, "loss": 1.9757, "step": 61044 }, { "epoch": 6.268946395563771, "grad_norm": 0.07005775719881058, "learning_rate": 0.01, "loss": 1.9783, "step": 61047 }, { "epoch": 6.269254467036353, "grad_norm": 0.08956246078014374, "learning_rate": 0.01, "loss": 1.9753, "step": 61050 }, { "epoch": 6.269562538508934, "grad_norm": 0.04520926997065544, "learning_rate": 0.01, "loss": 1.9769, "step": 61053 }, { "epoch": 6.269870609981516, "grad_norm": 0.09920626133680344, "learning_rate": 0.01, "loss": 1.9738, "step": 61056 }, { "epoch": 6.270178681454097, "grad_norm": 0.10885010659694672, "learning_rate": 0.01, "loss": 1.9978, "step": 61059 }, { "epoch": 6.270486752926679, "grad_norm": 0.0662059485912323, "learning_rate": 0.01, "loss": 1.9751, "step": 61062 }, { "epoch": 6.27079482439926, "grad_norm": 0.09873577207326889, "learning_rate": 0.01, "loss": 1.9955, "step": 61065 }, { "epoch": 6.271102895871842, "grad_norm": 0.0979217141866684, "learning_rate": 0.01, "loss": 1.9631, "step": 61068 }, { "epoch": 6.271410967344424, "grad_norm": 0.06409544497728348, "learning_rate": 0.01, "loss": 1.9676, "step": 61071 }, { "epoch": 6.271719038817006, "grad_norm": 0.05898788571357727, "learning_rate": 0.01, "loss": 1.9755, "step": 61074 }, { "epoch": 6.272027110289587, "grad_norm": 0.048941727727651596, "learning_rate": 0.01, "loss": 2.0042, "step": 61077 }, { "epoch": 6.272335181762169, "grad_norm": 0.04335403069853783, "learning_rate": 0.01, "loss": 1.9931, "step": 61080 }, { "epoch": 6.272643253234751, "grad_norm": 0.08291465789079666, "learning_rate": 0.01, "loss": 1.9936, "step": 61083 }, { "epoch": 6.272951324707332, "grad_norm": 0.09848134219646454, "learning_rate": 0.01, "loss": 1.9843, "step": 61086 }, { "epoch": 6.273259396179914, "grad_norm": 0.10949241369962692, "learning_rate": 0.01, "loss": 1.9686, "step": 61089 }, { "epoch": 6.273567467652495, "grad_norm": 0.07039642333984375, "learning_rate": 0.01, "loss": 1.9971, "step": 61092 }, { "epoch": 6.273875539125077, "grad_norm": 0.0475379079580307, "learning_rate": 0.01, "loss": 2.0068, "step": 61095 }, { "epoch": 6.2741836105976585, "grad_norm": 0.07531355321407318, "learning_rate": 0.01, "loss": 1.99, "step": 61098 }, { "epoch": 6.2744916820702406, "grad_norm": 0.04126337915658951, "learning_rate": 0.01, "loss": 1.9788, "step": 61101 }, { "epoch": 6.274799753542822, "grad_norm": 0.03969768062233925, "learning_rate": 0.01, "loss": 1.9955, "step": 61104 }, { "epoch": 6.275107825015404, "grad_norm": 0.06768359243869781, "learning_rate": 0.01, "loss": 1.9693, "step": 61107 }, { "epoch": 6.275415896487985, "grad_norm": 0.10613830387592316, "learning_rate": 0.01, "loss": 1.956, "step": 61110 }, { "epoch": 6.275723967960567, "grad_norm": 0.09580209851264954, "learning_rate": 0.01, "loss": 1.9867, "step": 61113 }, { "epoch": 6.276032039433148, "grad_norm": 0.06177694723010063, "learning_rate": 0.01, "loss": 1.9846, "step": 61116 }, { "epoch": 6.27634011090573, "grad_norm": 0.10451437532901764, "learning_rate": 0.01, "loss": 1.9762, "step": 61119 }, { "epoch": 6.276648182378311, "grad_norm": 0.1352027803659439, "learning_rate": 0.01, "loss": 1.993, "step": 61122 }, { "epoch": 6.276956253850893, "grad_norm": 0.0892416313290596, "learning_rate": 0.01, "loss": 1.9876, "step": 61125 }, { "epoch": 6.2772643253234754, "grad_norm": 0.04253193363547325, "learning_rate": 0.01, "loss": 1.9932, "step": 61128 }, { "epoch": 6.277572396796057, "grad_norm": 0.03616737946867943, "learning_rate": 0.01, "loss": 1.9522, "step": 61131 }, { "epoch": 6.277880468268639, "grad_norm": 0.09631824493408203, "learning_rate": 0.01, "loss": 1.9932, "step": 61134 }, { "epoch": 6.27818853974122, "grad_norm": 0.08627616614103317, "learning_rate": 0.01, "loss": 1.9745, "step": 61137 }, { "epoch": 6.278496611213802, "grad_norm": 0.07595238834619522, "learning_rate": 0.01, "loss": 1.9912, "step": 61140 }, { "epoch": 6.278804682686383, "grad_norm": 0.05115518346428871, "learning_rate": 0.01, "loss": 1.9808, "step": 61143 }, { "epoch": 6.279112754158965, "grad_norm": 0.07910732924938202, "learning_rate": 0.01, "loss": 1.9566, "step": 61146 }, { "epoch": 6.279420825631546, "grad_norm": 0.07935528457164764, "learning_rate": 0.01, "loss": 1.9893, "step": 61149 }, { "epoch": 6.279728897104128, "grad_norm": 0.09080187976360321, "learning_rate": 0.01, "loss": 1.9684, "step": 61152 }, { "epoch": 6.2800369685767095, "grad_norm": 0.07743295282125473, "learning_rate": 0.01, "loss": 1.9616, "step": 61155 }, { "epoch": 6.2803450400492915, "grad_norm": 0.07669807970523834, "learning_rate": 0.01, "loss": 1.9813, "step": 61158 }, { "epoch": 6.280653111521873, "grad_norm": 0.09190505743026733, "learning_rate": 0.01, "loss": 1.954, "step": 61161 }, { "epoch": 6.280961182994455, "grad_norm": 0.052665840834379196, "learning_rate": 0.01, "loss": 1.998, "step": 61164 }, { "epoch": 6.281269254467037, "grad_norm": 0.05133817344903946, "learning_rate": 0.01, "loss": 1.9841, "step": 61167 }, { "epoch": 6.281577325939618, "grad_norm": 0.038431648164987564, "learning_rate": 0.01, "loss": 1.9788, "step": 61170 }, { "epoch": 6.2818853974122, "grad_norm": 0.057378388941287994, "learning_rate": 0.01, "loss": 1.9623, "step": 61173 }, { "epoch": 6.282193468884781, "grad_norm": 0.08355475962162018, "learning_rate": 0.01, "loss": 1.9952, "step": 61176 }, { "epoch": 6.282501540357363, "grad_norm": 0.07666066288948059, "learning_rate": 0.01, "loss": 2.0007, "step": 61179 }, { "epoch": 6.282809611829944, "grad_norm": 0.12239743769168854, "learning_rate": 0.01, "loss": 1.9792, "step": 61182 }, { "epoch": 6.283117683302526, "grad_norm": 0.10445159673690796, "learning_rate": 0.01, "loss": 1.9845, "step": 61185 }, { "epoch": 6.283425754775108, "grad_norm": 0.04401460289955139, "learning_rate": 0.01, "loss": 1.9758, "step": 61188 }, { "epoch": 6.28373382624769, "grad_norm": 0.08247647434473038, "learning_rate": 0.01, "loss": 1.9774, "step": 61191 }, { "epoch": 6.284041897720271, "grad_norm": 0.06200701370835304, "learning_rate": 0.01, "loss": 1.9557, "step": 61194 }, { "epoch": 6.284349969192853, "grad_norm": 0.04233565926551819, "learning_rate": 0.01, "loss": 1.9883, "step": 61197 }, { "epoch": 6.284658040665434, "grad_norm": 0.09187982976436615, "learning_rate": 0.01, "loss": 1.9991, "step": 61200 }, { "epoch": 6.284966112138016, "grad_norm": 0.03373527526855469, "learning_rate": 0.01, "loss": 1.9599, "step": 61203 }, { "epoch": 6.285274183610598, "grad_norm": 0.09450080990791321, "learning_rate": 0.01, "loss": 1.9756, "step": 61206 }, { "epoch": 6.285582255083179, "grad_norm": 0.048632752150297165, "learning_rate": 0.01, "loss": 1.9823, "step": 61209 }, { "epoch": 6.285890326555761, "grad_norm": 0.11564646661281586, "learning_rate": 0.01, "loss": 1.9859, "step": 61212 }, { "epoch": 6.2861983980283425, "grad_norm": 0.04203884303569794, "learning_rate": 0.01, "loss": 1.9946, "step": 61215 }, { "epoch": 6.2865064695009245, "grad_norm": 0.08716481178998947, "learning_rate": 0.01, "loss": 1.9774, "step": 61218 }, { "epoch": 6.286814540973506, "grad_norm": 0.07940181344747543, "learning_rate": 0.01, "loss": 1.9667, "step": 61221 }, { "epoch": 6.287122612446088, "grad_norm": 0.06720132380723953, "learning_rate": 0.01, "loss": 1.9655, "step": 61224 }, { "epoch": 6.287430683918669, "grad_norm": 0.05566677451133728, "learning_rate": 0.01, "loss": 1.9849, "step": 61227 }, { "epoch": 6.287738755391251, "grad_norm": 0.09876339137554169, "learning_rate": 0.01, "loss": 1.9709, "step": 61230 }, { "epoch": 6.288046826863832, "grad_norm": 0.054778262972831726, "learning_rate": 0.01, "loss": 1.9652, "step": 61233 }, { "epoch": 6.288354898336414, "grad_norm": 0.06283058226108551, "learning_rate": 0.01, "loss": 1.9678, "step": 61236 }, { "epoch": 6.288662969808995, "grad_norm": 0.054917361587285995, "learning_rate": 0.01, "loss": 2.0248, "step": 61239 }, { "epoch": 6.288971041281577, "grad_norm": 0.10483105480670929, "learning_rate": 0.01, "loss": 1.9816, "step": 61242 }, { "epoch": 6.2892791127541585, "grad_norm": 0.07183388620615005, "learning_rate": 0.01, "loss": 1.9869, "step": 61245 }, { "epoch": 6.289587184226741, "grad_norm": 0.12913282215595245, "learning_rate": 0.01, "loss": 1.9802, "step": 61248 }, { "epoch": 6.289895255699323, "grad_norm": 0.1015811562538147, "learning_rate": 0.01, "loss": 1.992, "step": 61251 }, { "epoch": 6.290203327171904, "grad_norm": 0.09693120419979095, "learning_rate": 0.01, "loss": 1.9711, "step": 61254 }, { "epoch": 6.290511398644486, "grad_norm": 0.05746886506676674, "learning_rate": 0.01, "loss": 1.9914, "step": 61257 }, { "epoch": 6.290819470117067, "grad_norm": 0.07819090038537979, "learning_rate": 0.01, "loss": 1.9485, "step": 61260 }, { "epoch": 6.291127541589649, "grad_norm": 0.039794620126485825, "learning_rate": 0.01, "loss": 1.9623, "step": 61263 }, { "epoch": 6.29143561306223, "grad_norm": 0.06474246084690094, "learning_rate": 0.01, "loss": 1.9886, "step": 61266 }, { "epoch": 6.291743684534812, "grad_norm": 0.05018305405974388, "learning_rate": 0.01, "loss": 1.9792, "step": 61269 }, { "epoch": 6.292051756007393, "grad_norm": 0.10312799364328384, "learning_rate": 0.01, "loss": 1.9678, "step": 61272 }, { "epoch": 6.2923598274799755, "grad_norm": 0.0430091917514801, "learning_rate": 0.01, "loss": 1.9882, "step": 61275 }, { "epoch": 6.292667898952557, "grad_norm": 0.08931942284107208, "learning_rate": 0.01, "loss": 1.9937, "step": 61278 }, { "epoch": 6.292975970425139, "grad_norm": 0.09583033621311188, "learning_rate": 0.01, "loss": 1.98, "step": 61281 }, { "epoch": 6.293284041897721, "grad_norm": 0.048897985368967056, "learning_rate": 0.01, "loss": 1.9996, "step": 61284 }, { "epoch": 6.293592113370302, "grad_norm": 0.07638606429100037, "learning_rate": 0.01, "loss": 1.968, "step": 61287 }, { "epoch": 6.293900184842884, "grad_norm": 0.07323414832353592, "learning_rate": 0.01, "loss": 1.9631, "step": 61290 }, { "epoch": 6.294208256315465, "grad_norm": 0.08963891863822937, "learning_rate": 0.01, "loss": 2.002, "step": 61293 }, { "epoch": 6.294516327788047, "grad_norm": 0.047504641115665436, "learning_rate": 0.01, "loss": 1.962, "step": 61296 }, { "epoch": 6.294824399260628, "grad_norm": 0.04849178344011307, "learning_rate": 0.01, "loss": 1.983, "step": 61299 }, { "epoch": 6.29513247073321, "grad_norm": 0.06779339909553528, "learning_rate": 0.01, "loss": 1.9797, "step": 61302 }, { "epoch": 6.2954405422057915, "grad_norm": 0.03595037758350372, "learning_rate": 0.01, "loss": 1.9868, "step": 61305 }, { "epoch": 6.295748613678374, "grad_norm": 0.04232453927397728, "learning_rate": 0.01, "loss": 1.9948, "step": 61308 }, { "epoch": 6.296056685150955, "grad_norm": 0.03607799485325813, "learning_rate": 0.01, "loss": 1.9924, "step": 61311 }, { "epoch": 6.296364756623537, "grad_norm": 0.07026591151952744, "learning_rate": 0.01, "loss": 1.9812, "step": 61314 }, { "epoch": 6.296672828096118, "grad_norm": 0.09154605120420456, "learning_rate": 0.01, "loss": 1.9743, "step": 61317 }, { "epoch": 6.2969808995687, "grad_norm": 0.0732119083404541, "learning_rate": 0.01, "loss": 2.0005, "step": 61320 }, { "epoch": 6.297288971041281, "grad_norm": 0.12241560965776443, "learning_rate": 0.01, "loss": 1.9862, "step": 61323 }, { "epoch": 6.297597042513863, "grad_norm": 0.04552261903882027, "learning_rate": 0.01, "loss": 1.9819, "step": 61326 }, { "epoch": 6.297905113986445, "grad_norm": 0.10219983011484146, "learning_rate": 0.01, "loss": 1.9771, "step": 61329 }, { "epoch": 6.298213185459026, "grad_norm": 0.09614024311304092, "learning_rate": 0.01, "loss": 1.9891, "step": 61332 }, { "epoch": 6.2985212569316085, "grad_norm": 0.049760278314352036, "learning_rate": 0.01, "loss": 1.9792, "step": 61335 }, { "epoch": 6.29882932840419, "grad_norm": 0.11046284437179565, "learning_rate": 0.01, "loss": 1.9936, "step": 61338 }, { "epoch": 6.299137399876772, "grad_norm": 0.045198407024145126, "learning_rate": 0.01, "loss": 1.9717, "step": 61341 }, { "epoch": 6.299445471349353, "grad_norm": 0.0499146468937397, "learning_rate": 0.01, "loss": 1.9844, "step": 61344 }, { "epoch": 6.299753542821935, "grad_norm": 0.0700070783495903, "learning_rate": 0.01, "loss": 1.9667, "step": 61347 }, { "epoch": 6.300061614294516, "grad_norm": 0.042327769100666046, "learning_rate": 0.01, "loss": 1.9827, "step": 61350 }, { "epoch": 6.300369685767098, "grad_norm": 0.042714498937129974, "learning_rate": 0.01, "loss": 1.98, "step": 61353 }, { "epoch": 6.300677757239679, "grad_norm": 0.06567323952913284, "learning_rate": 0.01, "loss": 1.9802, "step": 61356 }, { "epoch": 6.300985828712261, "grad_norm": 0.07993308454751968, "learning_rate": 0.01, "loss": 1.9802, "step": 61359 }, { "epoch": 6.3012939001848425, "grad_norm": 0.08032864332199097, "learning_rate": 0.01, "loss": 2.0013, "step": 61362 }, { "epoch": 6.3016019716574245, "grad_norm": 0.05398751050233841, "learning_rate": 0.01, "loss": 1.9286, "step": 61365 }, { "epoch": 6.301910043130006, "grad_norm": 0.04342971369624138, "learning_rate": 0.01, "loss": 1.9741, "step": 61368 }, { "epoch": 6.302218114602588, "grad_norm": 0.0811370387673378, "learning_rate": 0.01, "loss": 1.9712, "step": 61371 }, { "epoch": 6.30252618607517, "grad_norm": 0.09355277568101883, "learning_rate": 0.01, "loss": 2.0003, "step": 61374 }, { "epoch": 6.302834257547751, "grad_norm": 0.06557541340589523, "learning_rate": 0.01, "loss": 1.9891, "step": 61377 }, { "epoch": 6.303142329020333, "grad_norm": 0.038711193948984146, "learning_rate": 0.01, "loss": 1.9859, "step": 61380 }, { "epoch": 6.303450400492914, "grad_norm": 0.04032447189092636, "learning_rate": 0.01, "loss": 1.9781, "step": 61383 }, { "epoch": 6.303758471965496, "grad_norm": 0.03779391944408417, "learning_rate": 0.01, "loss": 1.9768, "step": 61386 }, { "epoch": 6.304066543438077, "grad_norm": 0.054389867931604385, "learning_rate": 0.01, "loss": 1.994, "step": 61389 }, { "epoch": 6.304374614910659, "grad_norm": 0.0877804309129715, "learning_rate": 0.01, "loss": 1.9693, "step": 61392 }, { "epoch": 6.304682686383241, "grad_norm": 0.06545720249414444, "learning_rate": 0.01, "loss": 1.9409, "step": 61395 }, { "epoch": 6.304990757855823, "grad_norm": 0.10159642994403839, "learning_rate": 0.01, "loss": 1.9981, "step": 61398 }, { "epoch": 6.305298829328404, "grad_norm": 0.045314956456422806, "learning_rate": 0.01, "loss": 1.9609, "step": 61401 }, { "epoch": 6.305606900800986, "grad_norm": 0.1006140187382698, "learning_rate": 0.01, "loss": 1.962, "step": 61404 }, { "epoch": 6.305914972273568, "grad_norm": 0.09133684635162354, "learning_rate": 0.01, "loss": 1.9624, "step": 61407 }, { "epoch": 6.306223043746149, "grad_norm": 0.11910711973905563, "learning_rate": 0.01, "loss": 1.9799, "step": 61410 }, { "epoch": 6.306531115218731, "grad_norm": 0.11194338649511337, "learning_rate": 0.01, "loss": 1.9856, "step": 61413 }, { "epoch": 6.306839186691312, "grad_norm": 0.050022371113300323, "learning_rate": 0.01, "loss": 1.957, "step": 61416 }, { "epoch": 6.307147258163894, "grad_norm": 0.039385128766298294, "learning_rate": 0.01, "loss": 1.9669, "step": 61419 }, { "epoch": 6.3074553296364755, "grad_norm": 0.048881761729717255, "learning_rate": 0.01, "loss": 2.0099, "step": 61422 }, { "epoch": 6.3077634011090575, "grad_norm": 0.062211450189352036, "learning_rate": 0.01, "loss": 1.9975, "step": 61425 }, { "epoch": 6.308071472581639, "grad_norm": 0.047393426299095154, "learning_rate": 0.01, "loss": 1.9926, "step": 61428 }, { "epoch": 6.308379544054221, "grad_norm": 0.03558918833732605, "learning_rate": 0.01, "loss": 1.9794, "step": 61431 }, { "epoch": 6.308687615526802, "grad_norm": 0.05444757267832756, "learning_rate": 0.01, "loss": 1.9766, "step": 61434 }, { "epoch": 6.308995686999384, "grad_norm": 0.0579729862511158, "learning_rate": 0.01, "loss": 1.9952, "step": 61437 }, { "epoch": 6.309303758471965, "grad_norm": 0.10818339139223099, "learning_rate": 0.01, "loss": 1.9707, "step": 61440 }, { "epoch": 6.309611829944547, "grad_norm": 0.053433943539857864, "learning_rate": 0.01, "loss": 1.9886, "step": 61443 }, { "epoch": 6.309919901417128, "grad_norm": 0.09320402890443802, "learning_rate": 0.01, "loss": 1.9363, "step": 61446 }, { "epoch": 6.31022797288971, "grad_norm": 0.10474387556314468, "learning_rate": 0.01, "loss": 2.0013, "step": 61449 }, { "epoch": 6.310536044362292, "grad_norm": 0.059648603200912476, "learning_rate": 0.01, "loss": 1.9668, "step": 61452 }, { "epoch": 6.310844115834874, "grad_norm": 0.05086411163210869, "learning_rate": 0.01, "loss": 1.9748, "step": 61455 }, { "epoch": 6.311152187307456, "grad_norm": 0.05620935931801796, "learning_rate": 0.01, "loss": 1.9704, "step": 61458 }, { "epoch": 6.311460258780037, "grad_norm": 0.07844830304384232, "learning_rate": 0.01, "loss": 1.9962, "step": 61461 }, { "epoch": 6.311768330252619, "grad_norm": 0.1557673066854477, "learning_rate": 0.01, "loss": 1.9848, "step": 61464 }, { "epoch": 6.3120764017252, "grad_norm": 0.09628250449895859, "learning_rate": 0.01, "loss": 1.9956, "step": 61467 }, { "epoch": 6.312384473197782, "grad_norm": 0.045699939131736755, "learning_rate": 0.01, "loss": 2.0116, "step": 61470 }, { "epoch": 6.312692544670363, "grad_norm": 0.033145416527986526, "learning_rate": 0.01, "loss": 1.9749, "step": 61473 }, { "epoch": 6.313000616142945, "grad_norm": 0.03600894287228584, "learning_rate": 0.01, "loss": 1.9406, "step": 61476 }, { "epoch": 6.313308687615526, "grad_norm": 0.09642411768436432, "learning_rate": 0.01, "loss": 1.993, "step": 61479 }, { "epoch": 6.3136167590881085, "grad_norm": 0.047719743102788925, "learning_rate": 0.01, "loss": 1.9714, "step": 61482 }, { "epoch": 6.3139248305606905, "grad_norm": 0.0641685277223587, "learning_rate": 0.01, "loss": 1.9828, "step": 61485 }, { "epoch": 6.314232902033272, "grad_norm": 0.06864326447248459, "learning_rate": 0.01, "loss": 1.9792, "step": 61488 }, { "epoch": 6.314540973505854, "grad_norm": 0.062220584601163864, "learning_rate": 0.01, "loss": 1.988, "step": 61491 }, { "epoch": 6.314849044978435, "grad_norm": 0.0497710257768631, "learning_rate": 0.01, "loss": 1.9837, "step": 61494 }, { "epoch": 6.315157116451017, "grad_norm": 0.06263673305511475, "learning_rate": 0.01, "loss": 1.9553, "step": 61497 }, { "epoch": 6.315465187923598, "grad_norm": 0.1437608301639557, "learning_rate": 0.01, "loss": 1.9877, "step": 61500 }, { "epoch": 6.31577325939618, "grad_norm": 0.1427847445011139, "learning_rate": 0.01, "loss": 1.9823, "step": 61503 }, { "epoch": 6.316081330868761, "grad_norm": 0.06643825769424438, "learning_rate": 0.01, "loss": 1.9792, "step": 61506 }, { "epoch": 6.316389402341343, "grad_norm": 0.05207568779587746, "learning_rate": 0.01, "loss": 1.966, "step": 61509 }, { "epoch": 6.3166974738139245, "grad_norm": 0.05009368434548378, "learning_rate": 0.01, "loss": 2.0048, "step": 61512 }, { "epoch": 6.317005545286507, "grad_norm": 0.060909830033779144, "learning_rate": 0.01, "loss": 2.0055, "step": 61515 }, { "epoch": 6.317313616759088, "grad_norm": 0.050222769379615784, "learning_rate": 0.01, "loss": 2.0128, "step": 61518 }, { "epoch": 6.31762168823167, "grad_norm": 0.03374829515814781, "learning_rate": 0.01, "loss": 1.9725, "step": 61521 }, { "epoch": 6.317929759704251, "grad_norm": 0.07113096117973328, "learning_rate": 0.01, "loss": 1.9825, "step": 61524 }, { "epoch": 6.318237831176833, "grad_norm": 0.061376191675662994, "learning_rate": 0.01, "loss": 1.9869, "step": 61527 }, { "epoch": 6.318545902649415, "grad_norm": 0.07323316484689713, "learning_rate": 0.01, "loss": 1.9764, "step": 61530 }, { "epoch": 6.318853974121996, "grad_norm": 0.07050851732492447, "learning_rate": 0.01, "loss": 1.9988, "step": 61533 }, { "epoch": 6.319162045594578, "grad_norm": 0.04889465495944023, "learning_rate": 0.01, "loss": 1.9957, "step": 61536 }, { "epoch": 6.319470117067159, "grad_norm": 0.10975257307291031, "learning_rate": 0.01, "loss": 1.9758, "step": 61539 }, { "epoch": 6.3197781885397415, "grad_norm": 0.06909262388944626, "learning_rate": 0.01, "loss": 1.9868, "step": 61542 }, { "epoch": 6.320086260012323, "grad_norm": 0.0708390325307846, "learning_rate": 0.01, "loss": 1.9606, "step": 61545 }, { "epoch": 6.320394331484905, "grad_norm": 0.04191325604915619, "learning_rate": 0.01, "loss": 1.993, "step": 61548 }, { "epoch": 6.320702402957486, "grad_norm": 0.04027121514081955, "learning_rate": 0.01, "loss": 1.9663, "step": 61551 }, { "epoch": 6.321010474430068, "grad_norm": 0.03705594688653946, "learning_rate": 0.01, "loss": 1.9917, "step": 61554 }, { "epoch": 6.321318545902649, "grad_norm": 0.04737750440835953, "learning_rate": 0.01, "loss": 1.989, "step": 61557 }, { "epoch": 6.321626617375231, "grad_norm": 0.1080993041396141, "learning_rate": 0.01, "loss": 1.9938, "step": 61560 }, { "epoch": 6.321934688847812, "grad_norm": 0.08178503066301346, "learning_rate": 0.01, "loss": 1.9862, "step": 61563 }, { "epoch": 6.322242760320394, "grad_norm": 0.09055802971124649, "learning_rate": 0.01, "loss": 1.9869, "step": 61566 }, { "epoch": 6.3225508317929755, "grad_norm": 0.03342537209391594, "learning_rate": 0.01, "loss": 1.9723, "step": 61569 }, { "epoch": 6.3228589032655576, "grad_norm": 0.12082437425851822, "learning_rate": 0.01, "loss": 1.9915, "step": 61572 }, { "epoch": 6.32316697473814, "grad_norm": 0.05053761973977089, "learning_rate": 0.01, "loss": 1.9779, "step": 61575 }, { "epoch": 6.323475046210721, "grad_norm": 0.0372505746781826, "learning_rate": 0.01, "loss": 1.9748, "step": 61578 }, { "epoch": 6.323783117683303, "grad_norm": 0.0640554130077362, "learning_rate": 0.01, "loss": 1.9961, "step": 61581 }, { "epoch": 6.324091189155884, "grad_norm": 0.04472392797470093, "learning_rate": 0.01, "loss": 1.9859, "step": 61584 }, { "epoch": 6.324399260628466, "grad_norm": 0.09325999766588211, "learning_rate": 0.01, "loss": 1.9669, "step": 61587 }, { "epoch": 6.324707332101047, "grad_norm": 0.07012490928173065, "learning_rate": 0.01, "loss": 1.9937, "step": 61590 }, { "epoch": 6.325015403573629, "grad_norm": 0.13324911892414093, "learning_rate": 0.01, "loss": 1.9792, "step": 61593 }, { "epoch": 6.32532347504621, "grad_norm": 0.074961818754673, "learning_rate": 0.01, "loss": 1.9883, "step": 61596 }, { "epoch": 6.3256315465187924, "grad_norm": 0.06711915135383606, "learning_rate": 0.01, "loss": 1.9935, "step": 61599 }, { "epoch": 6.325939617991374, "grad_norm": 0.12803415954113007, "learning_rate": 0.01, "loss": 2.0037, "step": 61602 }, { "epoch": 6.326247689463956, "grad_norm": 0.07260297238826752, "learning_rate": 0.01, "loss": 1.9913, "step": 61605 }, { "epoch": 6.326555760936538, "grad_norm": 0.16781297326087952, "learning_rate": 0.01, "loss": 1.9804, "step": 61608 }, { "epoch": 6.326863832409119, "grad_norm": 0.05119356885552406, "learning_rate": 0.01, "loss": 2.0217, "step": 61611 }, { "epoch": 6.327171903881701, "grad_norm": 0.05765308439731598, "learning_rate": 0.01, "loss": 1.9813, "step": 61614 }, { "epoch": 6.327479975354282, "grad_norm": 0.039318110793828964, "learning_rate": 0.01, "loss": 1.9755, "step": 61617 }, { "epoch": 6.327788046826864, "grad_norm": 0.04655304551124573, "learning_rate": 0.01, "loss": 1.9894, "step": 61620 }, { "epoch": 6.328096118299445, "grad_norm": 0.08642113953828812, "learning_rate": 0.01, "loss": 1.9762, "step": 61623 }, { "epoch": 6.328404189772027, "grad_norm": 0.04844063147902489, "learning_rate": 0.01, "loss": 1.9705, "step": 61626 }, { "epoch": 6.3287122612446085, "grad_norm": 0.058742884546518326, "learning_rate": 0.01, "loss": 2.0026, "step": 61629 }, { "epoch": 6.3290203327171906, "grad_norm": 0.036662664264440536, "learning_rate": 0.01, "loss": 1.9805, "step": 61632 }, { "epoch": 6.329328404189772, "grad_norm": 0.08772391080856323, "learning_rate": 0.01, "loss": 2.0158, "step": 61635 }, { "epoch": 6.329636475662354, "grad_norm": 0.06502344459295273, "learning_rate": 0.01, "loss": 1.9812, "step": 61638 }, { "epoch": 6.329944547134935, "grad_norm": 0.06459985673427582, "learning_rate": 0.01, "loss": 1.9836, "step": 61641 }, { "epoch": 6.330252618607517, "grad_norm": 0.03769855946302414, "learning_rate": 0.01, "loss": 1.9956, "step": 61644 }, { "epoch": 6.330560690080098, "grad_norm": 0.046935372054576874, "learning_rate": 0.01, "loss": 2.0003, "step": 61647 }, { "epoch": 6.33086876155268, "grad_norm": 0.04765419661998749, "learning_rate": 0.01, "loss": 1.9738, "step": 61650 }, { "epoch": 6.331176833025262, "grad_norm": 0.07804597169160843, "learning_rate": 0.01, "loss": 1.961, "step": 61653 }, { "epoch": 6.331484904497843, "grad_norm": 0.04642481356859207, "learning_rate": 0.01, "loss": 1.9794, "step": 61656 }, { "epoch": 6.3317929759704255, "grad_norm": 0.11623187363147736, "learning_rate": 0.01, "loss": 2.0001, "step": 61659 }, { "epoch": 6.332101047443007, "grad_norm": 0.08578827977180481, "learning_rate": 0.01, "loss": 1.9888, "step": 61662 }, { "epoch": 6.332409118915589, "grad_norm": 0.05141110345721245, "learning_rate": 0.01, "loss": 1.9732, "step": 61665 }, { "epoch": 6.33271719038817, "grad_norm": 0.03601311147212982, "learning_rate": 0.01, "loss": 1.9787, "step": 61668 }, { "epoch": 6.333025261860752, "grad_norm": 0.06763188540935516, "learning_rate": 0.01, "loss": 1.9865, "step": 61671 }, { "epoch": 6.333333333333333, "grad_norm": 0.08627673238515854, "learning_rate": 0.01, "loss": 2.0061, "step": 61674 }, { "epoch": 6.333641404805915, "grad_norm": 0.06787147372961044, "learning_rate": 0.01, "loss": 1.9816, "step": 61677 }, { "epoch": 6.333949476278496, "grad_norm": 0.08557649701833725, "learning_rate": 0.01, "loss": 1.9927, "step": 61680 }, { "epoch": 6.334257547751078, "grad_norm": 0.08303692191839218, "learning_rate": 0.01, "loss": 1.9503, "step": 61683 }, { "epoch": 6.33456561922366, "grad_norm": 0.06721941381692886, "learning_rate": 0.01, "loss": 1.9774, "step": 61686 }, { "epoch": 6.3348736906962415, "grad_norm": 0.11784909665584564, "learning_rate": 0.01, "loss": 1.969, "step": 61689 }, { "epoch": 6.335181762168824, "grad_norm": 0.04680579900741577, "learning_rate": 0.01, "loss": 1.9926, "step": 61692 }, { "epoch": 6.335489833641405, "grad_norm": 0.10561893880367279, "learning_rate": 0.01, "loss": 1.9924, "step": 61695 }, { "epoch": 6.335797905113987, "grad_norm": 0.10209949314594269, "learning_rate": 0.01, "loss": 1.9784, "step": 61698 }, { "epoch": 6.336105976586568, "grad_norm": 0.0697985514998436, "learning_rate": 0.01, "loss": 1.9715, "step": 61701 }, { "epoch": 6.33641404805915, "grad_norm": 0.04770383983850479, "learning_rate": 0.01, "loss": 1.9882, "step": 61704 }, { "epoch": 6.336722119531731, "grad_norm": 0.03596745431423187, "learning_rate": 0.01, "loss": 1.9572, "step": 61707 }, { "epoch": 6.337030191004313, "grad_norm": 0.05980326980352402, "learning_rate": 0.01, "loss": 1.9913, "step": 61710 }, { "epoch": 6.337338262476894, "grad_norm": 0.04213361069560051, "learning_rate": 0.01, "loss": 1.987, "step": 61713 }, { "epoch": 6.337646333949476, "grad_norm": 0.12276975810527802, "learning_rate": 0.01, "loss": 1.9571, "step": 61716 }, { "epoch": 6.337954405422058, "grad_norm": 0.04725951328873634, "learning_rate": 0.01, "loss": 1.9701, "step": 61719 }, { "epoch": 6.33826247689464, "grad_norm": 0.08252322673797607, "learning_rate": 0.01, "loss": 1.9917, "step": 61722 }, { "epoch": 6.338570548367221, "grad_norm": 0.12866702675819397, "learning_rate": 0.01, "loss": 1.9933, "step": 61725 }, { "epoch": 6.338878619839803, "grad_norm": 0.11567038297653198, "learning_rate": 0.01, "loss": 1.9661, "step": 61728 }, { "epoch": 6.339186691312385, "grad_norm": 0.048908136785030365, "learning_rate": 0.01, "loss": 1.9765, "step": 61731 }, { "epoch": 6.339494762784966, "grad_norm": 0.04868915677070618, "learning_rate": 0.01, "loss": 1.9635, "step": 61734 }, { "epoch": 6.339802834257548, "grad_norm": 0.03802793473005295, "learning_rate": 0.01, "loss": 1.9711, "step": 61737 }, { "epoch": 6.340110905730129, "grad_norm": 0.040892910212278366, "learning_rate": 0.01, "loss": 1.9691, "step": 61740 }, { "epoch": 6.340418977202711, "grad_norm": 0.057897284626960754, "learning_rate": 0.01, "loss": 1.9802, "step": 61743 }, { "epoch": 6.3407270486752925, "grad_norm": 0.034404922276735306, "learning_rate": 0.01, "loss": 1.9694, "step": 61746 }, { "epoch": 6.3410351201478745, "grad_norm": 0.05493491142988205, "learning_rate": 0.01, "loss": 1.9567, "step": 61749 }, { "epoch": 6.341343191620456, "grad_norm": 0.09743682295084, "learning_rate": 0.01, "loss": 1.9753, "step": 61752 }, { "epoch": 6.341651263093038, "grad_norm": 0.06471186131238937, "learning_rate": 0.01, "loss": 1.9743, "step": 61755 }, { "epoch": 6.341959334565619, "grad_norm": 0.07594270259141922, "learning_rate": 0.01, "loss": 2.0016, "step": 61758 }, { "epoch": 6.342267406038201, "grad_norm": 0.05741607025265694, "learning_rate": 0.01, "loss": 1.9866, "step": 61761 }, { "epoch": 6.342575477510782, "grad_norm": 0.07117846608161926, "learning_rate": 0.01, "loss": 1.9908, "step": 61764 }, { "epoch": 6.342883548983364, "grad_norm": 0.08182870596647263, "learning_rate": 0.01, "loss": 1.9964, "step": 61767 }, { "epoch": 6.343191620455945, "grad_norm": 0.06772911548614502, "learning_rate": 0.01, "loss": 2.0039, "step": 61770 }, { "epoch": 6.343499691928527, "grad_norm": 0.04902360588312149, "learning_rate": 0.01, "loss": 1.9784, "step": 61773 }, { "epoch": 6.343807763401109, "grad_norm": 0.12090668827295303, "learning_rate": 0.01, "loss": 1.9589, "step": 61776 }, { "epoch": 6.344115834873691, "grad_norm": 0.04338901489973068, "learning_rate": 0.01, "loss": 1.9793, "step": 61779 }, { "epoch": 6.344423906346273, "grad_norm": 0.07592718303203583, "learning_rate": 0.01, "loss": 1.9873, "step": 61782 }, { "epoch": 6.344731977818854, "grad_norm": 0.0540313795208931, "learning_rate": 0.01, "loss": 1.9724, "step": 61785 }, { "epoch": 6.345040049291436, "grad_norm": 0.10975626856088638, "learning_rate": 0.01, "loss": 1.9752, "step": 61788 }, { "epoch": 6.345348120764017, "grad_norm": 0.1409357637166977, "learning_rate": 0.01, "loss": 1.9704, "step": 61791 }, { "epoch": 6.345656192236599, "grad_norm": 0.10727324336767197, "learning_rate": 0.01, "loss": 1.9466, "step": 61794 }, { "epoch": 6.34596426370918, "grad_norm": 0.08473125845193863, "learning_rate": 0.01, "loss": 1.9707, "step": 61797 }, { "epoch": 6.346272335181762, "grad_norm": 0.06947775185108185, "learning_rate": 0.01, "loss": 1.979, "step": 61800 }, { "epoch": 6.346580406654343, "grad_norm": 0.06887286901473999, "learning_rate": 0.01, "loss": 1.979, "step": 61803 }, { "epoch": 6.3468884781269255, "grad_norm": 0.08204744011163712, "learning_rate": 0.01, "loss": 1.9594, "step": 61806 }, { "epoch": 6.3471965495995075, "grad_norm": 0.06321869790554047, "learning_rate": 0.01, "loss": 1.9524, "step": 61809 }, { "epoch": 6.347504621072089, "grad_norm": 0.06997507065534592, "learning_rate": 0.01, "loss": 1.9743, "step": 61812 }, { "epoch": 6.347812692544671, "grad_norm": 0.04468904808163643, "learning_rate": 0.01, "loss": 1.9758, "step": 61815 }, { "epoch": 6.348120764017252, "grad_norm": 0.07359863072633743, "learning_rate": 0.01, "loss": 1.9894, "step": 61818 }, { "epoch": 6.348428835489834, "grad_norm": 0.08013535290956497, "learning_rate": 0.01, "loss": 1.9686, "step": 61821 }, { "epoch": 6.348736906962415, "grad_norm": 0.08246944099664688, "learning_rate": 0.01, "loss": 1.9988, "step": 61824 }, { "epoch": 6.349044978434997, "grad_norm": 0.05869217962026596, "learning_rate": 0.01, "loss": 1.9918, "step": 61827 }, { "epoch": 6.349353049907578, "grad_norm": 0.04672044888138771, "learning_rate": 0.01, "loss": 1.9645, "step": 61830 }, { "epoch": 6.34966112138016, "grad_norm": 0.03837782144546509, "learning_rate": 0.01, "loss": 1.9654, "step": 61833 }, { "epoch": 6.3499691928527415, "grad_norm": 0.04574459418654442, "learning_rate": 0.01, "loss": 1.9593, "step": 61836 }, { "epoch": 6.350277264325324, "grad_norm": 0.03845152631402016, "learning_rate": 0.01, "loss": 1.9607, "step": 61839 }, { "epoch": 6.350585335797905, "grad_norm": 0.03869105130434036, "learning_rate": 0.01, "loss": 1.9727, "step": 61842 }, { "epoch": 6.350893407270487, "grad_norm": 0.08753649145364761, "learning_rate": 0.01, "loss": 2.0112, "step": 61845 }, { "epoch": 6.351201478743068, "grad_norm": 0.05175444483757019, "learning_rate": 0.01, "loss": 1.9668, "step": 61848 }, { "epoch": 6.35150955021565, "grad_norm": 0.08007702976465225, "learning_rate": 0.01, "loss": 1.9545, "step": 61851 }, { "epoch": 6.351817621688232, "grad_norm": 0.08175135403871536, "learning_rate": 0.01, "loss": 1.9735, "step": 61854 }, { "epoch": 6.352125693160813, "grad_norm": 0.042124420404434204, "learning_rate": 0.01, "loss": 1.9739, "step": 61857 }, { "epoch": 6.352433764633395, "grad_norm": 0.030145036056637764, "learning_rate": 0.01, "loss": 1.9872, "step": 61860 }, { "epoch": 6.352741836105976, "grad_norm": 0.05578388646245003, "learning_rate": 0.01, "loss": 1.9838, "step": 61863 }, { "epoch": 6.3530499075785585, "grad_norm": 0.06224137172102928, "learning_rate": 0.01, "loss": 1.9986, "step": 61866 }, { "epoch": 6.35335797905114, "grad_norm": 0.03723108768463135, "learning_rate": 0.01, "loss": 1.9953, "step": 61869 }, { "epoch": 6.353666050523722, "grad_norm": 0.05681099742650986, "learning_rate": 0.01, "loss": 1.9688, "step": 61872 }, { "epoch": 6.353974121996303, "grad_norm": 0.06745577603578568, "learning_rate": 0.01, "loss": 1.9823, "step": 61875 }, { "epoch": 6.354282193468885, "grad_norm": 0.17442718148231506, "learning_rate": 0.01, "loss": 1.9928, "step": 61878 }, { "epoch": 6.354590264941466, "grad_norm": 0.07263945788145065, "learning_rate": 0.01, "loss": 1.9649, "step": 61881 }, { "epoch": 6.354898336414048, "grad_norm": 0.047694019973278046, "learning_rate": 0.01, "loss": 1.9744, "step": 61884 }, { "epoch": 6.35520640788663, "grad_norm": 0.03988689184188843, "learning_rate": 0.01, "loss": 1.9882, "step": 61887 }, { "epoch": 6.355514479359211, "grad_norm": 0.06448500603437424, "learning_rate": 0.01, "loss": 1.9847, "step": 61890 }, { "epoch": 6.355822550831793, "grad_norm": 0.09949947893619537, "learning_rate": 0.01, "loss": 1.9953, "step": 61893 }, { "epoch": 6.3561306223043745, "grad_norm": 0.04593118652701378, "learning_rate": 0.01, "loss": 1.9963, "step": 61896 }, { "epoch": 6.356438693776957, "grad_norm": 0.05074120685458183, "learning_rate": 0.01, "loss": 1.9895, "step": 61899 }, { "epoch": 6.356746765249538, "grad_norm": 0.044762320816516876, "learning_rate": 0.01, "loss": 1.9759, "step": 61902 }, { "epoch": 6.35705483672212, "grad_norm": 0.04705975204706192, "learning_rate": 0.01, "loss": 1.9911, "step": 61905 }, { "epoch": 6.357362908194701, "grad_norm": 0.03957728296518326, "learning_rate": 0.01, "loss": 1.9721, "step": 61908 }, { "epoch": 6.357670979667283, "grad_norm": 0.050351545214653015, "learning_rate": 0.01, "loss": 1.9772, "step": 61911 }, { "epoch": 6.357979051139864, "grad_norm": 0.05305488035082817, "learning_rate": 0.01, "loss": 1.9602, "step": 61914 }, { "epoch": 6.358287122612446, "grad_norm": 0.03815659508109093, "learning_rate": 0.01, "loss": 1.9816, "step": 61917 }, { "epoch": 6.358595194085027, "grad_norm": 0.09699538350105286, "learning_rate": 0.01, "loss": 1.9716, "step": 61920 }, { "epoch": 6.358903265557609, "grad_norm": 0.13865162432193756, "learning_rate": 0.01, "loss": 2.0001, "step": 61923 }, { "epoch": 6.359211337030191, "grad_norm": 0.0713433250784874, "learning_rate": 0.01, "loss": 1.9895, "step": 61926 }, { "epoch": 6.359519408502773, "grad_norm": 0.06301635503768921, "learning_rate": 0.01, "loss": 1.9614, "step": 61929 }, { "epoch": 6.359827479975355, "grad_norm": 0.06763561815023422, "learning_rate": 0.01, "loss": 2.0041, "step": 61932 }, { "epoch": 6.360135551447936, "grad_norm": 0.051800407469272614, "learning_rate": 0.01, "loss": 1.9735, "step": 61935 }, { "epoch": 6.360443622920518, "grad_norm": 0.07054926455020905, "learning_rate": 0.01, "loss": 1.9721, "step": 61938 }, { "epoch": 6.360751694393099, "grad_norm": 0.09424702078104019, "learning_rate": 0.01, "loss": 1.9971, "step": 61941 }, { "epoch": 6.361059765865681, "grad_norm": 0.06715767085552216, "learning_rate": 0.01, "loss": 1.9801, "step": 61944 }, { "epoch": 6.361367837338262, "grad_norm": 0.052323173731565475, "learning_rate": 0.01, "loss": 1.9941, "step": 61947 }, { "epoch": 6.361675908810844, "grad_norm": 0.04951905459165573, "learning_rate": 0.01, "loss": 1.9737, "step": 61950 }, { "epoch": 6.3619839802834255, "grad_norm": 0.056527990847826004, "learning_rate": 0.01, "loss": 1.9546, "step": 61953 }, { "epoch": 6.3622920517560075, "grad_norm": 0.04317193478345871, "learning_rate": 0.01, "loss": 1.9747, "step": 61956 }, { "epoch": 6.362600123228589, "grad_norm": 0.07142667472362518, "learning_rate": 0.01, "loss": 1.9552, "step": 61959 }, { "epoch": 6.362908194701171, "grad_norm": 0.087591253221035, "learning_rate": 0.01, "loss": 1.9825, "step": 61962 }, { "epoch": 6.363216266173752, "grad_norm": 0.044100482016801834, "learning_rate": 0.01, "loss": 1.9708, "step": 61965 }, { "epoch": 6.363524337646334, "grad_norm": 0.05616932362318039, "learning_rate": 0.01, "loss": 1.9685, "step": 61968 }, { "epoch": 6.363832409118915, "grad_norm": 0.05864831432700157, "learning_rate": 0.01, "loss": 1.9846, "step": 61971 }, { "epoch": 6.364140480591497, "grad_norm": 0.04249054566025734, "learning_rate": 0.01, "loss": 1.992, "step": 61974 }, { "epoch": 6.364448552064079, "grad_norm": 0.047203633934259415, "learning_rate": 0.01, "loss": 1.9739, "step": 61977 }, { "epoch": 6.36475662353666, "grad_norm": 0.0450938381254673, "learning_rate": 0.01, "loss": 1.9829, "step": 61980 }, { "epoch": 6.365064695009242, "grad_norm": 0.13708913326263428, "learning_rate": 0.01, "loss": 1.97, "step": 61983 }, { "epoch": 6.365372766481824, "grad_norm": 0.0903947725892067, "learning_rate": 0.01, "loss": 1.9938, "step": 61986 }, { "epoch": 6.365680837954406, "grad_norm": 0.039598993957042694, "learning_rate": 0.01, "loss": 1.9699, "step": 61989 }, { "epoch": 6.365988909426987, "grad_norm": 0.06196282058954239, "learning_rate": 0.01, "loss": 1.9819, "step": 61992 }, { "epoch": 6.366296980899569, "grad_norm": 0.058830760419368744, "learning_rate": 0.01, "loss": 1.9776, "step": 61995 }, { "epoch": 6.36660505237215, "grad_norm": 0.03394271805882454, "learning_rate": 0.01, "loss": 1.9957, "step": 61998 }, { "epoch": 6.366913123844732, "grad_norm": 0.11069590598344803, "learning_rate": 0.01, "loss": 2.0019, "step": 62001 }, { "epoch": 6.367221195317313, "grad_norm": 0.03749784827232361, "learning_rate": 0.01, "loss": 1.9631, "step": 62004 }, { "epoch": 6.367529266789895, "grad_norm": 0.06352918595075607, "learning_rate": 0.01, "loss": 1.9842, "step": 62007 }, { "epoch": 6.367837338262477, "grad_norm": 0.037357673048973083, "learning_rate": 0.01, "loss": 1.9839, "step": 62010 }, { "epoch": 6.3681454097350585, "grad_norm": 0.09099667519330978, "learning_rate": 0.01, "loss": 1.9692, "step": 62013 }, { "epoch": 6.3684534812076405, "grad_norm": 0.054853178560733795, "learning_rate": 0.01, "loss": 2.0031, "step": 62016 }, { "epoch": 6.368761552680222, "grad_norm": 0.04815756157040596, "learning_rate": 0.01, "loss": 1.9458, "step": 62019 }, { "epoch": 6.369069624152804, "grad_norm": 0.12704138457775116, "learning_rate": 0.01, "loss": 1.9922, "step": 62022 }, { "epoch": 6.369377695625385, "grad_norm": 0.1324865221977234, "learning_rate": 0.01, "loss": 1.9736, "step": 62025 }, { "epoch": 6.369685767097967, "grad_norm": 0.10938073694705963, "learning_rate": 0.01, "loss": 1.971, "step": 62028 }, { "epoch": 6.369993838570548, "grad_norm": 0.055503327399492264, "learning_rate": 0.01, "loss": 1.9997, "step": 62031 }, { "epoch": 6.37030191004313, "grad_norm": 0.049480196088552475, "learning_rate": 0.01, "loss": 1.9935, "step": 62034 }, { "epoch": 6.370609981515711, "grad_norm": 0.04294247552752495, "learning_rate": 0.01, "loss": 1.9979, "step": 62037 }, { "epoch": 6.370918052988293, "grad_norm": 0.03694196790456772, "learning_rate": 0.01, "loss": 1.9902, "step": 62040 }, { "epoch": 6.3712261244608746, "grad_norm": 0.08281165361404419, "learning_rate": 0.01, "loss": 2.0044, "step": 62043 }, { "epoch": 6.371534195933457, "grad_norm": 0.05012732744216919, "learning_rate": 0.01, "loss": 1.9971, "step": 62046 }, { "epoch": 6.371842267406038, "grad_norm": 0.06646348536014557, "learning_rate": 0.01, "loss": 1.9746, "step": 62049 }, { "epoch": 6.37215033887862, "grad_norm": 0.046320248395204544, "learning_rate": 0.01, "loss": 1.9818, "step": 62052 }, { "epoch": 6.372458410351202, "grad_norm": 0.13739866018295288, "learning_rate": 0.01, "loss": 1.9592, "step": 62055 }, { "epoch": 6.372766481823783, "grad_norm": 0.10860442370176315, "learning_rate": 0.01, "loss": 1.9933, "step": 62058 }, { "epoch": 6.373074553296365, "grad_norm": 0.1861836165189743, "learning_rate": 0.01, "loss": 1.9853, "step": 62061 }, { "epoch": 6.373382624768946, "grad_norm": 0.07088860869407654, "learning_rate": 0.01, "loss": 1.9816, "step": 62064 }, { "epoch": 6.373690696241528, "grad_norm": 0.055799081921577454, "learning_rate": 0.01, "loss": 1.9964, "step": 62067 }, { "epoch": 6.3739987677141094, "grad_norm": 0.08463507890701294, "learning_rate": 0.01, "loss": 1.988, "step": 62070 }, { "epoch": 6.3743068391866915, "grad_norm": 0.07785413414239883, "learning_rate": 0.01, "loss": 1.9594, "step": 62073 }, { "epoch": 6.374614910659273, "grad_norm": 0.042249925434589386, "learning_rate": 0.01, "loss": 1.9833, "step": 62076 }, { "epoch": 6.374922982131855, "grad_norm": 0.08121603727340698, "learning_rate": 0.01, "loss": 1.9567, "step": 62079 }, { "epoch": 6.375231053604436, "grad_norm": 0.08757340908050537, "learning_rate": 0.01, "loss": 2.0097, "step": 62082 }, { "epoch": 6.375539125077018, "grad_norm": 0.11189188808202744, "learning_rate": 0.01, "loss": 1.9862, "step": 62085 }, { "epoch": 6.3758471965496, "grad_norm": 0.04559013247489929, "learning_rate": 0.01, "loss": 1.9686, "step": 62088 }, { "epoch": 6.376155268022181, "grad_norm": 0.044948089867830276, "learning_rate": 0.01, "loss": 2.0105, "step": 62091 }, { "epoch": 6.376463339494763, "grad_norm": 0.030356265604496002, "learning_rate": 0.01, "loss": 1.9504, "step": 62094 }, { "epoch": 6.376771410967344, "grad_norm": 0.12627916038036346, "learning_rate": 0.01, "loss": 1.9999, "step": 62097 }, { "epoch": 6.377079482439926, "grad_norm": 0.046097807586193085, "learning_rate": 0.01, "loss": 1.968, "step": 62100 }, { "epoch": 6.377387553912508, "grad_norm": 0.057985857129096985, "learning_rate": 0.01, "loss": 2.0091, "step": 62103 }, { "epoch": 6.37769562538509, "grad_norm": 0.07194691151380539, "learning_rate": 0.01, "loss": 1.9823, "step": 62106 }, { "epoch": 6.378003696857671, "grad_norm": 0.07052842527627945, "learning_rate": 0.01, "loss": 1.9763, "step": 62109 }, { "epoch": 6.378311768330253, "grad_norm": 0.0420130155980587, "learning_rate": 0.01, "loss": 1.9822, "step": 62112 }, { "epoch": 6.378619839802834, "grad_norm": 0.08693109452724457, "learning_rate": 0.01, "loss": 1.9823, "step": 62115 }, { "epoch": 6.378927911275416, "grad_norm": 0.05671975761651993, "learning_rate": 0.01, "loss": 1.9806, "step": 62118 }, { "epoch": 6.379235982747997, "grad_norm": 0.04355829954147339, "learning_rate": 0.01, "loss": 1.9805, "step": 62121 }, { "epoch": 6.379544054220579, "grad_norm": 0.045782145112752914, "learning_rate": 0.01, "loss": 1.9765, "step": 62124 }, { "epoch": 6.37985212569316, "grad_norm": 0.07497083395719528, "learning_rate": 0.01, "loss": 1.994, "step": 62127 }, { "epoch": 6.3801601971657425, "grad_norm": 0.067107655107975, "learning_rate": 0.01, "loss": 1.9572, "step": 62130 }, { "epoch": 6.3804682686383245, "grad_norm": 0.10962393879890442, "learning_rate": 0.01, "loss": 1.9764, "step": 62133 }, { "epoch": 6.380776340110906, "grad_norm": 0.128241628408432, "learning_rate": 0.01, "loss": 1.96, "step": 62136 }, { "epoch": 6.381084411583488, "grad_norm": 0.03978749364614487, "learning_rate": 0.01, "loss": 1.9807, "step": 62139 }, { "epoch": 6.381392483056069, "grad_norm": 0.0986032783985138, "learning_rate": 0.01, "loss": 1.9917, "step": 62142 }, { "epoch": 6.381700554528651, "grad_norm": 0.05384555831551552, "learning_rate": 0.01, "loss": 1.9609, "step": 62145 }, { "epoch": 6.382008626001232, "grad_norm": 0.08981155604124069, "learning_rate": 0.01, "loss": 1.9911, "step": 62148 }, { "epoch": 6.382316697473814, "grad_norm": 0.14535114169120789, "learning_rate": 0.01, "loss": 1.9794, "step": 62151 }, { "epoch": 6.382624768946395, "grad_norm": 0.11009112000465393, "learning_rate": 0.01, "loss": 1.9863, "step": 62154 }, { "epoch": 6.382932840418977, "grad_norm": 0.07238960266113281, "learning_rate": 0.01, "loss": 1.9858, "step": 62157 }, { "epoch": 6.3832409118915585, "grad_norm": 0.0916430875658989, "learning_rate": 0.01, "loss": 1.9857, "step": 62160 }, { "epoch": 6.383548983364141, "grad_norm": 0.0457562655210495, "learning_rate": 0.01, "loss": 1.979, "step": 62163 }, { "epoch": 6.383857054836722, "grad_norm": 0.09100457280874252, "learning_rate": 0.01, "loss": 1.9816, "step": 62166 }, { "epoch": 6.384165126309304, "grad_norm": 0.06494874507188797, "learning_rate": 0.01, "loss": 1.9843, "step": 62169 }, { "epoch": 6.384473197781885, "grad_norm": 0.06534988433122635, "learning_rate": 0.01, "loss": 1.9821, "step": 62172 }, { "epoch": 6.384781269254467, "grad_norm": 0.04221831634640694, "learning_rate": 0.01, "loss": 1.9555, "step": 62175 }, { "epoch": 6.385089340727049, "grad_norm": 0.1350170373916626, "learning_rate": 0.01, "loss": 1.9788, "step": 62178 }, { "epoch": 6.38539741219963, "grad_norm": 0.10296313464641571, "learning_rate": 0.01, "loss": 1.9786, "step": 62181 }, { "epoch": 6.385705483672212, "grad_norm": 0.052958227694034576, "learning_rate": 0.01, "loss": 1.9942, "step": 62184 }, { "epoch": 6.386013555144793, "grad_norm": 0.046673450618982315, "learning_rate": 0.01, "loss": 1.9918, "step": 62187 }, { "epoch": 6.3863216266173755, "grad_norm": 0.052400704473257065, "learning_rate": 0.01, "loss": 1.9459, "step": 62190 }, { "epoch": 6.386629698089957, "grad_norm": 0.05483454838395119, "learning_rate": 0.01, "loss": 1.9565, "step": 62193 }, { "epoch": 6.386937769562539, "grad_norm": 0.04824971407651901, "learning_rate": 0.01, "loss": 1.9547, "step": 62196 }, { "epoch": 6.38724584103512, "grad_norm": 0.04102877154946327, "learning_rate": 0.01, "loss": 1.9761, "step": 62199 }, { "epoch": 6.387553912507702, "grad_norm": 0.052934035658836365, "learning_rate": 0.01, "loss": 1.9748, "step": 62202 }, { "epoch": 6.387861983980283, "grad_norm": 0.13166899979114532, "learning_rate": 0.01, "loss": 2.0194, "step": 62205 }, { "epoch": 6.388170055452865, "grad_norm": 0.06457889080047607, "learning_rate": 0.01, "loss": 1.9734, "step": 62208 }, { "epoch": 6.388478126925447, "grad_norm": 0.061608802527189255, "learning_rate": 0.01, "loss": 1.9968, "step": 62211 }, { "epoch": 6.388786198398028, "grad_norm": 0.03722333163022995, "learning_rate": 0.01, "loss": 1.9731, "step": 62214 }, { "epoch": 6.38909426987061, "grad_norm": 0.03712205961346626, "learning_rate": 0.01, "loss": 1.9694, "step": 62217 }, { "epoch": 6.3894023413431915, "grad_norm": 0.10517171770334244, "learning_rate": 0.01, "loss": 1.9878, "step": 62220 }, { "epoch": 6.389710412815774, "grad_norm": 0.07701227068901062, "learning_rate": 0.01, "loss": 1.9924, "step": 62223 }, { "epoch": 6.390018484288355, "grad_norm": 0.13547784090042114, "learning_rate": 0.01, "loss": 1.9829, "step": 62226 }, { "epoch": 6.390326555760937, "grad_norm": 0.11633336544036865, "learning_rate": 0.01, "loss": 2.0137, "step": 62229 }, { "epoch": 6.390634627233518, "grad_norm": 0.09636779129505157, "learning_rate": 0.01, "loss": 1.9763, "step": 62232 }, { "epoch": 6.3909426987061, "grad_norm": 0.08367501944303513, "learning_rate": 0.01, "loss": 1.9637, "step": 62235 }, { "epoch": 6.391250770178681, "grad_norm": 0.06181328743696213, "learning_rate": 0.01, "loss": 1.9545, "step": 62238 }, { "epoch": 6.391558841651263, "grad_norm": 0.08050753176212311, "learning_rate": 0.01, "loss": 2.0016, "step": 62241 }, { "epoch": 6.391866913123844, "grad_norm": 0.04931900277733803, "learning_rate": 0.01, "loss": 1.9901, "step": 62244 }, { "epoch": 6.392174984596426, "grad_norm": 0.050407491624355316, "learning_rate": 0.01, "loss": 2.001, "step": 62247 }, { "epoch": 6.392483056069008, "grad_norm": 0.03501970320940018, "learning_rate": 0.01, "loss": 1.9427, "step": 62250 }, { "epoch": 6.39279112754159, "grad_norm": 0.04515289515256882, "learning_rate": 0.01, "loss": 1.9769, "step": 62253 }, { "epoch": 6.393099199014172, "grad_norm": 0.07897801697254181, "learning_rate": 0.01, "loss": 1.9967, "step": 62256 }, { "epoch": 6.393407270486753, "grad_norm": 0.09098079800605774, "learning_rate": 0.01, "loss": 1.9675, "step": 62259 }, { "epoch": 6.393715341959335, "grad_norm": 0.046846143901348114, "learning_rate": 0.01, "loss": 1.9782, "step": 62262 }, { "epoch": 6.394023413431916, "grad_norm": 0.08435478061437607, "learning_rate": 0.01, "loss": 2.0053, "step": 62265 }, { "epoch": 6.394331484904498, "grad_norm": 0.04902444779872894, "learning_rate": 0.01, "loss": 2.0083, "step": 62268 }, { "epoch": 6.394639556377079, "grad_norm": 0.053175996989011765, "learning_rate": 0.01, "loss": 1.9669, "step": 62271 }, { "epoch": 6.394947627849661, "grad_norm": 0.04319942370057106, "learning_rate": 0.01, "loss": 1.957, "step": 62274 }, { "epoch": 6.3952556993222425, "grad_norm": 0.056976351886987686, "learning_rate": 0.01, "loss": 1.979, "step": 62277 }, { "epoch": 6.3955637707948245, "grad_norm": 0.12992046773433685, "learning_rate": 0.01, "loss": 1.9898, "step": 62280 }, { "epoch": 6.395871842267406, "grad_norm": 0.0788559541106224, "learning_rate": 0.01, "loss": 1.9768, "step": 62283 }, { "epoch": 6.396179913739988, "grad_norm": 0.039425671100616455, "learning_rate": 0.01, "loss": 1.9658, "step": 62286 }, { "epoch": 6.39648798521257, "grad_norm": 0.0928255170583725, "learning_rate": 0.01, "loss": 1.976, "step": 62289 }, { "epoch": 6.396796056685151, "grad_norm": 0.06437569856643677, "learning_rate": 0.01, "loss": 2.011, "step": 62292 }, { "epoch": 6.397104128157733, "grad_norm": 0.05675747990608215, "learning_rate": 0.01, "loss": 1.9789, "step": 62295 }, { "epoch": 6.397412199630314, "grad_norm": 0.06161409988999367, "learning_rate": 0.01, "loss": 1.9933, "step": 62298 }, { "epoch": 6.397720271102896, "grad_norm": 0.035643287003040314, "learning_rate": 0.01, "loss": 1.9875, "step": 62301 }, { "epoch": 6.398028342575477, "grad_norm": 0.06932775676250458, "learning_rate": 0.01, "loss": 2.0093, "step": 62304 }, { "epoch": 6.398336414048059, "grad_norm": 0.1191667765378952, "learning_rate": 0.01, "loss": 1.9782, "step": 62307 }, { "epoch": 6.398644485520641, "grad_norm": 0.06702014803886414, "learning_rate": 0.01, "loss": 1.9694, "step": 62310 }, { "epoch": 6.398952556993223, "grad_norm": 0.045102011412382126, "learning_rate": 0.01, "loss": 1.9784, "step": 62313 }, { "epoch": 6.399260628465804, "grad_norm": 0.042483970522880554, "learning_rate": 0.01, "loss": 1.9879, "step": 62316 }, { "epoch": 6.399568699938386, "grad_norm": 0.035098928958177567, "learning_rate": 0.01, "loss": 1.9512, "step": 62319 }, { "epoch": 6.399876771410967, "grad_norm": 0.05023285374045372, "learning_rate": 0.01, "loss": 1.9571, "step": 62322 }, { "epoch": 6.400184842883549, "grad_norm": 0.14267301559448242, "learning_rate": 0.01, "loss": 1.9759, "step": 62325 }, { "epoch": 6.40049291435613, "grad_norm": 0.12871447205543518, "learning_rate": 0.01, "loss": 1.9815, "step": 62328 }, { "epoch": 6.400800985828712, "grad_norm": 0.18785615265369415, "learning_rate": 0.01, "loss": 1.9662, "step": 62331 }, { "epoch": 6.401109057301294, "grad_norm": 0.14859537780284882, "learning_rate": 0.01, "loss": 1.9893, "step": 62334 }, { "epoch": 6.4014171287738755, "grad_norm": 0.10787920653820038, "learning_rate": 0.01, "loss": 1.979, "step": 62337 }, { "epoch": 6.4017252002464575, "grad_norm": 0.0459236241877079, "learning_rate": 0.01, "loss": 1.9536, "step": 62340 }, { "epoch": 6.402033271719039, "grad_norm": 0.04476577043533325, "learning_rate": 0.01, "loss": 1.9731, "step": 62343 }, { "epoch": 6.402341343191621, "grad_norm": 0.04856594651937485, "learning_rate": 0.01, "loss": 1.9599, "step": 62346 }, { "epoch": 6.402649414664202, "grad_norm": 0.047260161489248276, "learning_rate": 0.01, "loss": 1.9652, "step": 62349 }, { "epoch": 6.402957486136784, "grad_norm": 0.05097677931189537, "learning_rate": 0.01, "loss": 1.9836, "step": 62352 }, { "epoch": 6.403265557609365, "grad_norm": 0.10697360336780548, "learning_rate": 0.01, "loss": 1.9619, "step": 62355 }, { "epoch": 6.403573629081947, "grad_norm": 0.11435877531766891, "learning_rate": 0.01, "loss": 1.9935, "step": 62358 }, { "epoch": 6.403881700554528, "grad_norm": 0.036153070628643036, "learning_rate": 0.01, "loss": 1.9515, "step": 62361 }, { "epoch": 6.40418977202711, "grad_norm": 0.07002195715904236, "learning_rate": 0.01, "loss": 1.9677, "step": 62364 }, { "epoch": 6.4044978434996915, "grad_norm": 0.05492027476429939, "learning_rate": 0.01, "loss": 1.9763, "step": 62367 }, { "epoch": 6.404805914972274, "grad_norm": 0.06763515621423721, "learning_rate": 0.01, "loss": 1.97, "step": 62370 }, { "epoch": 6.405113986444855, "grad_norm": 0.03954880312085152, "learning_rate": 0.01, "loss": 1.9756, "step": 62373 }, { "epoch": 6.405422057917437, "grad_norm": 0.06005239859223366, "learning_rate": 0.01, "loss": 1.9666, "step": 62376 }, { "epoch": 6.405730129390019, "grad_norm": 0.06014340743422508, "learning_rate": 0.01, "loss": 1.9814, "step": 62379 }, { "epoch": 6.4060382008626, "grad_norm": 0.05861683562397957, "learning_rate": 0.01, "loss": 1.9733, "step": 62382 }, { "epoch": 6.406346272335182, "grad_norm": 0.07609166949987411, "learning_rate": 0.01, "loss": 2.0015, "step": 62385 }, { "epoch": 6.406654343807763, "grad_norm": 0.08934800326824188, "learning_rate": 0.01, "loss": 1.9715, "step": 62388 }, { "epoch": 6.406962415280345, "grad_norm": 0.14556893706321716, "learning_rate": 0.01, "loss": 1.9868, "step": 62391 }, { "epoch": 6.407270486752926, "grad_norm": 0.07740515470504761, "learning_rate": 0.01, "loss": 1.9631, "step": 62394 }, { "epoch": 6.4075785582255085, "grad_norm": 0.04366164281964302, "learning_rate": 0.01, "loss": 1.9745, "step": 62397 }, { "epoch": 6.40788662969809, "grad_norm": 0.04770512506365776, "learning_rate": 0.01, "loss": 1.9704, "step": 62400 }, { "epoch": 6.408194701170672, "grad_norm": 0.04657142981886864, "learning_rate": 0.01, "loss": 1.9712, "step": 62403 }, { "epoch": 6.408502772643253, "grad_norm": 0.04207182675600052, "learning_rate": 0.01, "loss": 1.9766, "step": 62406 }, { "epoch": 6.408810844115835, "grad_norm": 0.05520906671881676, "learning_rate": 0.01, "loss": 1.9899, "step": 62409 }, { "epoch": 6.409118915588417, "grad_norm": 0.07524003833532333, "learning_rate": 0.01, "loss": 2.0114, "step": 62412 }, { "epoch": 6.409426987060998, "grad_norm": 0.04558379203081131, "learning_rate": 0.01, "loss": 1.99, "step": 62415 }, { "epoch": 6.40973505853358, "grad_norm": 0.09892844408750534, "learning_rate": 0.01, "loss": 1.9771, "step": 62418 }, { "epoch": 6.410043130006161, "grad_norm": 0.07591421902179718, "learning_rate": 0.01, "loss": 1.9601, "step": 62421 }, { "epoch": 6.410351201478743, "grad_norm": 0.09021826833486557, "learning_rate": 0.01, "loss": 2.0068, "step": 62424 }, { "epoch": 6.4106592729513245, "grad_norm": 0.043195899575948715, "learning_rate": 0.01, "loss": 1.9557, "step": 62427 }, { "epoch": 6.410967344423907, "grad_norm": 0.10521116852760315, "learning_rate": 0.01, "loss": 1.9832, "step": 62430 }, { "epoch": 6.411275415896488, "grad_norm": 0.061183419078588486, "learning_rate": 0.01, "loss": 2.0113, "step": 62433 }, { "epoch": 6.41158348736907, "grad_norm": 0.0777936577796936, "learning_rate": 0.01, "loss": 2.0047, "step": 62436 }, { "epoch": 6.411891558841651, "grad_norm": 0.07783685624599457, "learning_rate": 0.01, "loss": 1.9624, "step": 62439 }, { "epoch": 6.412199630314233, "grad_norm": 0.10137835890054703, "learning_rate": 0.01, "loss": 1.9895, "step": 62442 }, { "epoch": 6.412507701786814, "grad_norm": 0.07583697885274887, "learning_rate": 0.01, "loss": 1.9631, "step": 62445 }, { "epoch": 6.412815773259396, "grad_norm": 0.05925040692090988, "learning_rate": 0.01, "loss": 1.9751, "step": 62448 }, { "epoch": 6.413123844731977, "grad_norm": 0.07420961558818817, "learning_rate": 0.01, "loss": 1.9927, "step": 62451 }, { "epoch": 6.413431916204559, "grad_norm": 0.07555720955133438, "learning_rate": 0.01, "loss": 1.9884, "step": 62454 }, { "epoch": 6.4137399876771415, "grad_norm": 0.05991441011428833, "learning_rate": 0.01, "loss": 1.9469, "step": 62457 }, { "epoch": 6.414048059149723, "grad_norm": 0.04959188401699066, "learning_rate": 0.01, "loss": 1.9636, "step": 62460 }, { "epoch": 6.414356130622305, "grad_norm": 0.09466046094894409, "learning_rate": 0.01, "loss": 1.9829, "step": 62463 }, { "epoch": 6.414664202094886, "grad_norm": 0.08828117698431015, "learning_rate": 0.01, "loss": 1.9817, "step": 62466 }, { "epoch": 6.414972273567468, "grad_norm": 0.0796947106719017, "learning_rate": 0.01, "loss": 1.9724, "step": 62469 }, { "epoch": 6.415280345040049, "grad_norm": 0.09187500178813934, "learning_rate": 0.01, "loss": 1.9676, "step": 62472 }, { "epoch": 6.415588416512631, "grad_norm": 0.032242316752672195, "learning_rate": 0.01, "loss": 1.9851, "step": 62475 }, { "epoch": 6.415896487985212, "grad_norm": 0.08650948852300644, "learning_rate": 0.01, "loss": 1.9873, "step": 62478 }, { "epoch": 6.416204559457794, "grad_norm": 0.07760648429393768, "learning_rate": 0.01, "loss": 1.985, "step": 62481 }, { "epoch": 6.4165126309303755, "grad_norm": 0.10039196908473969, "learning_rate": 0.01, "loss": 1.9643, "step": 62484 }, { "epoch": 6.4168207024029575, "grad_norm": 0.06620651483535767, "learning_rate": 0.01, "loss": 1.984, "step": 62487 }, { "epoch": 6.41712877387554, "grad_norm": 0.07534004002809525, "learning_rate": 0.01, "loss": 1.975, "step": 62490 }, { "epoch": 6.417436845348121, "grad_norm": 0.04290451481938362, "learning_rate": 0.01, "loss": 1.9609, "step": 62493 }, { "epoch": 6.417744916820703, "grad_norm": 0.040604498237371445, "learning_rate": 0.01, "loss": 1.9901, "step": 62496 }, { "epoch": 6.418052988293284, "grad_norm": 0.04257834702730179, "learning_rate": 0.01, "loss": 1.9863, "step": 62499 }, { "epoch": 6.418361059765866, "grad_norm": 0.06738926470279694, "learning_rate": 0.01, "loss": 1.9999, "step": 62502 }, { "epoch": 6.418669131238447, "grad_norm": 0.05635688826441765, "learning_rate": 0.01, "loss": 2.0168, "step": 62505 }, { "epoch": 6.418977202711029, "grad_norm": 0.10578261315822601, "learning_rate": 0.01, "loss": 1.9704, "step": 62508 }, { "epoch": 6.41928527418361, "grad_norm": 0.08642464131116867, "learning_rate": 0.01, "loss": 1.9973, "step": 62511 }, { "epoch": 6.419593345656192, "grad_norm": 0.036675188690423965, "learning_rate": 0.01, "loss": 1.9664, "step": 62514 }, { "epoch": 6.419901417128774, "grad_norm": 0.14191675186157227, "learning_rate": 0.01, "loss": 1.9978, "step": 62517 }, { "epoch": 6.420209488601356, "grad_norm": 0.13353928923606873, "learning_rate": 0.01, "loss": 1.9822, "step": 62520 }, { "epoch": 6.420517560073937, "grad_norm": 0.06307082623243332, "learning_rate": 0.01, "loss": 1.9746, "step": 62523 }, { "epoch": 6.420825631546519, "grad_norm": 0.04877189174294472, "learning_rate": 0.01, "loss": 1.9895, "step": 62526 }, { "epoch": 6.4211337030191, "grad_norm": 0.07062195241451263, "learning_rate": 0.01, "loss": 1.9535, "step": 62529 }, { "epoch": 6.421441774491682, "grad_norm": 0.03799920529127121, "learning_rate": 0.01, "loss": 1.9847, "step": 62532 }, { "epoch": 6.421749845964264, "grad_norm": 0.0635172426700592, "learning_rate": 0.01, "loss": 1.9664, "step": 62535 }, { "epoch": 6.422057917436845, "grad_norm": 0.04074868932366371, "learning_rate": 0.01, "loss": 1.956, "step": 62538 }, { "epoch": 6.422365988909427, "grad_norm": 0.1227545514702797, "learning_rate": 0.01, "loss": 2.0093, "step": 62541 }, { "epoch": 6.4226740603820085, "grad_norm": 0.10859929770231247, "learning_rate": 0.01, "loss": 1.9823, "step": 62544 }, { "epoch": 6.4229821318545905, "grad_norm": 0.10589033365249634, "learning_rate": 0.01, "loss": 1.9719, "step": 62547 }, { "epoch": 6.423290203327172, "grad_norm": 0.1324671059846878, "learning_rate": 0.01, "loss": 1.9673, "step": 62550 }, { "epoch": 6.423598274799754, "grad_norm": 0.0601799450814724, "learning_rate": 0.01, "loss": 1.9681, "step": 62553 }, { "epoch": 6.423906346272335, "grad_norm": 0.05349354445934296, "learning_rate": 0.01, "loss": 1.9659, "step": 62556 }, { "epoch": 6.424214417744917, "grad_norm": 0.03839118406176567, "learning_rate": 0.01, "loss": 1.9703, "step": 62559 }, { "epoch": 6.424522489217498, "grad_norm": 0.035764630883932114, "learning_rate": 0.01, "loss": 1.9882, "step": 62562 }, { "epoch": 6.42483056069008, "grad_norm": 0.05545911565423012, "learning_rate": 0.01, "loss": 1.9916, "step": 62565 }, { "epoch": 6.425138632162661, "grad_norm": 0.044913556426763535, "learning_rate": 0.01, "loss": 1.9601, "step": 62568 }, { "epoch": 6.425446703635243, "grad_norm": 0.040859974920749664, "learning_rate": 0.01, "loss": 2.0019, "step": 62571 }, { "epoch": 6.425754775107825, "grad_norm": 0.04757828265428543, "learning_rate": 0.01, "loss": 1.9723, "step": 62574 }, { "epoch": 6.426062846580407, "grad_norm": 0.0439058393239975, "learning_rate": 0.01, "loss": 1.9681, "step": 62577 }, { "epoch": 6.426370918052989, "grad_norm": 0.059357352554798126, "learning_rate": 0.01, "loss": 1.9749, "step": 62580 }, { "epoch": 6.42667898952557, "grad_norm": 0.1129942387342453, "learning_rate": 0.01, "loss": 1.957, "step": 62583 }, { "epoch": 6.426987060998152, "grad_norm": 0.09056062996387482, "learning_rate": 0.01, "loss": 1.9782, "step": 62586 }, { "epoch": 6.427295132470733, "grad_norm": 0.05224711075425148, "learning_rate": 0.01, "loss": 1.9771, "step": 62589 }, { "epoch": 6.427603203943315, "grad_norm": 0.03806770220398903, "learning_rate": 0.01, "loss": 1.9736, "step": 62592 }, { "epoch": 6.427911275415896, "grad_norm": 0.045875824987888336, "learning_rate": 0.01, "loss": 1.9619, "step": 62595 }, { "epoch": 6.428219346888478, "grad_norm": 0.054107557982206345, "learning_rate": 0.01, "loss": 1.9426, "step": 62598 }, { "epoch": 6.4285274183610595, "grad_norm": 0.049330100417137146, "learning_rate": 0.01, "loss": 1.9399, "step": 62601 }, { "epoch": 6.4288354898336415, "grad_norm": 0.09636044502258301, "learning_rate": 0.01, "loss": 1.9681, "step": 62604 }, { "epoch": 6.429143561306223, "grad_norm": 0.06452757120132446, "learning_rate": 0.01, "loss": 1.9612, "step": 62607 }, { "epoch": 6.429451632778805, "grad_norm": 0.04598628729581833, "learning_rate": 0.01, "loss": 1.9745, "step": 62610 }, { "epoch": 6.429759704251387, "grad_norm": 0.06763578951358795, "learning_rate": 0.01, "loss": 1.9924, "step": 62613 }, { "epoch": 6.430067775723968, "grad_norm": 0.12232638895511627, "learning_rate": 0.01, "loss": 1.9908, "step": 62616 }, { "epoch": 6.43037584719655, "grad_norm": 0.09073688089847565, "learning_rate": 0.01, "loss": 1.9598, "step": 62619 }, { "epoch": 6.430683918669131, "grad_norm": 0.08724263310432434, "learning_rate": 0.01, "loss": 1.9629, "step": 62622 }, { "epoch": 6.430991990141713, "grad_norm": 0.11307045817375183, "learning_rate": 0.01, "loss": 1.9681, "step": 62625 }, { "epoch": 6.431300061614294, "grad_norm": 0.037230703979730606, "learning_rate": 0.01, "loss": 1.9507, "step": 62628 }, { "epoch": 6.431608133086876, "grad_norm": 0.06643498688936234, "learning_rate": 0.01, "loss": 1.9711, "step": 62631 }, { "epoch": 6.431916204559458, "grad_norm": 0.04486044496297836, "learning_rate": 0.01, "loss": 1.9715, "step": 62634 }, { "epoch": 6.43222427603204, "grad_norm": 0.03463894501328468, "learning_rate": 0.01, "loss": 2.0016, "step": 62637 }, { "epoch": 6.432532347504621, "grad_norm": 0.04060268774628639, "learning_rate": 0.01, "loss": 1.975, "step": 62640 }, { "epoch": 6.432840418977203, "grad_norm": 0.11157824844121933, "learning_rate": 0.01, "loss": 1.9548, "step": 62643 }, { "epoch": 6.433148490449784, "grad_norm": 0.05599267780780792, "learning_rate": 0.01, "loss": 1.9936, "step": 62646 }, { "epoch": 6.433456561922366, "grad_norm": 0.05528303235769272, "learning_rate": 0.01, "loss": 2.0101, "step": 62649 }, { "epoch": 6.433764633394947, "grad_norm": 0.053959596902132034, "learning_rate": 0.01, "loss": 2.0109, "step": 62652 }, { "epoch": 6.434072704867529, "grad_norm": 0.06588397920131683, "learning_rate": 0.01, "loss": 1.989, "step": 62655 }, { "epoch": 6.434380776340111, "grad_norm": 0.058823537081480026, "learning_rate": 0.01, "loss": 1.9699, "step": 62658 }, { "epoch": 6.4346888478126925, "grad_norm": 0.1689031720161438, "learning_rate": 0.01, "loss": 1.9662, "step": 62661 }, { "epoch": 6.4349969192852745, "grad_norm": 0.09005407243967056, "learning_rate": 0.01, "loss": 2.0074, "step": 62664 }, { "epoch": 6.435304990757856, "grad_norm": 0.04335148632526398, "learning_rate": 0.01, "loss": 1.976, "step": 62667 }, { "epoch": 6.435613062230438, "grad_norm": 0.04634424299001694, "learning_rate": 0.01, "loss": 1.9452, "step": 62670 }, { "epoch": 6.435921133703019, "grad_norm": 0.03891894966363907, "learning_rate": 0.01, "loss": 1.9724, "step": 62673 }, { "epoch": 6.436229205175601, "grad_norm": 0.05635913461446762, "learning_rate": 0.01, "loss": 1.9774, "step": 62676 }, { "epoch": 6.436537276648182, "grad_norm": 0.05616259574890137, "learning_rate": 0.01, "loss": 1.9613, "step": 62679 }, { "epoch": 6.436845348120764, "grad_norm": 0.1745036244392395, "learning_rate": 0.01, "loss": 1.97, "step": 62682 }, { "epoch": 6.437153419593345, "grad_norm": 0.09336113184690475, "learning_rate": 0.01, "loss": 1.9742, "step": 62685 }, { "epoch": 6.437461491065927, "grad_norm": 0.06508792191743851, "learning_rate": 0.01, "loss": 1.9913, "step": 62688 }, { "epoch": 6.437769562538509, "grad_norm": 0.08041112869977951, "learning_rate": 0.01, "loss": 1.9879, "step": 62691 }, { "epoch": 6.438077634011091, "grad_norm": 0.050097182393074036, "learning_rate": 0.01, "loss": 1.9817, "step": 62694 }, { "epoch": 6.438385705483673, "grad_norm": 0.09112852811813354, "learning_rate": 0.01, "loss": 2.0008, "step": 62697 }, { "epoch": 6.438693776956254, "grad_norm": 0.06812035292387009, "learning_rate": 0.01, "loss": 2.0099, "step": 62700 }, { "epoch": 6.439001848428836, "grad_norm": 0.04868394508957863, "learning_rate": 0.01, "loss": 1.9786, "step": 62703 }, { "epoch": 6.439309919901417, "grad_norm": 0.03731069713830948, "learning_rate": 0.01, "loss": 1.9865, "step": 62706 }, { "epoch": 6.439617991373999, "grad_norm": 0.03705144673585892, "learning_rate": 0.01, "loss": 1.9439, "step": 62709 }, { "epoch": 6.43992606284658, "grad_norm": 0.06617160141468048, "learning_rate": 0.01, "loss": 1.9641, "step": 62712 }, { "epoch": 6.440234134319162, "grad_norm": 0.09520690143108368, "learning_rate": 0.01, "loss": 1.9827, "step": 62715 }, { "epoch": 6.440542205791743, "grad_norm": 0.10066824406385422, "learning_rate": 0.01, "loss": 1.9998, "step": 62718 }, { "epoch": 6.4408502772643255, "grad_norm": 0.04232907295227051, "learning_rate": 0.01, "loss": 1.9729, "step": 62721 }, { "epoch": 6.441158348736907, "grad_norm": 0.08188414573669434, "learning_rate": 0.01, "loss": 1.9495, "step": 62724 }, { "epoch": 6.441466420209489, "grad_norm": 0.11537371575832367, "learning_rate": 0.01, "loss": 2.0005, "step": 62727 }, { "epoch": 6.44177449168207, "grad_norm": 0.03267863765358925, "learning_rate": 0.01, "loss": 1.9782, "step": 62730 }, { "epoch": 6.442082563154652, "grad_norm": 0.0837605893611908, "learning_rate": 0.01, "loss": 1.9695, "step": 62733 }, { "epoch": 6.442390634627234, "grad_norm": 0.05545349791646004, "learning_rate": 0.01, "loss": 1.9948, "step": 62736 }, { "epoch": 6.442698706099815, "grad_norm": 0.06512763351202011, "learning_rate": 0.01, "loss": 1.9974, "step": 62739 }, { "epoch": 6.443006777572397, "grad_norm": 0.051384586840867996, "learning_rate": 0.01, "loss": 1.9842, "step": 62742 }, { "epoch": 6.443314849044978, "grad_norm": 0.035269416868686676, "learning_rate": 0.01, "loss": 1.9936, "step": 62745 }, { "epoch": 6.44362292051756, "grad_norm": 0.11074446141719818, "learning_rate": 0.01, "loss": 2.0109, "step": 62748 }, { "epoch": 6.4439309919901415, "grad_norm": 0.05051399767398834, "learning_rate": 0.01, "loss": 1.9779, "step": 62751 }, { "epoch": 6.444239063462724, "grad_norm": 0.07275965064764023, "learning_rate": 0.01, "loss": 1.9984, "step": 62754 }, { "epoch": 6.444547134935305, "grad_norm": 0.07027794420719147, "learning_rate": 0.01, "loss": 1.999, "step": 62757 }, { "epoch": 6.444855206407887, "grad_norm": 0.11397121101617813, "learning_rate": 0.01, "loss": 1.9713, "step": 62760 }, { "epoch": 6.445163277880468, "grad_norm": 0.08914832770824432, "learning_rate": 0.01, "loss": 1.9851, "step": 62763 }, { "epoch": 6.44547134935305, "grad_norm": 0.1111316904425621, "learning_rate": 0.01, "loss": 1.9661, "step": 62766 }, { "epoch": 6.445779420825631, "grad_norm": 0.11033376306295395, "learning_rate": 0.01, "loss": 2.001, "step": 62769 }, { "epoch": 6.446087492298213, "grad_norm": 0.07109701633453369, "learning_rate": 0.01, "loss": 1.9886, "step": 62772 }, { "epoch": 6.446395563770794, "grad_norm": 0.0448211170732975, "learning_rate": 0.01, "loss": 1.9995, "step": 62775 }, { "epoch": 6.446703635243376, "grad_norm": 0.04059508815407753, "learning_rate": 0.01, "loss": 1.9767, "step": 62778 }, { "epoch": 6.4470117067159585, "grad_norm": 0.04148499295115471, "learning_rate": 0.01, "loss": 1.9891, "step": 62781 }, { "epoch": 6.44731977818854, "grad_norm": 0.04664915055036545, "learning_rate": 0.01, "loss": 1.969, "step": 62784 }, { "epoch": 6.447627849661122, "grad_norm": 0.03414386510848999, "learning_rate": 0.01, "loss": 1.9736, "step": 62787 }, { "epoch": 6.447935921133703, "grad_norm": 0.047360535711050034, "learning_rate": 0.01, "loss": 1.9931, "step": 62790 }, { "epoch": 6.448243992606285, "grad_norm": 0.11773015558719635, "learning_rate": 0.01, "loss": 1.9424, "step": 62793 }, { "epoch": 6.448552064078866, "grad_norm": 0.08103271573781967, "learning_rate": 0.01, "loss": 2.0067, "step": 62796 }, { "epoch": 6.448860135551448, "grad_norm": 0.07534775882959366, "learning_rate": 0.01, "loss": 1.9818, "step": 62799 }, { "epoch": 6.449168207024029, "grad_norm": 0.0421251580119133, "learning_rate": 0.01, "loss": 1.9899, "step": 62802 }, { "epoch": 6.449476278496611, "grad_norm": 0.06340447813272476, "learning_rate": 0.01, "loss": 1.9611, "step": 62805 }, { "epoch": 6.4497843499691925, "grad_norm": 0.06287892907857895, "learning_rate": 0.01, "loss": 1.9908, "step": 62808 }, { "epoch": 6.4500924214417745, "grad_norm": 0.11696802824735641, "learning_rate": 0.01, "loss": 1.9796, "step": 62811 }, { "epoch": 6.450400492914357, "grad_norm": 0.061545319855213165, "learning_rate": 0.01, "loss": 1.9963, "step": 62814 }, { "epoch": 6.450708564386938, "grad_norm": 0.04134809225797653, "learning_rate": 0.01, "loss": 1.9419, "step": 62817 }, { "epoch": 6.45101663585952, "grad_norm": 0.11149853467941284, "learning_rate": 0.01, "loss": 1.9855, "step": 62820 }, { "epoch": 6.451324707332101, "grad_norm": 0.06323488801717758, "learning_rate": 0.01, "loss": 1.9718, "step": 62823 }, { "epoch": 6.451632778804683, "grad_norm": 0.0836939662694931, "learning_rate": 0.01, "loss": 2.006, "step": 62826 }, { "epoch": 6.451940850277264, "grad_norm": 0.04251860827207565, "learning_rate": 0.01, "loss": 1.9761, "step": 62829 }, { "epoch": 6.452248921749846, "grad_norm": 0.08365978300571442, "learning_rate": 0.01, "loss": 1.977, "step": 62832 }, { "epoch": 6.452556993222427, "grad_norm": 0.07985960692167282, "learning_rate": 0.01, "loss": 2.0022, "step": 62835 }, { "epoch": 6.452865064695009, "grad_norm": 0.0690624862909317, "learning_rate": 0.01, "loss": 1.9691, "step": 62838 }, { "epoch": 6.453173136167591, "grad_norm": 0.07743382453918457, "learning_rate": 0.01, "loss": 1.9828, "step": 62841 }, { "epoch": 6.453481207640173, "grad_norm": 0.06049206852912903, "learning_rate": 0.01, "loss": 2.0078, "step": 62844 }, { "epoch": 6.453789279112754, "grad_norm": 0.049416352063417435, "learning_rate": 0.01, "loss": 1.9806, "step": 62847 }, { "epoch": 6.454097350585336, "grad_norm": 0.03889904171228409, "learning_rate": 0.01, "loss": 1.9522, "step": 62850 }, { "epoch": 6.454405422057917, "grad_norm": 0.03183984011411667, "learning_rate": 0.01, "loss": 1.9544, "step": 62853 }, { "epoch": 6.454713493530499, "grad_norm": 0.03685016930103302, "learning_rate": 0.01, "loss": 1.9953, "step": 62856 }, { "epoch": 6.455021565003081, "grad_norm": 0.05258958786725998, "learning_rate": 0.01, "loss": 1.9811, "step": 62859 }, { "epoch": 6.455329636475662, "grad_norm": 0.12214828282594681, "learning_rate": 0.01, "loss": 1.9922, "step": 62862 }, { "epoch": 6.455637707948244, "grad_norm": 0.04400373622775078, "learning_rate": 0.01, "loss": 1.9588, "step": 62865 }, { "epoch": 6.4559457794208255, "grad_norm": 0.09445548057556152, "learning_rate": 0.01, "loss": 1.9745, "step": 62868 }, { "epoch": 6.4562538508934075, "grad_norm": 0.05584699660539627, "learning_rate": 0.01, "loss": 1.9717, "step": 62871 }, { "epoch": 6.456561922365989, "grad_norm": 0.041519295424222946, "learning_rate": 0.01, "loss": 1.9585, "step": 62874 }, { "epoch": 6.456869993838571, "grad_norm": 0.08750978112220764, "learning_rate": 0.01, "loss": 1.9901, "step": 62877 }, { "epoch": 6.457178065311152, "grad_norm": 0.0746956318616867, "learning_rate": 0.01, "loss": 1.99, "step": 62880 }, { "epoch": 6.457486136783734, "grad_norm": 0.05346643924713135, "learning_rate": 0.01, "loss": 1.9846, "step": 62883 }, { "epoch": 6.457794208256315, "grad_norm": 0.033672984689474106, "learning_rate": 0.01, "loss": 1.9696, "step": 62886 }, { "epoch": 6.458102279728897, "grad_norm": 0.07131464779376984, "learning_rate": 0.01, "loss": 1.9933, "step": 62889 }, { "epoch": 6.458410351201478, "grad_norm": 0.08488958328962326, "learning_rate": 0.01, "loss": 1.9658, "step": 62892 }, { "epoch": 6.45871842267406, "grad_norm": 0.041019588708877563, "learning_rate": 0.01, "loss": 1.9843, "step": 62895 }, { "epoch": 6.459026494146642, "grad_norm": 0.07616984844207764, "learning_rate": 0.01, "loss": 1.9969, "step": 62898 }, { "epoch": 6.459334565619224, "grad_norm": 0.071281798183918, "learning_rate": 0.01, "loss": 1.9888, "step": 62901 }, { "epoch": 6.459642637091806, "grad_norm": 0.04009273648262024, "learning_rate": 0.01, "loss": 1.9522, "step": 62904 }, { "epoch": 6.459950708564387, "grad_norm": 0.059646125882864, "learning_rate": 0.01, "loss": 2.0167, "step": 62907 }, { "epoch": 6.460258780036969, "grad_norm": 0.08733568340539932, "learning_rate": 0.01, "loss": 1.9812, "step": 62910 }, { "epoch": 6.46056685150955, "grad_norm": 0.06603758037090302, "learning_rate": 0.01, "loss": 1.9814, "step": 62913 }, { "epoch": 6.460874922982132, "grad_norm": 0.036482322961091995, "learning_rate": 0.01, "loss": 1.9888, "step": 62916 }, { "epoch": 6.461182994454713, "grad_norm": 0.08736536651849747, "learning_rate": 0.01, "loss": 1.9714, "step": 62919 }, { "epoch": 6.461491065927295, "grad_norm": 0.10915714502334595, "learning_rate": 0.01, "loss": 1.9726, "step": 62922 }, { "epoch": 6.461799137399876, "grad_norm": 0.07954758405685425, "learning_rate": 0.01, "loss": 1.9964, "step": 62925 }, { "epoch": 6.4621072088724585, "grad_norm": 0.05602665990591049, "learning_rate": 0.01, "loss": 1.9987, "step": 62928 }, { "epoch": 6.46241528034504, "grad_norm": 0.09531257301568985, "learning_rate": 0.01, "loss": 1.9556, "step": 62931 }, { "epoch": 6.462723351817622, "grad_norm": 0.044863101094961166, "learning_rate": 0.01, "loss": 1.9761, "step": 62934 }, { "epoch": 6.463031423290204, "grad_norm": 0.0783008560538292, "learning_rate": 0.01, "loss": 1.9402, "step": 62937 }, { "epoch": 6.463339494762785, "grad_norm": 0.05667195841670036, "learning_rate": 0.01, "loss": 1.9911, "step": 62940 }, { "epoch": 6.463647566235367, "grad_norm": 0.13006943464279175, "learning_rate": 0.01, "loss": 2.0073, "step": 62943 }, { "epoch": 6.463955637707948, "grad_norm": 0.08932863920927048, "learning_rate": 0.01, "loss": 1.9763, "step": 62946 }, { "epoch": 6.46426370918053, "grad_norm": 0.04788200557231903, "learning_rate": 0.01, "loss": 1.9902, "step": 62949 }, { "epoch": 6.464571780653111, "grad_norm": 0.03904096782207489, "learning_rate": 0.01, "loss": 1.956, "step": 62952 }, { "epoch": 6.464879852125693, "grad_norm": 0.06443644315004349, "learning_rate": 0.01, "loss": 1.9765, "step": 62955 }, { "epoch": 6.4651879235982745, "grad_norm": 0.08434759825468063, "learning_rate": 0.01, "loss": 1.9849, "step": 62958 }, { "epoch": 6.465495995070857, "grad_norm": 0.03871188312768936, "learning_rate": 0.01, "loss": 1.9737, "step": 62961 }, { "epoch": 6.465804066543438, "grad_norm": 0.11748526990413666, "learning_rate": 0.01, "loss": 1.9857, "step": 62964 }, { "epoch": 6.46611213801602, "grad_norm": 0.05763693153858185, "learning_rate": 0.01, "loss": 1.9485, "step": 62967 }, { "epoch": 6.466420209488601, "grad_norm": 0.0419640988111496, "learning_rate": 0.01, "loss": 1.983, "step": 62970 }, { "epoch": 6.466728280961183, "grad_norm": 0.061243150383234024, "learning_rate": 0.01, "loss": 1.9936, "step": 62973 }, { "epoch": 6.467036352433764, "grad_norm": 0.10403522849082947, "learning_rate": 0.01, "loss": 1.9732, "step": 62976 }, { "epoch": 6.467344423906346, "grad_norm": 0.14000679552555084, "learning_rate": 0.01, "loss": 1.9806, "step": 62979 }, { "epoch": 6.467652495378928, "grad_norm": 0.08773007243871689, "learning_rate": 0.01, "loss": 1.989, "step": 62982 }, { "epoch": 6.467960566851509, "grad_norm": 0.04810422658920288, "learning_rate": 0.01, "loss": 1.9772, "step": 62985 }, { "epoch": 6.4682686383240915, "grad_norm": 0.05809152126312256, "learning_rate": 0.01, "loss": 1.9849, "step": 62988 }, { "epoch": 6.468576709796673, "grad_norm": 0.05598244071006775, "learning_rate": 0.01, "loss": 1.9859, "step": 62991 }, { "epoch": 6.468884781269255, "grad_norm": 0.07979332655668259, "learning_rate": 0.01, "loss": 1.9527, "step": 62994 }, { "epoch": 6.469192852741836, "grad_norm": 0.044398579746484756, "learning_rate": 0.01, "loss": 1.9995, "step": 62997 }, { "epoch": 6.469500924214418, "grad_norm": 0.04663238674402237, "learning_rate": 0.01, "loss": 1.9816, "step": 63000 }, { "epoch": 6.469808995686999, "grad_norm": 0.04528966173529625, "learning_rate": 0.01, "loss": 1.9898, "step": 63003 }, { "epoch": 6.470117067159581, "grad_norm": 0.06028036028146744, "learning_rate": 0.01, "loss": 1.9504, "step": 63006 }, { "epoch": 6.470425138632162, "grad_norm": 0.094419464468956, "learning_rate": 0.01, "loss": 1.9973, "step": 63009 }, { "epoch": 6.470733210104744, "grad_norm": 0.18547490239143372, "learning_rate": 0.01, "loss": 2.003, "step": 63012 }, { "epoch": 6.471041281577326, "grad_norm": 0.09084778279066086, "learning_rate": 0.01, "loss": 1.9759, "step": 63015 }, { "epoch": 6.4713493530499075, "grad_norm": 0.07517223805189133, "learning_rate": 0.01, "loss": 1.9685, "step": 63018 }, { "epoch": 6.47165742452249, "grad_norm": 0.041310783475637436, "learning_rate": 0.01, "loss": 1.9962, "step": 63021 }, { "epoch": 6.471965495995071, "grad_norm": 0.04006296768784523, "learning_rate": 0.01, "loss": 1.9688, "step": 63024 }, { "epoch": 6.472273567467653, "grad_norm": 0.03506629914045334, "learning_rate": 0.01, "loss": 1.9701, "step": 63027 }, { "epoch": 6.472581638940234, "grad_norm": 0.04409859701991081, "learning_rate": 0.01, "loss": 1.9993, "step": 63030 }, { "epoch": 6.472889710412816, "grad_norm": 0.054022327065467834, "learning_rate": 0.01, "loss": 1.985, "step": 63033 }, { "epoch": 6.473197781885397, "grad_norm": 0.06177505478262901, "learning_rate": 0.01, "loss": 1.9894, "step": 63036 }, { "epoch": 6.473505853357979, "grad_norm": 0.10999149829149246, "learning_rate": 0.01, "loss": 1.9949, "step": 63039 }, { "epoch": 6.47381392483056, "grad_norm": 0.130665123462677, "learning_rate": 0.01, "loss": 1.9813, "step": 63042 }, { "epoch": 6.4741219963031424, "grad_norm": 0.15091292560100555, "learning_rate": 0.01, "loss": 2.008, "step": 63045 }, { "epoch": 6.474430067775724, "grad_norm": 0.1041717529296875, "learning_rate": 0.01, "loss": 1.9666, "step": 63048 }, { "epoch": 6.474738139248306, "grad_norm": 0.05246599763631821, "learning_rate": 0.01, "loss": 1.9521, "step": 63051 }, { "epoch": 6.475046210720887, "grad_norm": 0.04646655172109604, "learning_rate": 0.01, "loss": 1.998, "step": 63054 }, { "epoch": 6.475354282193469, "grad_norm": 0.047972384840250015, "learning_rate": 0.01, "loss": 1.9716, "step": 63057 }, { "epoch": 6.475662353666051, "grad_norm": 0.057053469121456146, "learning_rate": 0.01, "loss": 1.983, "step": 63060 }, { "epoch": 6.475970425138632, "grad_norm": 0.044478513300418854, "learning_rate": 0.01, "loss": 1.9598, "step": 63063 }, { "epoch": 6.476278496611214, "grad_norm": 0.13227659463882446, "learning_rate": 0.01, "loss": 1.9661, "step": 63066 }, { "epoch": 6.476586568083795, "grad_norm": 0.09201903641223907, "learning_rate": 0.01, "loss": 1.9711, "step": 63069 }, { "epoch": 6.476894639556377, "grad_norm": 0.07452027499675751, "learning_rate": 0.01, "loss": 1.9585, "step": 63072 }, { "epoch": 6.4772027110289585, "grad_norm": 0.04616081714630127, "learning_rate": 0.01, "loss": 1.9762, "step": 63075 }, { "epoch": 6.4775107825015406, "grad_norm": 0.047700148075819016, "learning_rate": 0.01, "loss": 1.961, "step": 63078 }, { "epoch": 6.477818853974122, "grad_norm": 0.0366378016769886, "learning_rate": 0.01, "loss": 2.0038, "step": 63081 }, { "epoch": 6.478126925446704, "grad_norm": 0.037750110030174255, "learning_rate": 0.01, "loss": 1.98, "step": 63084 }, { "epoch": 6.478434996919285, "grad_norm": 0.05896897614002228, "learning_rate": 0.01, "loss": 1.9679, "step": 63087 }, { "epoch": 6.478743068391867, "grad_norm": 0.13248635828495026, "learning_rate": 0.01, "loss": 1.9915, "step": 63090 }, { "epoch": 6.479051139864448, "grad_norm": 0.14373722672462463, "learning_rate": 0.01, "loss": 1.9855, "step": 63093 }, { "epoch": 6.47935921133703, "grad_norm": 0.07670046389102936, "learning_rate": 0.01, "loss": 1.9747, "step": 63096 }, { "epoch": 6.479667282809612, "grad_norm": 0.05786823108792305, "learning_rate": 0.01, "loss": 1.99, "step": 63099 }, { "epoch": 6.479975354282193, "grad_norm": 0.10580223053693771, "learning_rate": 0.01, "loss": 1.988, "step": 63102 }, { "epoch": 6.4802834257547755, "grad_norm": 0.09750565141439438, "learning_rate": 0.01, "loss": 1.9609, "step": 63105 }, { "epoch": 6.480591497227357, "grad_norm": 0.07120301574468613, "learning_rate": 0.01, "loss": 1.9575, "step": 63108 }, { "epoch": 6.480899568699939, "grad_norm": 0.046966131776571274, "learning_rate": 0.01, "loss": 1.9867, "step": 63111 }, { "epoch": 6.48120764017252, "grad_norm": 0.04642505198717117, "learning_rate": 0.01, "loss": 1.9916, "step": 63114 }, { "epoch": 6.481515711645102, "grad_norm": 0.0937868282198906, "learning_rate": 0.01, "loss": 1.9597, "step": 63117 }, { "epoch": 6.481823783117683, "grad_norm": 0.11612875759601593, "learning_rate": 0.01, "loss": 1.9849, "step": 63120 }, { "epoch": 6.482131854590265, "grad_norm": 0.042997948825359344, "learning_rate": 0.01, "loss": 2.0063, "step": 63123 }, { "epoch": 6.482439926062846, "grad_norm": 0.10609213262796402, "learning_rate": 0.01, "loss": 1.9562, "step": 63126 }, { "epoch": 6.482747997535428, "grad_norm": 0.09225320816040039, "learning_rate": 0.01, "loss": 1.9682, "step": 63129 }, { "epoch": 6.4830560690080095, "grad_norm": 0.053099583834409714, "learning_rate": 0.01, "loss": 1.993, "step": 63132 }, { "epoch": 6.4833641404805915, "grad_norm": 0.04756970703601837, "learning_rate": 0.01, "loss": 1.9854, "step": 63135 }, { "epoch": 6.483672211953174, "grad_norm": 0.04079375043511391, "learning_rate": 0.01, "loss": 1.9496, "step": 63138 }, { "epoch": 6.483980283425755, "grad_norm": 0.08000772446393967, "learning_rate": 0.01, "loss": 2.0007, "step": 63141 }, { "epoch": 6.484288354898337, "grad_norm": 0.05502250790596008, "learning_rate": 0.01, "loss": 1.9906, "step": 63144 }, { "epoch": 6.484596426370918, "grad_norm": 0.07607629895210266, "learning_rate": 0.01, "loss": 1.965, "step": 63147 }, { "epoch": 6.4849044978435, "grad_norm": 0.0457836389541626, "learning_rate": 0.01, "loss": 1.9916, "step": 63150 }, { "epoch": 6.485212569316081, "grad_norm": 0.04571022093296051, "learning_rate": 0.01, "loss": 1.9824, "step": 63153 }, { "epoch": 6.485520640788663, "grad_norm": 0.054583076387643814, "learning_rate": 0.01, "loss": 1.9734, "step": 63156 }, { "epoch": 6.485828712261244, "grad_norm": 0.08559385687112808, "learning_rate": 0.01, "loss": 1.9781, "step": 63159 }, { "epoch": 6.486136783733826, "grad_norm": 0.06536659598350525, "learning_rate": 0.01, "loss": 1.9888, "step": 63162 }, { "epoch": 6.486444855206408, "grad_norm": 0.031179916113615036, "learning_rate": 0.01, "loss": 1.9609, "step": 63165 }, { "epoch": 6.48675292667899, "grad_norm": 0.038442451506853104, "learning_rate": 0.01, "loss": 1.9676, "step": 63168 }, { "epoch": 6.487060998151571, "grad_norm": 0.05742299556732178, "learning_rate": 0.01, "loss": 1.9717, "step": 63171 }, { "epoch": 6.487369069624153, "grad_norm": 0.07068540900945663, "learning_rate": 0.01, "loss": 1.9797, "step": 63174 }, { "epoch": 6.487677141096734, "grad_norm": 0.041148263961076736, "learning_rate": 0.01, "loss": 1.9949, "step": 63177 }, { "epoch": 6.487985212569316, "grad_norm": 0.08058618754148483, "learning_rate": 0.01, "loss": 1.9969, "step": 63180 }, { "epoch": 6.488293284041898, "grad_norm": 0.10617338865995407, "learning_rate": 0.01, "loss": 1.9701, "step": 63183 }, { "epoch": 6.488601355514479, "grad_norm": 0.13301149010658264, "learning_rate": 0.01, "loss": 1.9829, "step": 63186 }, { "epoch": 6.488909426987061, "grad_norm": 0.12188655138015747, "learning_rate": 0.01, "loss": 1.9722, "step": 63189 }, { "epoch": 6.4892174984596425, "grad_norm": 0.07612743228673935, "learning_rate": 0.01, "loss": 2.0, "step": 63192 }, { "epoch": 6.4895255699322245, "grad_norm": 0.08802825957536697, "learning_rate": 0.01, "loss": 1.9677, "step": 63195 }, { "epoch": 6.489833641404806, "grad_norm": 0.05692920833826065, "learning_rate": 0.01, "loss": 1.985, "step": 63198 }, { "epoch": 6.490141712877388, "grad_norm": 0.0412018783390522, "learning_rate": 0.01, "loss": 2.0093, "step": 63201 }, { "epoch": 6.490449784349969, "grad_norm": 0.0954689234495163, "learning_rate": 0.01, "loss": 1.9927, "step": 63204 }, { "epoch": 6.490757855822551, "grad_norm": 0.11650601774454117, "learning_rate": 0.01, "loss": 1.9773, "step": 63207 }, { "epoch": 6.491065927295132, "grad_norm": 0.06705356389284134, "learning_rate": 0.01, "loss": 1.9719, "step": 63210 }, { "epoch": 6.491373998767714, "grad_norm": 0.04871569201350212, "learning_rate": 0.01, "loss": 1.9835, "step": 63213 }, { "epoch": 6.491682070240296, "grad_norm": 0.041996728628873825, "learning_rate": 0.01, "loss": 1.9822, "step": 63216 }, { "epoch": 6.491990141712877, "grad_norm": 0.03821207210421562, "learning_rate": 0.01, "loss": 1.9666, "step": 63219 }, { "epoch": 6.492298213185459, "grad_norm": 0.03589041158556938, "learning_rate": 0.01, "loss": 1.981, "step": 63222 }, { "epoch": 6.492606284658041, "grad_norm": 0.05628860369324684, "learning_rate": 0.01, "loss": 1.9676, "step": 63225 }, { "epoch": 6.492914356130623, "grad_norm": 0.10156149417161942, "learning_rate": 0.01, "loss": 1.9782, "step": 63228 }, { "epoch": 6.493222427603204, "grad_norm": 0.1743369847536087, "learning_rate": 0.01, "loss": 1.9564, "step": 63231 }, { "epoch": 6.493530499075786, "grad_norm": 0.13694548606872559, "learning_rate": 0.01, "loss": 1.9924, "step": 63234 }, { "epoch": 6.493838570548367, "grad_norm": 0.12643152475357056, "learning_rate": 0.01, "loss": 1.979, "step": 63237 }, { "epoch": 6.494146642020949, "grad_norm": 0.14127200841903687, "learning_rate": 0.01, "loss": 1.9621, "step": 63240 }, { "epoch": 6.49445471349353, "grad_norm": 0.05180880054831505, "learning_rate": 0.01, "loss": 1.9607, "step": 63243 }, { "epoch": 6.494762784966112, "grad_norm": 0.050564274191856384, "learning_rate": 0.01, "loss": 1.9772, "step": 63246 }, { "epoch": 6.495070856438693, "grad_norm": 0.09520803391933441, "learning_rate": 0.01, "loss": 1.9697, "step": 63249 }, { "epoch": 6.4953789279112755, "grad_norm": 0.09522721916437149, "learning_rate": 0.01, "loss": 1.957, "step": 63252 }, { "epoch": 6.495686999383857, "grad_norm": 0.1086997389793396, "learning_rate": 0.01, "loss": 1.9849, "step": 63255 }, { "epoch": 6.495995070856439, "grad_norm": 0.08121590316295624, "learning_rate": 0.01, "loss": 1.966, "step": 63258 }, { "epoch": 6.496303142329021, "grad_norm": 0.06703486293554306, "learning_rate": 0.01, "loss": 1.9985, "step": 63261 }, { "epoch": 6.496611213801602, "grad_norm": 0.04283798858523369, "learning_rate": 0.01, "loss": 1.9757, "step": 63264 }, { "epoch": 6.496919285274184, "grad_norm": 0.04943424090743065, "learning_rate": 0.01, "loss": 1.9672, "step": 63267 }, { "epoch": 6.497227356746765, "grad_norm": 0.0578305721282959, "learning_rate": 0.01, "loss": 1.9733, "step": 63270 }, { "epoch": 6.497535428219347, "grad_norm": 0.0359860323369503, "learning_rate": 0.01, "loss": 1.9931, "step": 63273 }, { "epoch": 6.497843499691928, "grad_norm": 0.04893532767891884, "learning_rate": 0.01, "loss": 1.9842, "step": 63276 }, { "epoch": 6.49815157116451, "grad_norm": 0.04525068402290344, "learning_rate": 0.01, "loss": 1.9695, "step": 63279 }, { "epoch": 6.4984596426370915, "grad_norm": 0.04810551553964615, "learning_rate": 0.01, "loss": 1.9921, "step": 63282 }, { "epoch": 6.498767714109674, "grad_norm": 0.05285782366991043, "learning_rate": 0.01, "loss": 1.9744, "step": 63285 }, { "epoch": 6.499075785582255, "grad_norm": 0.032118529081344604, "learning_rate": 0.01, "loss": 1.9732, "step": 63288 }, { "epoch": 6.499383857054837, "grad_norm": 0.049875080585479736, "learning_rate": 0.01, "loss": 1.9758, "step": 63291 }, { "epoch": 6.499691928527418, "grad_norm": 0.04658648371696472, "learning_rate": 0.01, "loss": 2.0009, "step": 63294 }, { "epoch": 6.5, "grad_norm": 0.05129887908697128, "learning_rate": 0.01, "loss": 1.9754, "step": 63297 }, { "epoch": 6.500308071472581, "grad_norm": 0.07968533784151077, "learning_rate": 0.01, "loss": 1.9645, "step": 63300 }, { "epoch": 6.500616142945163, "grad_norm": 0.08152198046445847, "learning_rate": 0.01, "loss": 1.9582, "step": 63303 }, { "epoch": 6.500924214417745, "grad_norm": 0.07405789196491241, "learning_rate": 0.01, "loss": 1.9891, "step": 63306 }, { "epoch": 6.501232285890326, "grad_norm": 0.07860677689313889, "learning_rate": 0.01, "loss": 1.9587, "step": 63309 }, { "epoch": 6.5015403573629085, "grad_norm": 0.051370490342378616, "learning_rate": 0.01, "loss": 1.9809, "step": 63312 }, { "epoch": 6.50184842883549, "grad_norm": 0.04912220686674118, "learning_rate": 0.01, "loss": 1.9814, "step": 63315 }, { "epoch": 6.502156500308072, "grad_norm": 0.042900361120700836, "learning_rate": 0.01, "loss": 1.981, "step": 63318 }, { "epoch": 6.502464571780653, "grad_norm": 0.16261926293373108, "learning_rate": 0.01, "loss": 1.9983, "step": 63321 }, { "epoch": 6.502772643253235, "grad_norm": 0.11455561220645905, "learning_rate": 0.01, "loss": 1.9578, "step": 63324 }, { "epoch": 6.503080714725816, "grad_norm": 0.04585309699177742, "learning_rate": 0.01, "loss": 1.9609, "step": 63327 }, { "epoch": 6.503388786198398, "grad_norm": 0.03483370691537857, "learning_rate": 0.01, "loss": 1.9664, "step": 63330 }, { "epoch": 6.503696857670979, "grad_norm": 0.08230151981115341, "learning_rate": 0.01, "loss": 1.9607, "step": 63333 }, { "epoch": 6.504004929143561, "grad_norm": 0.06978236138820648, "learning_rate": 0.01, "loss": 1.9593, "step": 63336 }, { "epoch": 6.504313000616143, "grad_norm": 0.03723878785967827, "learning_rate": 0.01, "loss": 1.9832, "step": 63339 }, { "epoch": 6.5046210720887245, "grad_norm": 0.0846422016620636, "learning_rate": 0.01, "loss": 1.9824, "step": 63342 }, { "epoch": 6.504929143561307, "grad_norm": 0.06443771719932556, "learning_rate": 0.01, "loss": 1.9698, "step": 63345 }, { "epoch": 6.505237215033888, "grad_norm": 0.07399289309978485, "learning_rate": 0.01, "loss": 2.0061, "step": 63348 }, { "epoch": 6.50554528650647, "grad_norm": 0.08382997661828995, "learning_rate": 0.01, "loss": 1.9713, "step": 63351 }, { "epoch": 6.505853357979051, "grad_norm": 0.0767715722322464, "learning_rate": 0.01, "loss": 1.9787, "step": 63354 }, { "epoch": 6.506161429451633, "grad_norm": 0.07842829078435898, "learning_rate": 0.01, "loss": 1.9532, "step": 63357 }, { "epoch": 6.506469500924214, "grad_norm": 0.04764385148882866, "learning_rate": 0.01, "loss": 1.9685, "step": 63360 }, { "epoch": 6.506777572396796, "grad_norm": 0.08968043327331543, "learning_rate": 0.01, "loss": 1.9704, "step": 63363 }, { "epoch": 6.507085643869377, "grad_norm": 0.07326337695121765, "learning_rate": 0.01, "loss": 1.9575, "step": 63366 }, { "epoch": 6.507393715341959, "grad_norm": 0.04753720387816429, "learning_rate": 0.01, "loss": 1.9866, "step": 63369 }, { "epoch": 6.507701786814541, "grad_norm": 0.05614113807678223, "learning_rate": 0.01, "loss": 1.9876, "step": 63372 }, { "epoch": 6.508009858287123, "grad_norm": 0.04377448558807373, "learning_rate": 0.01, "loss": 1.971, "step": 63375 }, { "epoch": 6.508317929759704, "grad_norm": 0.03820153325796127, "learning_rate": 0.01, "loss": 2.0105, "step": 63378 }, { "epoch": 6.508626001232286, "grad_norm": 0.03955162316560745, "learning_rate": 0.01, "loss": 1.9584, "step": 63381 }, { "epoch": 6.508934072704868, "grad_norm": 0.03919532150030136, "learning_rate": 0.01, "loss": 1.9572, "step": 63384 }, { "epoch": 6.509242144177449, "grad_norm": 0.13084079325199127, "learning_rate": 0.01, "loss": 1.9801, "step": 63387 }, { "epoch": 6.509550215650031, "grad_norm": 0.05490676313638687, "learning_rate": 0.01, "loss": 1.9969, "step": 63390 }, { "epoch": 6.509858287122612, "grad_norm": 0.048263441771268845, "learning_rate": 0.01, "loss": 1.9959, "step": 63393 }, { "epoch": 6.510166358595194, "grad_norm": 0.07833252102136612, "learning_rate": 0.01, "loss": 1.9664, "step": 63396 }, { "epoch": 6.5104744300677755, "grad_norm": 0.06575249880552292, "learning_rate": 0.01, "loss": 1.9734, "step": 63399 }, { "epoch": 6.5107825015403575, "grad_norm": 0.1300118863582611, "learning_rate": 0.01, "loss": 1.9681, "step": 63402 }, { "epoch": 6.511090573012939, "grad_norm": 0.05606196075677872, "learning_rate": 0.01, "loss": 1.9852, "step": 63405 }, { "epoch": 6.511398644485521, "grad_norm": 0.04275618493556976, "learning_rate": 0.01, "loss": 2.0126, "step": 63408 }, { "epoch": 6.511706715958102, "grad_norm": 0.11378994584083557, "learning_rate": 0.01, "loss": 1.9589, "step": 63411 }, { "epoch": 6.512014787430684, "grad_norm": 0.04268041253089905, "learning_rate": 0.01, "loss": 1.9571, "step": 63414 }, { "epoch": 6.512322858903266, "grad_norm": 0.18747419118881226, "learning_rate": 0.01, "loss": 1.9748, "step": 63417 }, { "epoch": 6.512630930375847, "grad_norm": 0.10926712304353714, "learning_rate": 0.01, "loss": 1.9855, "step": 63420 }, { "epoch": 6.512939001848429, "grad_norm": 0.12267261743545532, "learning_rate": 0.01, "loss": 1.9852, "step": 63423 }, { "epoch": 6.51324707332101, "grad_norm": 0.06755910813808441, "learning_rate": 0.01, "loss": 1.9691, "step": 63426 }, { "epoch": 6.513555144793592, "grad_norm": 0.06156865134835243, "learning_rate": 0.01, "loss": 2.0032, "step": 63429 }, { "epoch": 6.513863216266174, "grad_norm": 0.0420864075422287, "learning_rate": 0.01, "loss": 1.9596, "step": 63432 }, { "epoch": 6.514171287738756, "grad_norm": 0.037585970014333725, "learning_rate": 0.01, "loss": 1.9785, "step": 63435 }, { "epoch": 6.514479359211337, "grad_norm": 0.042937878519296646, "learning_rate": 0.01, "loss": 1.9753, "step": 63438 }, { "epoch": 6.514787430683919, "grad_norm": 0.04278058186173439, "learning_rate": 0.01, "loss": 1.9666, "step": 63441 }, { "epoch": 6.5150955021565, "grad_norm": 0.053777556866407394, "learning_rate": 0.01, "loss": 1.9675, "step": 63444 }, { "epoch": 6.515403573629082, "grad_norm": 0.11828344315290451, "learning_rate": 0.01, "loss": 1.9974, "step": 63447 }, { "epoch": 6.515711645101663, "grad_norm": 0.07692524790763855, "learning_rate": 0.01, "loss": 1.9837, "step": 63450 }, { "epoch": 6.516019716574245, "grad_norm": 0.036424048244953156, "learning_rate": 0.01, "loss": 1.9669, "step": 63453 }, { "epoch": 6.516327788046826, "grad_norm": 0.051881253719329834, "learning_rate": 0.01, "loss": 1.9613, "step": 63456 }, { "epoch": 6.5166358595194085, "grad_norm": 0.09392795711755753, "learning_rate": 0.01, "loss": 1.9757, "step": 63459 }, { "epoch": 6.5169439309919905, "grad_norm": 0.05411149188876152, "learning_rate": 0.01, "loss": 1.9763, "step": 63462 }, { "epoch": 6.517252002464572, "grad_norm": 0.07104165852069855, "learning_rate": 0.01, "loss": 1.9676, "step": 63465 }, { "epoch": 6.517560073937154, "grad_norm": 0.0653427243232727, "learning_rate": 0.01, "loss": 1.9812, "step": 63468 }, { "epoch": 6.517868145409735, "grad_norm": 0.07316904515028, "learning_rate": 0.01, "loss": 1.9809, "step": 63471 }, { "epoch": 6.518176216882317, "grad_norm": 0.08225788921117783, "learning_rate": 0.01, "loss": 1.968, "step": 63474 }, { "epoch": 6.518484288354898, "grad_norm": 0.07449998706579208, "learning_rate": 0.01, "loss": 1.9587, "step": 63477 }, { "epoch": 6.51879235982748, "grad_norm": 0.07791251689195633, "learning_rate": 0.01, "loss": 1.9827, "step": 63480 }, { "epoch": 6.519100431300061, "grad_norm": 0.1233309879899025, "learning_rate": 0.01, "loss": 1.968, "step": 63483 }, { "epoch": 6.519408502772643, "grad_norm": 0.041390251368284225, "learning_rate": 0.01, "loss": 1.9804, "step": 63486 }, { "epoch": 6.5197165742452245, "grad_norm": 0.039635319262742996, "learning_rate": 0.01, "loss": 1.9641, "step": 63489 }, { "epoch": 6.520024645717807, "grad_norm": 0.03680063784122467, "learning_rate": 0.01, "loss": 1.9501, "step": 63492 }, { "epoch": 6.520332717190389, "grad_norm": 0.08327323198318481, "learning_rate": 0.01, "loss": 1.9728, "step": 63495 }, { "epoch": 6.52064078866297, "grad_norm": 0.10368441045284271, "learning_rate": 0.01, "loss": 1.9563, "step": 63498 }, { "epoch": 6.520948860135551, "grad_norm": 0.05991170555353165, "learning_rate": 0.01, "loss": 1.9843, "step": 63501 }, { "epoch": 6.521256931608133, "grad_norm": 0.05868203192949295, "learning_rate": 0.01, "loss": 2.0054, "step": 63504 }, { "epoch": 6.521565003080715, "grad_norm": 0.04409966990351677, "learning_rate": 0.01, "loss": 1.9698, "step": 63507 }, { "epoch": 6.521873074553296, "grad_norm": 0.04939546808600426, "learning_rate": 0.01, "loss": 2.0037, "step": 63510 }, { "epoch": 6.522181146025878, "grad_norm": 0.041482388973236084, "learning_rate": 0.01, "loss": 1.9707, "step": 63513 }, { "epoch": 6.5224892174984594, "grad_norm": 0.03503831848502159, "learning_rate": 0.01, "loss": 1.9745, "step": 63516 }, { "epoch": 6.5227972889710415, "grad_norm": 0.07293230295181274, "learning_rate": 0.01, "loss": 1.9793, "step": 63519 }, { "epoch": 6.523105360443623, "grad_norm": 0.13121885061264038, "learning_rate": 0.01, "loss": 1.9747, "step": 63522 }, { "epoch": 6.523413431916205, "grad_norm": 0.05674733221530914, "learning_rate": 0.01, "loss": 1.9724, "step": 63525 }, { "epoch": 6.523721503388786, "grad_norm": 0.13403059542179108, "learning_rate": 0.01, "loss": 1.9897, "step": 63528 }, { "epoch": 6.524029574861368, "grad_norm": 0.05892037972807884, "learning_rate": 0.01, "loss": 1.9806, "step": 63531 }, { "epoch": 6.524337646333949, "grad_norm": 0.05163764953613281, "learning_rate": 0.01, "loss": 1.9752, "step": 63534 }, { "epoch": 6.524645717806531, "grad_norm": 0.10264305770397186, "learning_rate": 0.01, "loss": 1.9814, "step": 63537 }, { "epoch": 6.524953789279113, "grad_norm": 0.060135189443826675, "learning_rate": 0.01, "loss": 1.9778, "step": 63540 }, { "epoch": 6.525261860751694, "grad_norm": 0.033466801047325134, "learning_rate": 0.01, "loss": 1.9649, "step": 63543 }, { "epoch": 6.525569932224276, "grad_norm": 0.05220318213105202, "learning_rate": 0.01, "loss": 1.9861, "step": 63546 }, { "epoch": 6.5258780036968576, "grad_norm": 0.04468940570950508, "learning_rate": 0.01, "loss": 1.9974, "step": 63549 }, { "epoch": 6.52618607516944, "grad_norm": 0.07253078371286392, "learning_rate": 0.01, "loss": 1.9834, "step": 63552 }, { "epoch": 6.526494146642021, "grad_norm": 0.039232298731803894, "learning_rate": 0.01, "loss": 1.9901, "step": 63555 }, { "epoch": 6.526802218114603, "grad_norm": 0.08601675182580948, "learning_rate": 0.01, "loss": 1.9845, "step": 63558 }, { "epoch": 6.527110289587184, "grad_norm": 0.11496642231941223, "learning_rate": 0.01, "loss": 1.9742, "step": 63561 }, { "epoch": 6.527418361059766, "grad_norm": 0.21819603443145752, "learning_rate": 0.01, "loss": 2.0011, "step": 63564 }, { "epoch": 6.527726432532347, "grad_norm": 0.15641893446445465, "learning_rate": 0.01, "loss": 1.9666, "step": 63567 }, { "epoch": 6.528034504004929, "grad_norm": 0.07390442490577698, "learning_rate": 0.01, "loss": 1.9679, "step": 63570 }, { "epoch": 6.52834257547751, "grad_norm": 0.03637871518731117, "learning_rate": 0.01, "loss": 1.9839, "step": 63573 }, { "epoch": 6.5286506469500925, "grad_norm": 0.05536114424467087, "learning_rate": 0.01, "loss": 1.9713, "step": 63576 }, { "epoch": 6.528958718422674, "grad_norm": 0.08773189783096313, "learning_rate": 0.01, "loss": 1.9722, "step": 63579 }, { "epoch": 6.529266789895256, "grad_norm": 0.05206519737839699, "learning_rate": 0.01, "loss": 2.0209, "step": 63582 }, { "epoch": 6.529574861367838, "grad_norm": 0.12326787412166595, "learning_rate": 0.01, "loss": 2.0021, "step": 63585 }, { "epoch": 6.529882932840419, "grad_norm": 0.10255524516105652, "learning_rate": 0.01, "loss": 1.9552, "step": 63588 }, { "epoch": 6.530191004313001, "grad_norm": 0.08496958762407303, "learning_rate": 0.01, "loss": 1.9621, "step": 63591 }, { "epoch": 6.530499075785582, "grad_norm": 0.040596868842840195, "learning_rate": 0.01, "loss": 1.9465, "step": 63594 }, { "epoch": 6.530807147258164, "grad_norm": 0.033690448850393295, "learning_rate": 0.01, "loss": 1.967, "step": 63597 }, { "epoch": 6.531115218730745, "grad_norm": 0.0766686350107193, "learning_rate": 0.01, "loss": 1.9839, "step": 63600 }, { "epoch": 6.531423290203327, "grad_norm": 0.12199605256319046, "learning_rate": 0.01, "loss": 1.982, "step": 63603 }, { "epoch": 6.5317313616759085, "grad_norm": 0.03897617757320404, "learning_rate": 0.01, "loss": 1.9975, "step": 63606 }, { "epoch": 6.532039433148491, "grad_norm": 0.05026644468307495, "learning_rate": 0.01, "loss": 2.0126, "step": 63609 }, { "epoch": 6.532347504621072, "grad_norm": 0.03886674344539642, "learning_rate": 0.01, "loss": 1.9843, "step": 63612 }, { "epoch": 6.532655576093654, "grad_norm": 0.09314893931150436, "learning_rate": 0.01, "loss": 1.9715, "step": 63615 }, { "epoch": 6.532963647566236, "grad_norm": 0.048525359481573105, "learning_rate": 0.01, "loss": 1.9957, "step": 63618 }, { "epoch": 6.533271719038817, "grad_norm": 0.027339305728673935, "learning_rate": 0.01, "loss": 1.9555, "step": 63621 }, { "epoch": 6.533579790511399, "grad_norm": 0.0443040207028389, "learning_rate": 0.01, "loss": 1.9848, "step": 63624 }, { "epoch": 6.53388786198398, "grad_norm": 0.06850685924291611, "learning_rate": 0.01, "loss": 1.9784, "step": 63627 }, { "epoch": 6.534195933456562, "grad_norm": 0.06264297664165497, "learning_rate": 0.01, "loss": 1.9552, "step": 63630 }, { "epoch": 6.534504004929143, "grad_norm": 0.06047286093235016, "learning_rate": 0.01, "loss": 1.9898, "step": 63633 }, { "epoch": 6.5348120764017255, "grad_norm": 0.08621931076049805, "learning_rate": 0.01, "loss": 1.9852, "step": 63636 }, { "epoch": 6.535120147874307, "grad_norm": 0.06598355621099472, "learning_rate": 0.01, "loss": 1.9792, "step": 63639 }, { "epoch": 6.535428219346889, "grad_norm": 0.04652687907218933, "learning_rate": 0.01, "loss": 1.9644, "step": 63642 }, { "epoch": 6.53573629081947, "grad_norm": 0.04016298055648804, "learning_rate": 0.01, "loss": 1.9694, "step": 63645 }, { "epoch": 6.536044362292052, "grad_norm": 0.039485424757003784, "learning_rate": 0.01, "loss": 1.928, "step": 63648 }, { "epoch": 6.536352433764633, "grad_norm": 0.03557172790169716, "learning_rate": 0.01, "loss": 1.96, "step": 63651 }, { "epoch": 6.536660505237215, "grad_norm": 0.0416797399520874, "learning_rate": 0.01, "loss": 1.9884, "step": 63654 }, { "epoch": 6.536968576709796, "grad_norm": 0.09189657121896744, "learning_rate": 0.01, "loss": 1.98, "step": 63657 }, { "epoch": 6.537276648182378, "grad_norm": 0.04484305903315544, "learning_rate": 0.01, "loss": 2.0131, "step": 63660 }, { "epoch": 6.53758471965496, "grad_norm": 0.059794455766677856, "learning_rate": 0.01, "loss": 2.0018, "step": 63663 }, { "epoch": 6.5378927911275415, "grad_norm": 0.05551350116729736, "learning_rate": 0.01, "loss": 1.9546, "step": 63666 }, { "epoch": 6.538200862600124, "grad_norm": 0.19956345856189728, "learning_rate": 0.01, "loss": 1.961, "step": 63669 }, { "epoch": 6.538508934072705, "grad_norm": 0.0885189101099968, "learning_rate": 0.01, "loss": 1.9769, "step": 63672 }, { "epoch": 6.538817005545287, "grad_norm": 0.05539345368742943, "learning_rate": 0.01, "loss": 1.9832, "step": 63675 }, { "epoch": 6.539125077017868, "grad_norm": 0.037228211760520935, "learning_rate": 0.01, "loss": 1.9787, "step": 63678 }, { "epoch": 6.53943314849045, "grad_norm": 0.03209219127893448, "learning_rate": 0.01, "loss": 1.9876, "step": 63681 }, { "epoch": 6.539741219963031, "grad_norm": 0.029509764164686203, "learning_rate": 0.01, "loss": 1.9849, "step": 63684 }, { "epoch": 6.540049291435613, "grad_norm": 0.052676524966955185, "learning_rate": 0.01, "loss": 1.997, "step": 63687 }, { "epoch": 6.540357362908194, "grad_norm": 0.0744934156537056, "learning_rate": 0.01, "loss": 1.9809, "step": 63690 }, { "epoch": 6.540665434380776, "grad_norm": 0.05411761999130249, "learning_rate": 0.01, "loss": 1.9748, "step": 63693 }, { "epoch": 6.5409735058533585, "grad_norm": 0.04553327336907387, "learning_rate": 0.01, "loss": 1.9747, "step": 63696 }, { "epoch": 6.54128157732594, "grad_norm": 0.16345827281475067, "learning_rate": 0.01, "loss": 1.9884, "step": 63699 }, { "epoch": 6.541589648798521, "grad_norm": 0.05779232829809189, "learning_rate": 0.01, "loss": 1.97, "step": 63702 }, { "epoch": 6.541897720271103, "grad_norm": 0.032575078308582306, "learning_rate": 0.01, "loss": 1.9711, "step": 63705 }, { "epoch": 6.542205791743685, "grad_norm": 0.043234266340732574, "learning_rate": 0.01, "loss": 1.987, "step": 63708 }, { "epoch": 6.542513863216266, "grad_norm": 0.050926946103572845, "learning_rate": 0.01, "loss": 1.999, "step": 63711 }, { "epoch": 6.542821934688848, "grad_norm": 0.040199290961027145, "learning_rate": 0.01, "loss": 1.9849, "step": 63714 }, { "epoch": 6.543130006161429, "grad_norm": 0.0373854786157608, "learning_rate": 0.01, "loss": 1.9813, "step": 63717 }, { "epoch": 6.543438077634011, "grad_norm": 0.04420515522360802, "learning_rate": 0.01, "loss": 1.9614, "step": 63720 }, { "epoch": 6.5437461491065925, "grad_norm": 0.10335779935121536, "learning_rate": 0.01, "loss": 2.0047, "step": 63723 }, { "epoch": 6.5440542205791745, "grad_norm": 0.08750100433826447, "learning_rate": 0.01, "loss": 2.0028, "step": 63726 }, { "epoch": 6.544362292051756, "grad_norm": 0.08968067169189453, "learning_rate": 0.01, "loss": 2.003, "step": 63729 }, { "epoch": 6.544670363524338, "grad_norm": 0.05931568518280983, "learning_rate": 0.01, "loss": 1.9806, "step": 63732 }, { "epoch": 6.544978434996919, "grad_norm": 0.04726015031337738, "learning_rate": 0.01, "loss": 1.9871, "step": 63735 }, { "epoch": 6.545286506469501, "grad_norm": 0.048892538994550705, "learning_rate": 0.01, "loss": 1.9894, "step": 63738 }, { "epoch": 6.545594577942083, "grad_norm": 0.042674802243709564, "learning_rate": 0.01, "loss": 1.9829, "step": 63741 }, { "epoch": 6.545902649414664, "grad_norm": 0.11424809694290161, "learning_rate": 0.01, "loss": 1.9681, "step": 63744 }, { "epoch": 6.546210720887246, "grad_norm": 0.037352122366428375, "learning_rate": 0.01, "loss": 1.9509, "step": 63747 }, { "epoch": 6.546518792359827, "grad_norm": 0.04247456043958664, "learning_rate": 0.01, "loss": 1.9976, "step": 63750 }, { "epoch": 6.546826863832409, "grad_norm": 0.05194523185491562, "learning_rate": 0.01, "loss": 1.9767, "step": 63753 }, { "epoch": 6.547134935304991, "grad_norm": 0.055400025099515915, "learning_rate": 0.01, "loss": 1.9541, "step": 63756 }, { "epoch": 6.547443006777573, "grad_norm": 0.06295850872993469, "learning_rate": 0.01, "loss": 1.9756, "step": 63759 }, { "epoch": 6.547751078250154, "grad_norm": 0.05281165987253189, "learning_rate": 0.01, "loss": 1.9953, "step": 63762 }, { "epoch": 6.548059149722736, "grad_norm": 0.04838143289089203, "learning_rate": 0.01, "loss": 1.9818, "step": 63765 }, { "epoch": 6.548367221195317, "grad_norm": 0.03333796560764313, "learning_rate": 0.01, "loss": 1.9677, "step": 63768 }, { "epoch": 6.548675292667899, "grad_norm": 0.029952581971883774, "learning_rate": 0.01, "loss": 1.9974, "step": 63771 }, { "epoch": 6.54898336414048, "grad_norm": 0.03036186844110489, "learning_rate": 0.01, "loss": 1.9715, "step": 63774 }, { "epoch": 6.549291435613062, "grad_norm": 0.06050482019782066, "learning_rate": 0.01, "loss": 1.9668, "step": 63777 }, { "epoch": 6.549599507085643, "grad_norm": 0.08855880796909332, "learning_rate": 0.01, "loss": 1.9847, "step": 63780 }, { "epoch": 6.5499075785582255, "grad_norm": 0.08679243922233582, "learning_rate": 0.01, "loss": 1.9938, "step": 63783 }, { "epoch": 6.5502156500308075, "grad_norm": 0.04234171286225319, "learning_rate": 0.01, "loss": 1.9808, "step": 63786 }, { "epoch": 6.550523721503389, "grad_norm": 0.029922012239694595, "learning_rate": 0.01, "loss": 1.9517, "step": 63789 }, { "epoch": 6.550831792975971, "grad_norm": 0.11024188250303268, "learning_rate": 0.01, "loss": 1.9659, "step": 63792 }, { "epoch": 6.551139864448552, "grad_norm": 0.03447732329368591, "learning_rate": 0.01, "loss": 1.9677, "step": 63795 }, { "epoch": 6.551447935921134, "grad_norm": 0.04188188910484314, "learning_rate": 0.01, "loss": 1.98, "step": 63798 }, { "epoch": 6.551756007393715, "grad_norm": 0.040945570915937424, "learning_rate": 0.01, "loss": 1.9852, "step": 63801 }, { "epoch": 6.552064078866297, "grad_norm": 0.07144046574831009, "learning_rate": 0.01, "loss": 1.98, "step": 63804 }, { "epoch": 6.552372150338878, "grad_norm": 0.043573856353759766, "learning_rate": 0.01, "loss": 1.969, "step": 63807 }, { "epoch": 6.55268022181146, "grad_norm": 0.04076509550213814, "learning_rate": 0.01, "loss": 1.9931, "step": 63810 }, { "epoch": 6.5529882932840415, "grad_norm": 0.042045023292303085, "learning_rate": 0.01, "loss": 1.9812, "step": 63813 }, { "epoch": 6.553296364756624, "grad_norm": 0.047036029398441315, "learning_rate": 0.01, "loss": 1.9671, "step": 63816 }, { "epoch": 6.553604436229206, "grad_norm": 0.08206555247306824, "learning_rate": 0.01, "loss": 1.997, "step": 63819 }, { "epoch": 6.553912507701787, "grad_norm": 0.04344995692372322, "learning_rate": 0.01, "loss": 1.9864, "step": 63822 }, { "epoch": 6.554220579174368, "grad_norm": 0.05620454624295235, "learning_rate": 0.01, "loss": 1.9974, "step": 63825 }, { "epoch": 6.55452865064695, "grad_norm": 0.042846135795116425, "learning_rate": 0.01, "loss": 1.967, "step": 63828 }, { "epoch": 6.554836722119532, "grad_norm": 0.04379443824291229, "learning_rate": 0.01, "loss": 1.9887, "step": 63831 }, { "epoch": 6.555144793592113, "grad_norm": 0.050486281514167786, "learning_rate": 0.01, "loss": 1.9708, "step": 63834 }, { "epoch": 6.555452865064695, "grad_norm": 0.10757488012313843, "learning_rate": 0.01, "loss": 1.96, "step": 63837 }, { "epoch": 6.555760936537276, "grad_norm": 0.09059060364961624, "learning_rate": 0.01, "loss": 1.9658, "step": 63840 }, { "epoch": 6.5560690080098585, "grad_norm": 0.055471502244472504, "learning_rate": 0.01, "loss": 1.9772, "step": 63843 }, { "epoch": 6.55637707948244, "grad_norm": 0.10934463888406754, "learning_rate": 0.01, "loss": 1.9681, "step": 63846 }, { "epoch": 6.556685150955022, "grad_norm": 0.08888571709394455, "learning_rate": 0.01, "loss": 1.9686, "step": 63849 }, { "epoch": 6.556993222427603, "grad_norm": 0.11083342134952545, "learning_rate": 0.01, "loss": 1.9915, "step": 63852 }, { "epoch": 6.557301293900185, "grad_norm": 0.07405796647071838, "learning_rate": 0.01, "loss": 1.9707, "step": 63855 }, { "epoch": 6.557609365372766, "grad_norm": 0.08451070636510849, "learning_rate": 0.01, "loss": 2.0033, "step": 63858 }, { "epoch": 6.557917436845348, "grad_norm": 0.05954553931951523, "learning_rate": 0.01, "loss": 1.9894, "step": 63861 }, { "epoch": 6.55822550831793, "grad_norm": 0.06008210405707359, "learning_rate": 0.01, "loss": 1.9529, "step": 63864 }, { "epoch": 6.558533579790511, "grad_norm": 0.0861048474907875, "learning_rate": 0.01, "loss": 1.9767, "step": 63867 }, { "epoch": 6.558841651263093, "grad_norm": 0.06240614131093025, "learning_rate": 0.01, "loss": 1.9825, "step": 63870 }, { "epoch": 6.5591497227356745, "grad_norm": 0.05393391102552414, "learning_rate": 0.01, "loss": 1.9604, "step": 63873 }, { "epoch": 6.559457794208257, "grad_norm": 0.08556719869375229, "learning_rate": 0.01, "loss": 1.9926, "step": 63876 }, { "epoch": 6.559765865680838, "grad_norm": 0.045306041836738586, "learning_rate": 0.01, "loss": 1.9863, "step": 63879 }, { "epoch": 6.56007393715342, "grad_norm": 0.11380963772535324, "learning_rate": 0.01, "loss": 2.0034, "step": 63882 }, { "epoch": 6.560382008626001, "grad_norm": 0.04248273745179176, "learning_rate": 0.01, "loss": 1.9724, "step": 63885 }, { "epoch": 6.560690080098583, "grad_norm": 0.040910713374614716, "learning_rate": 0.01, "loss": 1.9818, "step": 63888 }, { "epoch": 6.560998151571164, "grad_norm": 0.046450424939394, "learning_rate": 0.01, "loss": 1.9842, "step": 63891 }, { "epoch": 6.561306223043746, "grad_norm": 0.07835663855075836, "learning_rate": 0.01, "loss": 1.973, "step": 63894 }, { "epoch": 6.561614294516328, "grad_norm": 0.04756823554635048, "learning_rate": 0.01, "loss": 1.9691, "step": 63897 }, { "epoch": 6.561922365988909, "grad_norm": 0.10655516386032104, "learning_rate": 0.01, "loss": 1.9948, "step": 63900 }, { "epoch": 6.562230437461491, "grad_norm": 0.08743315935134888, "learning_rate": 0.01, "loss": 1.9793, "step": 63903 }, { "epoch": 6.562538508934073, "grad_norm": 0.05087851360440254, "learning_rate": 0.01, "loss": 1.9687, "step": 63906 }, { "epoch": 6.562846580406655, "grad_norm": 0.03635932877659798, "learning_rate": 0.01, "loss": 1.9521, "step": 63909 }, { "epoch": 6.563154651879236, "grad_norm": 0.09845136106014252, "learning_rate": 0.01, "loss": 1.979, "step": 63912 }, { "epoch": 6.563462723351818, "grad_norm": 0.03554631769657135, "learning_rate": 0.01, "loss": 1.9448, "step": 63915 }, { "epoch": 6.563770794824399, "grad_norm": 0.053068485110998154, "learning_rate": 0.01, "loss": 1.9789, "step": 63918 }, { "epoch": 6.564078866296981, "grad_norm": 0.09752979129552841, "learning_rate": 0.01, "loss": 1.9749, "step": 63921 }, { "epoch": 6.564386937769562, "grad_norm": 0.10045387595891953, "learning_rate": 0.01, "loss": 1.9748, "step": 63924 }, { "epoch": 6.564695009242144, "grad_norm": 0.1082577034831047, "learning_rate": 0.01, "loss": 1.9848, "step": 63927 }, { "epoch": 6.5650030807147255, "grad_norm": 0.09065879136323929, "learning_rate": 0.01, "loss": 1.9871, "step": 63930 }, { "epoch": 6.5653111521873075, "grad_norm": 0.06496050953865051, "learning_rate": 0.01, "loss": 1.9963, "step": 63933 }, { "epoch": 6.565619223659889, "grad_norm": 0.07092882692813873, "learning_rate": 0.01, "loss": 1.9729, "step": 63936 }, { "epoch": 6.565927295132471, "grad_norm": 0.048295971006155014, "learning_rate": 0.01, "loss": 1.9828, "step": 63939 }, { "epoch": 6.566235366605053, "grad_norm": 0.045929424464702606, "learning_rate": 0.01, "loss": 1.9922, "step": 63942 }, { "epoch": 6.566543438077634, "grad_norm": 0.03663928061723709, "learning_rate": 0.01, "loss": 1.9835, "step": 63945 }, { "epoch": 6.566851509550216, "grad_norm": 0.03723525255918503, "learning_rate": 0.01, "loss": 1.9486, "step": 63948 }, { "epoch": 6.567159581022797, "grad_norm": 0.05147719010710716, "learning_rate": 0.01, "loss": 1.9737, "step": 63951 }, { "epoch": 6.567467652495379, "grad_norm": 0.04248051717877388, "learning_rate": 0.01, "loss": 1.9759, "step": 63954 }, { "epoch": 6.56777572396796, "grad_norm": 0.054685961455106735, "learning_rate": 0.01, "loss": 1.9616, "step": 63957 }, { "epoch": 6.568083795440542, "grad_norm": 0.07126769423484802, "learning_rate": 0.01, "loss": 1.9693, "step": 63960 }, { "epoch": 6.568391866913124, "grad_norm": 0.0660753920674324, "learning_rate": 0.01, "loss": 1.9703, "step": 63963 }, { "epoch": 6.568699938385706, "grad_norm": 0.04418491572141647, "learning_rate": 0.01, "loss": 1.9591, "step": 63966 }, { "epoch": 6.569008009858287, "grad_norm": 0.03849758952856064, "learning_rate": 0.01, "loss": 1.9853, "step": 63969 }, { "epoch": 6.569316081330869, "grad_norm": 0.0360284261405468, "learning_rate": 0.01, "loss": 1.9819, "step": 63972 }, { "epoch": 6.56962415280345, "grad_norm": 0.04783300310373306, "learning_rate": 0.01, "loss": 1.9934, "step": 63975 }, { "epoch": 6.569932224276032, "grad_norm": 0.104099340736866, "learning_rate": 0.01, "loss": 1.9797, "step": 63978 }, { "epoch": 6.570240295748613, "grad_norm": 0.06214939430356026, "learning_rate": 0.01, "loss": 1.9825, "step": 63981 }, { "epoch": 6.570548367221195, "grad_norm": 0.03583189472556114, "learning_rate": 0.01, "loss": 1.962, "step": 63984 }, { "epoch": 6.570856438693777, "grad_norm": 0.03806734457612038, "learning_rate": 0.01, "loss": 1.9445, "step": 63987 }, { "epoch": 6.5711645101663585, "grad_norm": 0.037077397108078, "learning_rate": 0.01, "loss": 1.9754, "step": 63990 }, { "epoch": 6.5714725816389405, "grad_norm": 0.1366073340177536, "learning_rate": 0.01, "loss": 1.9836, "step": 63993 }, { "epoch": 6.571780653111522, "grad_norm": 0.06739377975463867, "learning_rate": 0.01, "loss": 1.946, "step": 63996 }, { "epoch": 6.572088724584104, "grad_norm": 0.041534364223480225, "learning_rate": 0.01, "loss": 1.9853, "step": 63999 }, { "epoch": 6.572396796056685, "grad_norm": 0.04053931310772896, "learning_rate": 0.01, "loss": 1.9929, "step": 64002 }, { "epoch": 6.572704867529267, "grad_norm": 0.04500468447804451, "learning_rate": 0.01, "loss": 1.9702, "step": 64005 }, { "epoch": 6.573012939001848, "grad_norm": 0.07406989485025406, "learning_rate": 0.01, "loss": 1.9843, "step": 64008 }, { "epoch": 6.57332101047443, "grad_norm": 0.05247531831264496, "learning_rate": 0.01, "loss": 1.9658, "step": 64011 }, { "epoch": 6.573629081947011, "grad_norm": 0.117593914270401, "learning_rate": 0.01, "loss": 1.9978, "step": 64014 }, { "epoch": 6.573937153419593, "grad_norm": 0.07242528349161148, "learning_rate": 0.01, "loss": 1.9675, "step": 64017 }, { "epoch": 6.574245224892175, "grad_norm": 0.05359083414077759, "learning_rate": 0.01, "loss": 1.9632, "step": 64020 }, { "epoch": 6.574553296364757, "grad_norm": 0.036466315388679504, "learning_rate": 0.01, "loss": 1.9753, "step": 64023 }, { "epoch": 6.574861367837338, "grad_norm": 0.03925064578652382, "learning_rate": 0.01, "loss": 1.9683, "step": 64026 }, { "epoch": 6.57516943930992, "grad_norm": 0.05542049929499626, "learning_rate": 0.01, "loss": 1.9873, "step": 64029 }, { "epoch": 6.575477510782502, "grad_norm": 0.06237075477838516, "learning_rate": 0.01, "loss": 1.9851, "step": 64032 }, { "epoch": 6.575785582255083, "grad_norm": 0.04179569333791733, "learning_rate": 0.01, "loss": 1.9761, "step": 64035 }, { "epoch": 6.576093653727665, "grad_norm": 0.11507870256900787, "learning_rate": 0.01, "loss": 1.9753, "step": 64038 }, { "epoch": 6.576401725200246, "grad_norm": 0.06069584935903549, "learning_rate": 0.01, "loss": 1.9697, "step": 64041 }, { "epoch": 6.576709796672828, "grad_norm": 0.041366275399923325, "learning_rate": 0.01, "loss": 1.9638, "step": 64044 }, { "epoch": 6.5770178681454095, "grad_norm": 0.06424157321453094, "learning_rate": 0.01, "loss": 1.9826, "step": 64047 }, { "epoch": 6.5773259396179915, "grad_norm": 0.06879021972417831, "learning_rate": 0.01, "loss": 2.0027, "step": 64050 }, { "epoch": 6.577634011090573, "grad_norm": 0.043172743171453476, "learning_rate": 0.01, "loss": 1.9724, "step": 64053 }, { "epoch": 6.577942082563155, "grad_norm": 0.05824809893965721, "learning_rate": 0.01, "loss": 1.9463, "step": 64056 }, { "epoch": 6.578250154035736, "grad_norm": 0.045327652245759964, "learning_rate": 0.01, "loss": 1.9779, "step": 64059 }, { "epoch": 6.578558225508318, "grad_norm": 0.11322353035211563, "learning_rate": 0.01, "loss": 1.981, "step": 64062 }, { "epoch": 6.5788662969809, "grad_norm": 0.06283524632453918, "learning_rate": 0.01, "loss": 1.9633, "step": 64065 }, { "epoch": 6.579174368453481, "grad_norm": 0.06951749324798584, "learning_rate": 0.01, "loss": 1.9673, "step": 64068 }, { "epoch": 6.579482439926063, "grad_norm": 0.1091168001294136, "learning_rate": 0.01, "loss": 1.9652, "step": 64071 }, { "epoch": 6.579790511398644, "grad_norm": 0.0779736191034317, "learning_rate": 0.01, "loss": 1.9903, "step": 64074 }, { "epoch": 6.580098582871226, "grad_norm": 0.06814195215702057, "learning_rate": 0.01, "loss": 1.9717, "step": 64077 }, { "epoch": 6.580406654343808, "grad_norm": 0.06565315276384354, "learning_rate": 0.01, "loss": 1.9895, "step": 64080 }, { "epoch": 6.58071472581639, "grad_norm": 0.08950956165790558, "learning_rate": 0.01, "loss": 1.9727, "step": 64083 }, { "epoch": 6.581022797288971, "grad_norm": 0.06202126666903496, "learning_rate": 0.01, "loss": 1.992, "step": 64086 }, { "epoch": 6.581330868761553, "grad_norm": 0.10947906970977783, "learning_rate": 0.01, "loss": 1.988, "step": 64089 }, { "epoch": 6.581638940234134, "grad_norm": 0.051477983593940735, "learning_rate": 0.01, "loss": 1.9994, "step": 64092 }, { "epoch": 6.581947011706716, "grad_norm": 0.05183662101626396, "learning_rate": 0.01, "loss": 1.9831, "step": 64095 }, { "epoch": 6.582255083179298, "grad_norm": 0.04651165008544922, "learning_rate": 0.01, "loss": 1.9713, "step": 64098 }, { "epoch": 6.582563154651879, "grad_norm": 0.035913266241550446, "learning_rate": 0.01, "loss": 1.96, "step": 64101 }, { "epoch": 6.58287122612446, "grad_norm": 0.0991610661149025, "learning_rate": 0.01, "loss": 2.0085, "step": 64104 }, { "epoch": 6.5831792975970425, "grad_norm": 0.04391265660524368, "learning_rate": 0.01, "loss": 1.9765, "step": 64107 }, { "epoch": 6.5834873690696245, "grad_norm": 0.14331793785095215, "learning_rate": 0.01, "loss": 2.026, "step": 64110 }, { "epoch": 6.583795440542206, "grad_norm": 0.10682855546474457, "learning_rate": 0.01, "loss": 1.965, "step": 64113 }, { "epoch": 6.584103512014788, "grad_norm": 0.07142464816570282, "learning_rate": 0.01, "loss": 1.9844, "step": 64116 }, { "epoch": 6.584411583487369, "grad_norm": 0.05298778787255287, "learning_rate": 0.01, "loss": 1.9831, "step": 64119 }, { "epoch": 6.584719654959951, "grad_norm": 0.10031582415103912, "learning_rate": 0.01, "loss": 1.9498, "step": 64122 }, { "epoch": 6.585027726432532, "grad_norm": 0.03978649899363518, "learning_rate": 0.01, "loss": 1.9734, "step": 64125 }, { "epoch": 6.585335797905114, "grad_norm": 0.05913195759057999, "learning_rate": 0.01, "loss": 1.9689, "step": 64128 }, { "epoch": 6.585643869377695, "grad_norm": 0.03559190407395363, "learning_rate": 0.01, "loss": 1.9879, "step": 64131 }, { "epoch": 6.585951940850277, "grad_norm": 0.09079554677009583, "learning_rate": 0.01, "loss": 1.9875, "step": 64134 }, { "epoch": 6.5862600123228585, "grad_norm": 0.04525060951709747, "learning_rate": 0.01, "loss": 1.9758, "step": 64137 }, { "epoch": 6.586568083795441, "grad_norm": 0.03372851014137268, "learning_rate": 0.01, "loss": 1.983, "step": 64140 }, { "epoch": 6.586876155268023, "grad_norm": 0.08057595044374466, "learning_rate": 0.01, "loss": 1.9687, "step": 64143 }, { "epoch": 6.587184226740604, "grad_norm": 0.09445854276418686, "learning_rate": 0.01, "loss": 1.9685, "step": 64146 }, { "epoch": 6.587492298213186, "grad_norm": 0.07656941562891006, "learning_rate": 0.01, "loss": 1.987, "step": 64149 }, { "epoch": 6.587800369685767, "grad_norm": 0.08286568522453308, "learning_rate": 0.01, "loss": 1.9887, "step": 64152 }, { "epoch": 6.588108441158349, "grad_norm": 0.11229046434164047, "learning_rate": 0.01, "loss": 1.9904, "step": 64155 }, { "epoch": 6.58841651263093, "grad_norm": 0.06434221565723419, "learning_rate": 0.01, "loss": 1.9891, "step": 64158 }, { "epoch": 6.588724584103512, "grad_norm": 0.08923232555389404, "learning_rate": 0.01, "loss": 1.9935, "step": 64161 }, { "epoch": 6.589032655576093, "grad_norm": 0.07555590569972992, "learning_rate": 0.01, "loss": 1.9707, "step": 64164 }, { "epoch": 6.5893407270486755, "grad_norm": 0.05860813334584236, "learning_rate": 0.01, "loss": 1.982, "step": 64167 }, { "epoch": 6.589648798521257, "grad_norm": 0.03441310673952103, "learning_rate": 0.01, "loss": 1.9682, "step": 64170 }, { "epoch": 6.589956869993839, "grad_norm": 0.0361168198287487, "learning_rate": 0.01, "loss": 1.9671, "step": 64173 }, { "epoch": 6.59026494146642, "grad_norm": 0.04222318157553673, "learning_rate": 0.01, "loss": 1.9749, "step": 64176 }, { "epoch": 6.590573012939002, "grad_norm": 0.12981876730918884, "learning_rate": 0.01, "loss": 1.9699, "step": 64179 }, { "epoch": 6.590881084411583, "grad_norm": 0.05409705266356468, "learning_rate": 0.01, "loss": 1.9807, "step": 64182 }, { "epoch": 6.591189155884165, "grad_norm": 0.053002189844846725, "learning_rate": 0.01, "loss": 1.9874, "step": 64185 }, { "epoch": 6.591497227356747, "grad_norm": 0.061368245631456375, "learning_rate": 0.01, "loss": 1.9612, "step": 64188 }, { "epoch": 6.591805298829328, "grad_norm": 0.10517262667417526, "learning_rate": 0.01, "loss": 1.973, "step": 64191 }, { "epoch": 6.59211337030191, "grad_norm": 0.06446947157382965, "learning_rate": 0.01, "loss": 1.9956, "step": 64194 }, { "epoch": 6.5924214417744915, "grad_norm": 0.06423608213663101, "learning_rate": 0.01, "loss": 1.9959, "step": 64197 }, { "epoch": 6.592729513247074, "grad_norm": 0.034221209585666656, "learning_rate": 0.01, "loss": 1.962, "step": 64200 }, { "epoch": 6.593037584719655, "grad_norm": 0.12078302353620529, "learning_rate": 0.01, "loss": 1.9731, "step": 64203 }, { "epoch": 6.593345656192237, "grad_norm": 0.08189702033996582, "learning_rate": 0.01, "loss": 1.9648, "step": 64206 }, { "epoch": 6.593653727664818, "grad_norm": 0.0505688339471817, "learning_rate": 0.01, "loss": 1.9889, "step": 64209 }, { "epoch": 6.5939617991374, "grad_norm": 0.05289214104413986, "learning_rate": 0.01, "loss": 1.9856, "step": 64212 }, { "epoch": 6.594269870609981, "grad_norm": 0.0513903982937336, "learning_rate": 0.01, "loss": 2.0154, "step": 64215 }, { "epoch": 6.594577942082563, "grad_norm": 0.06634233146905899, "learning_rate": 0.01, "loss": 1.964, "step": 64218 }, { "epoch": 6.594886013555145, "grad_norm": 0.0847700983285904, "learning_rate": 0.01, "loss": 1.9755, "step": 64221 }, { "epoch": 6.595194085027726, "grad_norm": 0.08042684942483902, "learning_rate": 0.01, "loss": 1.991, "step": 64224 }, { "epoch": 6.595502156500308, "grad_norm": 0.046448756009340286, "learning_rate": 0.01, "loss": 1.9801, "step": 64227 }, { "epoch": 6.59581022797289, "grad_norm": 0.09152430295944214, "learning_rate": 0.01, "loss": 1.997, "step": 64230 }, { "epoch": 6.596118299445472, "grad_norm": 0.05537572130560875, "learning_rate": 0.01, "loss": 2.0014, "step": 64233 }, { "epoch": 6.596426370918053, "grad_norm": 0.040232930332422256, "learning_rate": 0.01, "loss": 1.9843, "step": 64236 }, { "epoch": 6.596734442390635, "grad_norm": 0.040378376841545105, "learning_rate": 0.01, "loss": 1.9749, "step": 64239 }, { "epoch": 6.597042513863216, "grad_norm": 0.036219045519828796, "learning_rate": 0.01, "loss": 1.9633, "step": 64242 }, { "epoch": 6.597350585335798, "grad_norm": 0.0671059712767601, "learning_rate": 0.01, "loss": 1.9738, "step": 64245 }, { "epoch": 6.597658656808379, "grad_norm": 0.05195783078670502, "learning_rate": 0.01, "loss": 1.9798, "step": 64248 }, { "epoch": 6.597966728280961, "grad_norm": 0.05707523226737976, "learning_rate": 0.01, "loss": 1.9654, "step": 64251 }, { "epoch": 6.5982747997535425, "grad_norm": 0.10035475343465805, "learning_rate": 0.01, "loss": 2.0062, "step": 64254 }, { "epoch": 6.5985828712261245, "grad_norm": 0.08237627893686295, "learning_rate": 0.01, "loss": 1.9667, "step": 64257 }, { "epoch": 6.598890942698706, "grad_norm": 0.06940212100744247, "learning_rate": 0.01, "loss": 1.9815, "step": 64260 }, { "epoch": 6.599199014171288, "grad_norm": 0.08880186080932617, "learning_rate": 0.01, "loss": 1.9756, "step": 64263 }, { "epoch": 6.59950708564387, "grad_norm": 0.07859902083873749, "learning_rate": 0.01, "loss": 1.9807, "step": 64266 }, { "epoch": 6.599815157116451, "grad_norm": 0.10869091004133224, "learning_rate": 0.01, "loss": 1.9805, "step": 64269 }, { "epoch": 6.600123228589033, "grad_norm": 0.09375559538602829, "learning_rate": 0.01, "loss": 1.9978, "step": 64272 }, { "epoch": 6.600431300061614, "grad_norm": 0.047256410121917725, "learning_rate": 0.01, "loss": 2.0044, "step": 64275 }, { "epoch": 6.600739371534196, "grad_norm": 0.04523398354649544, "learning_rate": 0.01, "loss": 1.9741, "step": 64278 }, { "epoch": 6.601047443006777, "grad_norm": 0.03981922194361687, "learning_rate": 0.01, "loss": 1.9791, "step": 64281 }, { "epoch": 6.601355514479359, "grad_norm": 0.05818277969956398, "learning_rate": 0.01, "loss": 2.002, "step": 64284 }, { "epoch": 6.601663585951941, "grad_norm": 0.04212537035346031, "learning_rate": 0.01, "loss": 1.9815, "step": 64287 }, { "epoch": 6.601971657424523, "grad_norm": 0.033235009759664536, "learning_rate": 0.01, "loss": 1.9896, "step": 64290 }, { "epoch": 6.602279728897104, "grad_norm": 0.0338917002081871, "learning_rate": 0.01, "loss": 1.9502, "step": 64293 }, { "epoch": 6.602587800369686, "grad_norm": 0.07970410585403442, "learning_rate": 0.01, "loss": 1.9692, "step": 64296 }, { "epoch": 6.602895871842268, "grad_norm": 0.05640435963869095, "learning_rate": 0.01, "loss": 2.0047, "step": 64299 }, { "epoch": 6.603203943314849, "grad_norm": 0.03606804460287094, "learning_rate": 0.01, "loss": 1.98, "step": 64302 }, { "epoch": 6.60351201478743, "grad_norm": 0.04679587855935097, "learning_rate": 0.01, "loss": 2.0004, "step": 64305 }, { "epoch": 6.603820086260012, "grad_norm": 0.10526128858327866, "learning_rate": 0.01, "loss": 1.9714, "step": 64308 }, { "epoch": 6.604128157732594, "grad_norm": 0.1200772076845169, "learning_rate": 0.01, "loss": 1.974, "step": 64311 }, { "epoch": 6.6044362292051755, "grad_norm": 0.1391524225473404, "learning_rate": 0.01, "loss": 1.965, "step": 64314 }, { "epoch": 6.6047443006777575, "grad_norm": 0.08838554471731186, "learning_rate": 0.01, "loss": 1.9816, "step": 64317 }, { "epoch": 6.605052372150339, "grad_norm": 0.07253210991621017, "learning_rate": 0.01, "loss": 2.0028, "step": 64320 }, { "epoch": 6.605360443622921, "grad_norm": 0.04417060315608978, "learning_rate": 0.01, "loss": 1.9559, "step": 64323 }, { "epoch": 6.605668515095502, "grad_norm": 0.04443688318133354, "learning_rate": 0.01, "loss": 1.9699, "step": 64326 }, { "epoch": 6.605976586568084, "grad_norm": 0.05711906775832176, "learning_rate": 0.01, "loss": 1.9709, "step": 64329 }, { "epoch": 6.606284658040665, "grad_norm": 0.05172707885503769, "learning_rate": 0.01, "loss": 1.9804, "step": 64332 }, { "epoch": 6.606592729513247, "grad_norm": 0.08967714011669159, "learning_rate": 0.01, "loss": 1.9843, "step": 64335 }, { "epoch": 6.606900800985828, "grad_norm": 0.03300314024090767, "learning_rate": 0.01, "loss": 1.9868, "step": 64338 }, { "epoch": 6.60720887245841, "grad_norm": 0.052630916237831116, "learning_rate": 0.01, "loss": 1.9787, "step": 64341 }, { "epoch": 6.607516943930992, "grad_norm": 0.041368499398231506, "learning_rate": 0.01, "loss": 1.9676, "step": 64344 }, { "epoch": 6.607825015403574, "grad_norm": 0.04549229145050049, "learning_rate": 0.01, "loss": 1.972, "step": 64347 }, { "epoch": 6.608133086876156, "grad_norm": 0.057475391775369644, "learning_rate": 0.01, "loss": 1.977, "step": 64350 }, { "epoch": 6.608441158348737, "grad_norm": 0.056770164519548416, "learning_rate": 0.01, "loss": 1.9915, "step": 64353 }, { "epoch": 6.608749229821319, "grad_norm": 0.12769392132759094, "learning_rate": 0.01, "loss": 1.9643, "step": 64356 }, { "epoch": 6.6090573012939, "grad_norm": 0.05759736895561218, "learning_rate": 0.01, "loss": 1.9962, "step": 64359 }, { "epoch": 6.609365372766482, "grad_norm": 0.10004235059022903, "learning_rate": 0.01, "loss": 1.9524, "step": 64362 }, { "epoch": 6.609673444239063, "grad_norm": 0.0365755669772625, "learning_rate": 0.01, "loss": 1.9676, "step": 64365 }, { "epoch": 6.609981515711645, "grad_norm": 0.07999947667121887, "learning_rate": 0.01, "loss": 1.9623, "step": 64368 }, { "epoch": 6.610289587184226, "grad_norm": 0.055819109082221985, "learning_rate": 0.01, "loss": 1.9727, "step": 64371 }, { "epoch": 6.6105976586568085, "grad_norm": 0.046389635652303696, "learning_rate": 0.01, "loss": 1.9857, "step": 64374 }, { "epoch": 6.61090573012939, "grad_norm": 0.04378519952297211, "learning_rate": 0.01, "loss": 1.9706, "step": 64377 }, { "epoch": 6.611213801601972, "grad_norm": 0.04519166797399521, "learning_rate": 0.01, "loss": 1.9777, "step": 64380 }, { "epoch": 6.611521873074553, "grad_norm": 0.037725985050201416, "learning_rate": 0.01, "loss": 1.9827, "step": 64383 }, { "epoch": 6.611829944547135, "grad_norm": 0.03793521970510483, "learning_rate": 0.01, "loss": 1.9545, "step": 64386 }, { "epoch": 6.612138016019717, "grad_norm": 0.04742049798369408, "learning_rate": 0.01, "loss": 1.9686, "step": 64389 }, { "epoch": 6.612446087492298, "grad_norm": 0.059588026255369186, "learning_rate": 0.01, "loss": 1.976, "step": 64392 }, { "epoch": 6.61275415896488, "grad_norm": 0.06134435534477234, "learning_rate": 0.01, "loss": 1.9881, "step": 64395 }, { "epoch": 6.613062230437461, "grad_norm": 0.11348873376846313, "learning_rate": 0.01, "loss": 1.9899, "step": 64398 }, { "epoch": 6.613370301910043, "grad_norm": 0.03893901780247688, "learning_rate": 0.01, "loss": 1.9708, "step": 64401 }, { "epoch": 6.6136783733826245, "grad_norm": 0.11874943226575851, "learning_rate": 0.01, "loss": 1.9656, "step": 64404 }, { "epoch": 6.613986444855207, "grad_norm": 0.08608514815568924, "learning_rate": 0.01, "loss": 1.9592, "step": 64407 }, { "epoch": 6.614294516327788, "grad_norm": 0.04932364821434021, "learning_rate": 0.01, "loss": 1.9503, "step": 64410 }, { "epoch": 6.61460258780037, "grad_norm": 0.04308156296610832, "learning_rate": 0.01, "loss": 1.9613, "step": 64413 }, { "epoch": 6.614910659272951, "grad_norm": 0.050084516406059265, "learning_rate": 0.01, "loss": 1.9996, "step": 64416 }, { "epoch": 6.615218730745533, "grad_norm": 0.054838646203279495, "learning_rate": 0.01, "loss": 1.9968, "step": 64419 }, { "epoch": 6.615526802218115, "grad_norm": 0.08248993754386902, "learning_rate": 0.01, "loss": 1.964, "step": 64422 }, { "epoch": 6.615834873690696, "grad_norm": 0.09142714738845825, "learning_rate": 0.01, "loss": 1.9613, "step": 64425 }, { "epoch": 6.616142945163277, "grad_norm": 0.07664018124341965, "learning_rate": 0.01, "loss": 1.9591, "step": 64428 }, { "epoch": 6.616451016635859, "grad_norm": 0.10052567720413208, "learning_rate": 0.01, "loss": 1.9717, "step": 64431 }, { "epoch": 6.6167590881084415, "grad_norm": 0.04676009342074394, "learning_rate": 0.01, "loss": 1.9909, "step": 64434 }, { "epoch": 6.617067159581023, "grad_norm": 0.05421235412359238, "learning_rate": 0.01, "loss": 1.9797, "step": 64437 }, { "epoch": 6.617375231053605, "grad_norm": 0.05721297487616539, "learning_rate": 0.01, "loss": 1.9847, "step": 64440 }, { "epoch": 6.617683302526186, "grad_norm": 0.05209130793809891, "learning_rate": 0.01, "loss": 2.0027, "step": 64443 }, { "epoch": 6.617991373998768, "grad_norm": 0.032787423580884933, "learning_rate": 0.01, "loss": 1.979, "step": 64446 }, { "epoch": 6.618299445471349, "grad_norm": 0.11715228855609894, "learning_rate": 0.01, "loss": 1.9863, "step": 64449 }, { "epoch": 6.618607516943931, "grad_norm": 0.044540733098983765, "learning_rate": 0.01, "loss": 1.9501, "step": 64452 }, { "epoch": 6.618915588416512, "grad_norm": 0.06670498847961426, "learning_rate": 0.01, "loss": 1.9556, "step": 64455 }, { "epoch": 6.619223659889094, "grad_norm": 0.09549476206302643, "learning_rate": 0.01, "loss": 1.9744, "step": 64458 }, { "epoch": 6.6195317313616755, "grad_norm": 0.06052359193563461, "learning_rate": 0.01, "loss": 1.9866, "step": 64461 }, { "epoch": 6.6198398028342575, "grad_norm": 0.04301074892282486, "learning_rate": 0.01, "loss": 1.9816, "step": 64464 }, { "epoch": 6.62014787430684, "grad_norm": 0.06434419006109238, "learning_rate": 0.01, "loss": 1.9689, "step": 64467 }, { "epoch": 6.620455945779421, "grad_norm": 0.051222655922174454, "learning_rate": 0.01, "loss": 1.9613, "step": 64470 }, { "epoch": 6.620764017252003, "grad_norm": 0.06115943565964699, "learning_rate": 0.01, "loss": 1.9729, "step": 64473 }, { "epoch": 6.621072088724584, "grad_norm": 0.07994543761014938, "learning_rate": 0.01, "loss": 1.9754, "step": 64476 }, { "epoch": 6.621380160197166, "grad_norm": 0.06953758746385574, "learning_rate": 0.01, "loss": 1.9682, "step": 64479 }, { "epoch": 6.621688231669747, "grad_norm": 0.12298289686441422, "learning_rate": 0.01, "loss": 1.9851, "step": 64482 }, { "epoch": 6.621996303142329, "grad_norm": 0.05092281848192215, "learning_rate": 0.01, "loss": 1.9941, "step": 64485 }, { "epoch": 6.62230437461491, "grad_norm": 0.043478354811668396, "learning_rate": 0.01, "loss": 1.9795, "step": 64488 }, { "epoch": 6.622612446087492, "grad_norm": 0.043610502034425735, "learning_rate": 0.01, "loss": 1.9741, "step": 64491 }, { "epoch": 6.622920517560074, "grad_norm": 0.052348487079143524, "learning_rate": 0.01, "loss": 2.0082, "step": 64494 }, { "epoch": 6.623228589032656, "grad_norm": 0.05436374992132187, "learning_rate": 0.01, "loss": 1.9554, "step": 64497 }, { "epoch": 6.623536660505238, "grad_norm": 0.057916752994060516, "learning_rate": 0.01, "loss": 1.9778, "step": 64500 }, { "epoch": 6.623844731977819, "grad_norm": 0.047713443636894226, "learning_rate": 0.01, "loss": 1.9707, "step": 64503 }, { "epoch": 6.6241528034504, "grad_norm": 0.03382120653986931, "learning_rate": 0.01, "loss": 1.9799, "step": 64506 }, { "epoch": 6.624460874922982, "grad_norm": 0.0990116074681282, "learning_rate": 0.01, "loss": 1.9705, "step": 64509 }, { "epoch": 6.624768946395564, "grad_norm": 0.0787537470459938, "learning_rate": 0.01, "loss": 1.988, "step": 64512 }, { "epoch": 6.625077017868145, "grad_norm": 0.07667285948991776, "learning_rate": 0.01, "loss": 1.9846, "step": 64515 }, { "epoch": 6.625385089340727, "grad_norm": 0.04837285354733467, "learning_rate": 0.01, "loss": 1.9926, "step": 64518 }, { "epoch": 6.6256931608133085, "grad_norm": 0.04143200442194939, "learning_rate": 0.01, "loss": 2.0021, "step": 64521 }, { "epoch": 6.6260012322858906, "grad_norm": 0.033114057034254074, "learning_rate": 0.01, "loss": 1.9956, "step": 64524 }, { "epoch": 6.626309303758472, "grad_norm": 0.0412205308675766, "learning_rate": 0.01, "loss": 1.9918, "step": 64527 }, { "epoch": 6.626617375231054, "grad_norm": 0.10429148375988007, "learning_rate": 0.01, "loss": 1.9965, "step": 64530 }, { "epoch": 6.626925446703635, "grad_norm": 0.05905142053961754, "learning_rate": 0.01, "loss": 1.9826, "step": 64533 }, { "epoch": 6.627233518176217, "grad_norm": 0.05937071144580841, "learning_rate": 0.01, "loss": 1.9721, "step": 64536 }, { "epoch": 6.627541589648798, "grad_norm": 0.10505139082670212, "learning_rate": 0.01, "loss": 1.9879, "step": 64539 }, { "epoch": 6.62784966112138, "grad_norm": 0.08412894606590271, "learning_rate": 0.01, "loss": 1.9716, "step": 64542 }, { "epoch": 6.628157732593962, "grad_norm": 0.07351752370595932, "learning_rate": 0.01, "loss": 1.9741, "step": 64545 }, { "epoch": 6.628465804066543, "grad_norm": 0.07745961844921112, "learning_rate": 0.01, "loss": 1.9912, "step": 64548 }, { "epoch": 6.6287738755391254, "grad_norm": 0.07289955765008926, "learning_rate": 0.01, "loss": 1.9546, "step": 64551 }, { "epoch": 6.629081947011707, "grad_norm": 0.04405929520726204, "learning_rate": 0.01, "loss": 1.9712, "step": 64554 }, { "epoch": 6.629390018484289, "grad_norm": 0.0912909060716629, "learning_rate": 0.01, "loss": 1.9768, "step": 64557 }, { "epoch": 6.62969808995687, "grad_norm": 0.042689189314842224, "learning_rate": 0.01, "loss": 1.9859, "step": 64560 }, { "epoch": 6.630006161429452, "grad_norm": 0.14025220274925232, "learning_rate": 0.01, "loss": 1.9644, "step": 64563 }, { "epoch": 6.630314232902033, "grad_norm": 0.1245136708021164, "learning_rate": 0.01, "loss": 1.9891, "step": 64566 }, { "epoch": 6.630622304374615, "grad_norm": 0.09421566873788834, "learning_rate": 0.01, "loss": 1.9583, "step": 64569 }, { "epoch": 6.630930375847196, "grad_norm": 0.0803411453962326, "learning_rate": 0.01, "loss": 1.9709, "step": 64572 }, { "epoch": 6.631238447319778, "grad_norm": 0.06703267991542816, "learning_rate": 0.01, "loss": 1.9602, "step": 64575 }, { "epoch": 6.6315465187923595, "grad_norm": 0.1008155420422554, "learning_rate": 0.01, "loss": 1.9158, "step": 64578 }, { "epoch": 6.6318545902649415, "grad_norm": 0.08870470523834229, "learning_rate": 0.01, "loss": 1.9768, "step": 64581 }, { "epoch": 6.632162661737523, "grad_norm": 0.06180182099342346, "learning_rate": 0.01, "loss": 1.992, "step": 64584 }, { "epoch": 6.632470733210105, "grad_norm": 0.06106216087937355, "learning_rate": 0.01, "loss": 1.9888, "step": 64587 }, { "epoch": 6.632778804682687, "grad_norm": 0.05340215936303139, "learning_rate": 0.01, "loss": 1.9667, "step": 64590 }, { "epoch": 6.633086876155268, "grad_norm": 0.04080619663000107, "learning_rate": 0.01, "loss": 1.97, "step": 64593 }, { "epoch": 6.63339494762785, "grad_norm": 0.031813837587833405, "learning_rate": 0.01, "loss": 1.9728, "step": 64596 }, { "epoch": 6.633703019100431, "grad_norm": 0.0589885450899601, "learning_rate": 0.01, "loss": 1.9692, "step": 64599 }, { "epoch": 6.634011090573013, "grad_norm": 0.14397123456001282, "learning_rate": 0.01, "loss": 1.9954, "step": 64602 }, { "epoch": 6.634319162045594, "grad_norm": 0.058227211236953735, "learning_rate": 0.01, "loss": 1.9922, "step": 64605 }, { "epoch": 6.634627233518176, "grad_norm": 0.05160403624176979, "learning_rate": 0.01, "loss": 1.953, "step": 64608 }, { "epoch": 6.634935304990758, "grad_norm": 0.052490971982479095, "learning_rate": 0.01, "loss": 1.9786, "step": 64611 }, { "epoch": 6.63524337646334, "grad_norm": 0.06203007325530052, "learning_rate": 0.01, "loss": 1.991, "step": 64614 }, { "epoch": 6.635551447935921, "grad_norm": 0.06790245324373245, "learning_rate": 0.01, "loss": 1.9649, "step": 64617 }, { "epoch": 6.635859519408503, "grad_norm": 0.0575333833694458, "learning_rate": 0.01, "loss": 1.9963, "step": 64620 }, { "epoch": 6.636167590881085, "grad_norm": 0.05253339931368828, "learning_rate": 0.01, "loss": 1.9631, "step": 64623 }, { "epoch": 6.636475662353666, "grad_norm": 0.0605839341878891, "learning_rate": 0.01, "loss": 1.9648, "step": 64626 }, { "epoch": 6.636783733826247, "grad_norm": 0.046164620667696, "learning_rate": 0.01, "loss": 1.9797, "step": 64629 }, { "epoch": 6.637091805298829, "grad_norm": 0.04549845680594444, "learning_rate": 0.01, "loss": 1.9647, "step": 64632 }, { "epoch": 6.637399876771411, "grad_norm": 0.04419662430882454, "learning_rate": 0.01, "loss": 1.9575, "step": 64635 }, { "epoch": 6.6377079482439925, "grad_norm": 0.04874144867062569, "learning_rate": 0.01, "loss": 1.9874, "step": 64638 }, { "epoch": 6.6380160197165745, "grad_norm": 0.06364091485738754, "learning_rate": 0.01, "loss": 1.9775, "step": 64641 }, { "epoch": 6.638324091189156, "grad_norm": 0.11188903450965881, "learning_rate": 0.01, "loss": 1.9876, "step": 64644 }, { "epoch": 6.638632162661738, "grad_norm": 0.03762582689523697, "learning_rate": 0.01, "loss": 1.97, "step": 64647 }, { "epoch": 6.638940234134319, "grad_norm": 0.03571620211005211, "learning_rate": 0.01, "loss": 1.975, "step": 64650 }, { "epoch": 6.639248305606901, "grad_norm": 0.05100245773792267, "learning_rate": 0.01, "loss": 1.9701, "step": 64653 }, { "epoch": 6.639556377079482, "grad_norm": 0.0773586705327034, "learning_rate": 0.01, "loss": 1.9905, "step": 64656 }, { "epoch": 6.639864448552064, "grad_norm": 0.056971583515405655, "learning_rate": 0.01, "loss": 1.9654, "step": 64659 }, { "epoch": 6.640172520024645, "grad_norm": 0.04947191849350929, "learning_rate": 0.01, "loss": 1.9586, "step": 64662 }, { "epoch": 6.640480591497227, "grad_norm": 0.039240311831235886, "learning_rate": 0.01, "loss": 2.0069, "step": 64665 }, { "epoch": 6.640788662969809, "grad_norm": 0.03866692632436752, "learning_rate": 0.01, "loss": 1.9856, "step": 64668 }, { "epoch": 6.641096734442391, "grad_norm": 0.05008673667907715, "learning_rate": 0.01, "loss": 1.9802, "step": 64671 }, { "epoch": 6.641404805914973, "grad_norm": 0.12828943133354187, "learning_rate": 0.01, "loss": 1.9751, "step": 64674 }, { "epoch": 6.641712877387554, "grad_norm": 0.04267043247818947, "learning_rate": 0.01, "loss": 1.9932, "step": 64677 }, { "epoch": 6.642020948860136, "grad_norm": 0.09645269066095352, "learning_rate": 0.01, "loss": 1.9822, "step": 64680 }, { "epoch": 6.642329020332717, "grad_norm": 0.053970299661159515, "learning_rate": 0.01, "loss": 1.9629, "step": 64683 }, { "epoch": 6.642637091805299, "grad_norm": 0.15396493673324585, "learning_rate": 0.01, "loss": 1.9827, "step": 64686 }, { "epoch": 6.64294516327788, "grad_norm": 0.06772658973932266, "learning_rate": 0.01, "loss": 1.9895, "step": 64689 }, { "epoch": 6.643253234750462, "grad_norm": 0.05688326433300972, "learning_rate": 0.01, "loss": 1.9633, "step": 64692 }, { "epoch": 6.643561306223043, "grad_norm": 0.05814405530691147, "learning_rate": 0.01, "loss": 1.9805, "step": 64695 }, { "epoch": 6.6438693776956255, "grad_norm": 0.033603183925151825, "learning_rate": 0.01, "loss": 1.9562, "step": 64698 }, { "epoch": 6.6441774491682075, "grad_norm": 0.06284506618976593, "learning_rate": 0.01, "loss": 2.0001, "step": 64701 }, { "epoch": 6.644485520640789, "grad_norm": 0.03463011607527733, "learning_rate": 0.01, "loss": 1.9749, "step": 64704 }, { "epoch": 6.64479359211337, "grad_norm": 0.039407879114151, "learning_rate": 0.01, "loss": 1.9792, "step": 64707 }, { "epoch": 6.645101663585952, "grad_norm": 0.034094471484422684, "learning_rate": 0.01, "loss": 1.9753, "step": 64710 }, { "epoch": 6.645409735058534, "grad_norm": 0.15402834117412567, "learning_rate": 0.01, "loss": 1.9939, "step": 64713 }, { "epoch": 6.645717806531115, "grad_norm": 0.055694807320833206, "learning_rate": 0.01, "loss": 1.9946, "step": 64716 }, { "epoch": 6.646025878003697, "grad_norm": 0.07708931714296341, "learning_rate": 0.01, "loss": 1.9773, "step": 64719 }, { "epoch": 6.646333949476278, "grad_norm": 0.07107258588075638, "learning_rate": 0.01, "loss": 1.9692, "step": 64722 }, { "epoch": 6.64664202094886, "grad_norm": 0.06758569926023483, "learning_rate": 0.01, "loss": 1.9544, "step": 64725 }, { "epoch": 6.6469500924214415, "grad_norm": 0.07093843817710876, "learning_rate": 0.01, "loss": 1.9627, "step": 64728 }, { "epoch": 6.647258163894024, "grad_norm": 0.06615183502435684, "learning_rate": 0.01, "loss": 1.9833, "step": 64731 }, { "epoch": 6.647566235366605, "grad_norm": 0.06096789985895157, "learning_rate": 0.01, "loss": 1.9968, "step": 64734 }, { "epoch": 6.647874306839187, "grad_norm": 0.03493250906467438, "learning_rate": 0.01, "loss": 1.9776, "step": 64737 }, { "epoch": 6.648182378311768, "grad_norm": 0.058653466403484344, "learning_rate": 0.01, "loss": 1.9707, "step": 64740 }, { "epoch": 6.64849044978435, "grad_norm": 0.1340709775686264, "learning_rate": 0.01, "loss": 1.9796, "step": 64743 }, { "epoch": 6.648798521256932, "grad_norm": 0.0948764905333519, "learning_rate": 0.01, "loss": 1.9954, "step": 64746 }, { "epoch": 6.649106592729513, "grad_norm": 0.03758051246404648, "learning_rate": 0.01, "loss": 1.9835, "step": 64749 }, { "epoch": 6.649414664202095, "grad_norm": 0.05022870749235153, "learning_rate": 0.01, "loss": 2.0035, "step": 64752 }, { "epoch": 6.649722735674676, "grad_norm": 0.04376206919550896, "learning_rate": 0.01, "loss": 1.9686, "step": 64755 }, { "epoch": 6.6500308071472585, "grad_norm": 0.05255085602402687, "learning_rate": 0.01, "loss": 1.9832, "step": 64758 }, { "epoch": 6.65033887861984, "grad_norm": 0.04554371163249016, "learning_rate": 0.01, "loss": 1.9882, "step": 64761 }, { "epoch": 6.650646950092422, "grad_norm": 0.046166516840457916, "learning_rate": 0.01, "loss": 1.9969, "step": 64764 }, { "epoch": 6.650955021565003, "grad_norm": 0.06942164152860641, "learning_rate": 0.01, "loss": 1.977, "step": 64767 }, { "epoch": 6.651263093037585, "grad_norm": 0.07894229888916016, "learning_rate": 0.01, "loss": 1.9941, "step": 64770 }, { "epoch": 6.651571164510166, "grad_norm": 0.10424064844846725, "learning_rate": 0.01, "loss": 1.9659, "step": 64773 }, { "epoch": 6.651879235982748, "grad_norm": 0.1305040717124939, "learning_rate": 0.01, "loss": 1.9598, "step": 64776 }, { "epoch": 6.652187307455329, "grad_norm": 0.0960015282034874, "learning_rate": 0.01, "loss": 1.9668, "step": 64779 }, { "epoch": 6.652495378927911, "grad_norm": 0.051948267966508865, "learning_rate": 0.01, "loss": 1.9784, "step": 64782 }, { "epoch": 6.6528034504004925, "grad_norm": 0.049625445157289505, "learning_rate": 0.01, "loss": 1.974, "step": 64785 }, { "epoch": 6.6531115218730745, "grad_norm": 0.03437644988298416, "learning_rate": 0.01, "loss": 1.9774, "step": 64788 }, { "epoch": 6.653419593345657, "grad_norm": 0.04177222028374672, "learning_rate": 0.01, "loss": 2.0032, "step": 64791 }, { "epoch": 6.653727664818238, "grad_norm": 0.12556210160255432, "learning_rate": 0.01, "loss": 1.9663, "step": 64794 }, { "epoch": 6.65403573629082, "grad_norm": 0.06504391878843307, "learning_rate": 0.01, "loss": 1.9859, "step": 64797 }, { "epoch": 6.654343807763401, "grad_norm": 0.0797589123249054, "learning_rate": 0.01, "loss": 2.0007, "step": 64800 }, { "epoch": 6.654651879235983, "grad_norm": 0.05979294329881668, "learning_rate": 0.01, "loss": 1.9808, "step": 64803 }, { "epoch": 6.654959950708564, "grad_norm": 0.07763978838920593, "learning_rate": 0.01, "loss": 1.9639, "step": 64806 }, { "epoch": 6.655268022181146, "grad_norm": 0.06913294643163681, "learning_rate": 0.01, "loss": 1.9651, "step": 64809 }, { "epoch": 6.655576093653727, "grad_norm": 0.0825156420469284, "learning_rate": 0.01, "loss": 1.9491, "step": 64812 }, { "epoch": 6.655884165126309, "grad_norm": 0.04779151454567909, "learning_rate": 0.01, "loss": 1.9637, "step": 64815 }, { "epoch": 6.656192236598891, "grad_norm": 0.09039946645498276, "learning_rate": 0.01, "loss": 1.979, "step": 64818 }, { "epoch": 6.656500308071473, "grad_norm": 0.03368096798658371, "learning_rate": 0.01, "loss": 1.9844, "step": 64821 }, { "epoch": 6.656808379544055, "grad_norm": 0.11677176505327225, "learning_rate": 0.01, "loss": 1.9897, "step": 64824 }, { "epoch": 6.657116451016636, "grad_norm": 0.09988972544670105, "learning_rate": 0.01, "loss": 1.9647, "step": 64827 }, { "epoch": 6.657424522489217, "grad_norm": 0.1377715915441513, "learning_rate": 0.01, "loss": 1.9832, "step": 64830 }, { "epoch": 6.657732593961799, "grad_norm": 0.04986416921019554, "learning_rate": 0.01, "loss": 1.9764, "step": 64833 }, { "epoch": 6.658040665434381, "grad_norm": 0.06681905686855316, "learning_rate": 0.01, "loss": 1.9867, "step": 64836 }, { "epoch": 6.658348736906962, "grad_norm": 0.05175319314002991, "learning_rate": 0.01, "loss": 1.9556, "step": 64839 }, { "epoch": 6.658656808379544, "grad_norm": 0.048531901091337204, "learning_rate": 0.01, "loss": 2.0054, "step": 64842 }, { "epoch": 6.6589648798521255, "grad_norm": 0.06149359792470932, "learning_rate": 0.01, "loss": 1.9804, "step": 64845 }, { "epoch": 6.6592729513247075, "grad_norm": 0.0352664515376091, "learning_rate": 0.01, "loss": 1.9821, "step": 64848 }, { "epoch": 6.659581022797289, "grad_norm": 0.0510052926838398, "learning_rate": 0.01, "loss": 1.9894, "step": 64851 }, { "epoch": 6.659889094269871, "grad_norm": 0.06455834954977036, "learning_rate": 0.01, "loss": 1.9664, "step": 64854 }, { "epoch": 6.660197165742452, "grad_norm": 0.13268320262432098, "learning_rate": 0.01, "loss": 1.9866, "step": 64857 }, { "epoch": 6.660505237215034, "grad_norm": 0.0314185805618763, "learning_rate": 0.01, "loss": 1.9935, "step": 64860 }, { "epoch": 6.660813308687615, "grad_norm": 0.06962891668081284, "learning_rate": 0.01, "loss": 1.9849, "step": 64863 }, { "epoch": 6.661121380160197, "grad_norm": 0.11769289523363113, "learning_rate": 0.01, "loss": 2.0032, "step": 64866 }, { "epoch": 6.661429451632779, "grad_norm": 0.03778669983148575, "learning_rate": 0.01, "loss": 1.9735, "step": 64869 }, { "epoch": 6.66173752310536, "grad_norm": 0.09534257650375366, "learning_rate": 0.01, "loss": 1.9734, "step": 64872 }, { "epoch": 6.662045594577942, "grad_norm": 0.03727418929338455, "learning_rate": 0.01, "loss": 1.9789, "step": 64875 }, { "epoch": 6.662353666050524, "grad_norm": 0.04617612436413765, "learning_rate": 0.01, "loss": 1.978, "step": 64878 }, { "epoch": 6.662661737523106, "grad_norm": 0.043121397495269775, "learning_rate": 0.01, "loss": 1.977, "step": 64881 }, { "epoch": 6.662969808995687, "grad_norm": 0.03990964964032173, "learning_rate": 0.01, "loss": 1.973, "step": 64884 }, { "epoch": 6.663277880468269, "grad_norm": 0.03709634020924568, "learning_rate": 0.01, "loss": 1.9594, "step": 64887 }, { "epoch": 6.66358595194085, "grad_norm": 0.042194269597530365, "learning_rate": 0.01, "loss": 1.9616, "step": 64890 }, { "epoch": 6.663894023413432, "grad_norm": 0.08765631169080734, "learning_rate": 0.01, "loss": 1.9964, "step": 64893 }, { "epoch": 6.664202094886013, "grad_norm": 0.10358686000108719, "learning_rate": 0.01, "loss": 1.9844, "step": 64896 }, { "epoch": 6.664510166358595, "grad_norm": 0.05577368661761284, "learning_rate": 0.01, "loss": 1.9848, "step": 64899 }, { "epoch": 6.664818237831177, "grad_norm": 0.07221733778715134, "learning_rate": 0.01, "loss": 1.9895, "step": 64902 }, { "epoch": 6.6651263093037585, "grad_norm": 0.08421463519334793, "learning_rate": 0.01, "loss": 1.9786, "step": 64905 }, { "epoch": 6.66543438077634, "grad_norm": 0.04022625833749771, "learning_rate": 0.01, "loss": 1.9654, "step": 64908 }, { "epoch": 6.665742452248922, "grad_norm": 0.10746564716100693, "learning_rate": 0.01, "loss": 1.9708, "step": 64911 }, { "epoch": 6.666050523721504, "grad_norm": 0.10090082883834839, "learning_rate": 0.01, "loss": 1.9674, "step": 64914 }, { "epoch": 6.666358595194085, "grad_norm": 0.03953251615166664, "learning_rate": 0.01, "loss": 1.9555, "step": 64917 }, { "epoch": 6.666666666666667, "grad_norm": 0.09518130123615265, "learning_rate": 0.01, "loss": 1.9642, "step": 64920 }, { "epoch": 6.666974738139248, "grad_norm": 0.06792232394218445, "learning_rate": 0.01, "loss": 1.9861, "step": 64923 }, { "epoch": 6.66728280961183, "grad_norm": 0.08679928630590439, "learning_rate": 0.01, "loss": 1.9761, "step": 64926 }, { "epoch": 6.667590881084411, "grad_norm": 0.07284796237945557, "learning_rate": 0.01, "loss": 1.9691, "step": 64929 }, { "epoch": 6.667898952556993, "grad_norm": 0.07962015271186829, "learning_rate": 0.01, "loss": 1.9834, "step": 64932 }, { "epoch": 6.6682070240295745, "grad_norm": 0.04905517399311066, "learning_rate": 0.01, "loss": 1.9889, "step": 64935 }, { "epoch": 6.668515095502157, "grad_norm": 0.1008431613445282, "learning_rate": 0.01, "loss": 1.9707, "step": 64938 }, { "epoch": 6.668823166974738, "grad_norm": 0.07995069026947021, "learning_rate": 0.01, "loss": 1.9803, "step": 64941 }, { "epoch": 6.66913123844732, "grad_norm": 0.08693604916334152, "learning_rate": 0.01, "loss": 1.9604, "step": 64944 }, { "epoch": 6.669439309919902, "grad_norm": 0.07200144976377487, "learning_rate": 0.01, "loss": 1.9782, "step": 64947 }, { "epoch": 6.669747381392483, "grad_norm": 0.10411632061004639, "learning_rate": 0.01, "loss": 1.9813, "step": 64950 }, { "epoch": 6.670055452865065, "grad_norm": 0.03982757031917572, "learning_rate": 0.01, "loss": 2.0043, "step": 64953 }, { "epoch": 6.670363524337646, "grad_norm": 0.03512009605765343, "learning_rate": 0.01, "loss": 1.9712, "step": 64956 }, { "epoch": 6.670671595810228, "grad_norm": 0.056927431374788284, "learning_rate": 0.01, "loss": 1.9744, "step": 64959 }, { "epoch": 6.6709796672828094, "grad_norm": 0.04020826891064644, "learning_rate": 0.01, "loss": 1.9408, "step": 64962 }, { "epoch": 6.6712877387553915, "grad_norm": 0.051895178854465485, "learning_rate": 0.01, "loss": 1.9598, "step": 64965 }, { "epoch": 6.671595810227973, "grad_norm": 0.1494154930114746, "learning_rate": 0.01, "loss": 1.9836, "step": 64968 }, { "epoch": 6.671903881700555, "grad_norm": 0.06913480907678604, "learning_rate": 0.01, "loss": 1.9997, "step": 64971 }, { "epoch": 6.672211953173136, "grad_norm": 0.0441024973988533, "learning_rate": 0.01, "loss": 1.9884, "step": 64974 }, { "epoch": 6.672520024645718, "grad_norm": 0.053763311356306076, "learning_rate": 0.01, "loss": 1.9948, "step": 64977 }, { "epoch": 6.672828096118299, "grad_norm": 0.08884178847074509, "learning_rate": 0.01, "loss": 1.9652, "step": 64980 }, { "epoch": 6.673136167590881, "grad_norm": 0.06425255537033081, "learning_rate": 0.01, "loss": 1.9631, "step": 64983 }, { "epoch": 6.673444239063462, "grad_norm": 0.07111632078886032, "learning_rate": 0.01, "loss": 1.9967, "step": 64986 }, { "epoch": 6.673752310536044, "grad_norm": 0.06380520761013031, "learning_rate": 0.01, "loss": 1.9768, "step": 64989 }, { "epoch": 6.674060382008626, "grad_norm": 0.06785110384225845, "learning_rate": 0.01, "loss": 1.9913, "step": 64992 }, { "epoch": 6.6743684534812076, "grad_norm": 0.13397693634033203, "learning_rate": 0.01, "loss": 1.9836, "step": 64995 }, { "epoch": 6.67467652495379, "grad_norm": 0.06838545948266983, "learning_rate": 0.01, "loss": 2.0009, "step": 64998 }, { "epoch": 6.674984596426371, "grad_norm": 0.05458493158221245, "learning_rate": 0.01, "loss": 2.0025, "step": 65001 }, { "epoch": 6.675292667898953, "grad_norm": 0.04406043142080307, "learning_rate": 0.01, "loss": 1.9596, "step": 65004 }, { "epoch": 6.675600739371534, "grad_norm": 0.03164122626185417, "learning_rate": 0.01, "loss": 1.9683, "step": 65007 }, { "epoch": 6.675908810844116, "grad_norm": 0.03512512892484665, "learning_rate": 0.01, "loss": 1.9773, "step": 65010 }, { "epoch": 6.676216882316697, "grad_norm": 0.03886500746011734, "learning_rate": 0.01, "loss": 1.9656, "step": 65013 }, { "epoch": 6.676524953789279, "grad_norm": 0.04235444217920303, "learning_rate": 0.01, "loss": 1.9672, "step": 65016 }, { "epoch": 6.67683302526186, "grad_norm": 0.11992576718330383, "learning_rate": 0.01, "loss": 1.9785, "step": 65019 }, { "epoch": 6.6771410967344424, "grad_norm": 0.07870931923389435, "learning_rate": 0.01, "loss": 1.9679, "step": 65022 }, { "epoch": 6.6774491682070245, "grad_norm": 0.11495670676231384, "learning_rate": 0.01, "loss": 1.9681, "step": 65025 }, { "epoch": 6.677757239679606, "grad_norm": 0.10441329330205917, "learning_rate": 0.01, "loss": 2.0027, "step": 65028 }, { "epoch": 6.678065311152187, "grad_norm": 0.08615846931934357, "learning_rate": 0.01, "loss": 1.9905, "step": 65031 }, { "epoch": 6.678373382624769, "grad_norm": 0.053645677864551544, "learning_rate": 0.01, "loss": 2.0039, "step": 65034 }, { "epoch": 6.678681454097351, "grad_norm": 0.0375412255525589, "learning_rate": 0.01, "loss": 1.9721, "step": 65037 }, { "epoch": 6.678989525569932, "grad_norm": 0.03644879162311554, "learning_rate": 0.01, "loss": 1.9553, "step": 65040 }, { "epoch": 6.679297597042514, "grad_norm": 0.09399533271789551, "learning_rate": 0.01, "loss": 1.9674, "step": 65043 }, { "epoch": 6.679605668515095, "grad_norm": 0.06882072985172272, "learning_rate": 0.01, "loss": 1.9863, "step": 65046 }, { "epoch": 6.679913739987677, "grad_norm": 0.10664650052785873, "learning_rate": 0.01, "loss": 1.9733, "step": 65049 }, { "epoch": 6.6802218114602585, "grad_norm": 0.044063057750463486, "learning_rate": 0.01, "loss": 1.9976, "step": 65052 }, { "epoch": 6.680529882932841, "grad_norm": 0.037052053958177567, "learning_rate": 0.01, "loss": 1.963, "step": 65055 }, { "epoch": 6.680837954405422, "grad_norm": 0.038574136793613434, "learning_rate": 0.01, "loss": 1.9605, "step": 65058 }, { "epoch": 6.681146025878004, "grad_norm": 0.06184682250022888, "learning_rate": 0.01, "loss": 1.9944, "step": 65061 }, { "epoch": 6.681454097350585, "grad_norm": 0.09441886097192764, "learning_rate": 0.01, "loss": 1.9866, "step": 65064 }, { "epoch": 6.681762168823167, "grad_norm": 0.07284927368164062, "learning_rate": 0.01, "loss": 1.9733, "step": 65067 }, { "epoch": 6.682070240295749, "grad_norm": 0.07610774785280228, "learning_rate": 0.01, "loss": 1.9756, "step": 65070 }, { "epoch": 6.68237831176833, "grad_norm": 0.04308845102787018, "learning_rate": 0.01, "loss": 1.9727, "step": 65073 }, { "epoch": 6.682686383240912, "grad_norm": 0.08398102223873138, "learning_rate": 0.01, "loss": 1.9889, "step": 65076 }, { "epoch": 6.682994454713493, "grad_norm": 0.11537694185972214, "learning_rate": 0.01, "loss": 1.9694, "step": 65079 }, { "epoch": 6.6833025261860755, "grad_norm": 0.03176436200737953, "learning_rate": 0.01, "loss": 1.9525, "step": 65082 }, { "epoch": 6.683610597658657, "grad_norm": 0.09571046382188797, "learning_rate": 0.01, "loss": 1.9763, "step": 65085 }, { "epoch": 6.683918669131239, "grad_norm": 0.04681390896439552, "learning_rate": 0.01, "loss": 1.9871, "step": 65088 }, { "epoch": 6.68422674060382, "grad_norm": 0.26577845215797424, "learning_rate": 0.01, "loss": 1.9925, "step": 65091 }, { "epoch": 6.684534812076402, "grad_norm": 0.12212441116571426, "learning_rate": 0.01, "loss": 1.9681, "step": 65094 }, { "epoch": 6.684842883548983, "grad_norm": 0.4320748448371887, "learning_rate": 0.01, "loss": 1.9581, "step": 65097 }, { "epoch": 6.685150955021565, "grad_norm": 0.04880547523498535, "learning_rate": 0.01, "loss": 1.9766, "step": 65100 }, { "epoch": 6.685459026494147, "grad_norm": 0.08876339346170425, "learning_rate": 0.01, "loss": 1.974, "step": 65103 }, { "epoch": 6.685767097966728, "grad_norm": 0.06511343270540237, "learning_rate": 0.01, "loss": 1.9877, "step": 65106 }, { "epoch": 6.6860751694393095, "grad_norm": 0.050562430173158646, "learning_rate": 0.01, "loss": 1.9807, "step": 65109 }, { "epoch": 6.6863832409118915, "grad_norm": 0.04717005044221878, "learning_rate": 0.01, "loss": 1.9602, "step": 65112 }, { "epoch": 6.686691312384474, "grad_norm": 0.08370763063430786, "learning_rate": 0.01, "loss": 1.9779, "step": 65115 }, { "epoch": 6.686999383857055, "grad_norm": 0.0477861724793911, "learning_rate": 0.01, "loss": 1.9572, "step": 65118 }, { "epoch": 6.687307455329637, "grad_norm": 0.02870395965874195, "learning_rate": 0.01, "loss": 2.0043, "step": 65121 }, { "epoch": 6.687615526802218, "grad_norm": 0.04413042590022087, "learning_rate": 0.01, "loss": 1.9689, "step": 65124 }, { "epoch": 6.6879235982748, "grad_norm": 0.041404690593481064, "learning_rate": 0.01, "loss": 1.9668, "step": 65127 }, { "epoch": 6.688231669747381, "grad_norm": 0.04159929230809212, "learning_rate": 0.01, "loss": 1.9783, "step": 65130 }, { "epoch": 6.688539741219963, "grad_norm": 0.06357663124799728, "learning_rate": 0.01, "loss": 1.9677, "step": 65133 }, { "epoch": 6.688847812692544, "grad_norm": 0.03610898181796074, "learning_rate": 0.01, "loss": 2.0024, "step": 65136 }, { "epoch": 6.689155884165126, "grad_norm": 0.038069941103458405, "learning_rate": 0.01, "loss": 1.9789, "step": 65139 }, { "epoch": 6.689463955637708, "grad_norm": 0.05543750151991844, "learning_rate": 0.01, "loss": 1.9695, "step": 65142 }, { "epoch": 6.68977202711029, "grad_norm": 0.10848159343004227, "learning_rate": 0.01, "loss": 1.9616, "step": 65145 }, { "epoch": 6.690080098582872, "grad_norm": 0.1446017026901245, "learning_rate": 0.01, "loss": 1.9915, "step": 65148 }, { "epoch": 6.690388170055453, "grad_norm": 0.10384636372327805, "learning_rate": 0.01, "loss": 1.9581, "step": 65151 }, { "epoch": 6.690696241528035, "grad_norm": 0.07247021794319153, "learning_rate": 0.01, "loss": 1.9817, "step": 65154 }, { "epoch": 6.691004313000616, "grad_norm": 0.04569610580801964, "learning_rate": 0.01, "loss": 1.9818, "step": 65157 }, { "epoch": 6.691312384473198, "grad_norm": 0.039695657789707184, "learning_rate": 0.01, "loss": 1.9897, "step": 65160 }, { "epoch": 6.691620455945779, "grad_norm": 0.053198885172605515, "learning_rate": 0.01, "loss": 1.998, "step": 65163 }, { "epoch": 6.691928527418361, "grad_norm": 0.042604029178619385, "learning_rate": 0.01, "loss": 1.9965, "step": 65166 }, { "epoch": 6.6922365988909425, "grad_norm": 0.031200909987092018, "learning_rate": 0.01, "loss": 1.9647, "step": 65169 }, { "epoch": 6.6925446703635245, "grad_norm": 0.10486329346895218, "learning_rate": 0.01, "loss": 1.9919, "step": 65172 }, { "epoch": 6.692852741836106, "grad_norm": 0.046825818717479706, "learning_rate": 0.01, "loss": 1.9795, "step": 65175 }, { "epoch": 6.693160813308688, "grad_norm": 0.1281237006187439, "learning_rate": 0.01, "loss": 2.0059, "step": 65178 }, { "epoch": 6.693468884781269, "grad_norm": 0.09412495791912079, "learning_rate": 0.01, "loss": 1.9904, "step": 65181 }, { "epoch": 6.693776956253851, "grad_norm": 0.08914454281330109, "learning_rate": 0.01, "loss": 1.9938, "step": 65184 }, { "epoch": 6.694085027726432, "grad_norm": 0.07769876718521118, "learning_rate": 0.01, "loss": 1.9673, "step": 65187 }, { "epoch": 6.694393099199014, "grad_norm": 0.10803245007991791, "learning_rate": 0.01, "loss": 1.9865, "step": 65190 }, { "epoch": 6.694701170671596, "grad_norm": 0.07634082436561584, "learning_rate": 0.01, "loss": 1.9459, "step": 65193 }, { "epoch": 6.695009242144177, "grad_norm": 0.033576395362615585, "learning_rate": 0.01, "loss": 1.9826, "step": 65196 }, { "epoch": 6.695317313616759, "grad_norm": 0.04042442888021469, "learning_rate": 0.01, "loss": 1.992, "step": 65199 }, { "epoch": 6.695625385089341, "grad_norm": 0.04007275775074959, "learning_rate": 0.01, "loss": 2.0092, "step": 65202 }, { "epoch": 6.695933456561923, "grad_norm": 0.036564283072948456, "learning_rate": 0.01, "loss": 1.9913, "step": 65205 }, { "epoch": 6.696241528034504, "grad_norm": 0.03179538995027542, "learning_rate": 0.01, "loss": 1.957, "step": 65208 }, { "epoch": 6.696549599507086, "grad_norm": 0.050880614668130875, "learning_rate": 0.01, "loss": 1.985, "step": 65211 }, { "epoch": 6.696857670979667, "grad_norm": 0.07803148031234741, "learning_rate": 0.01, "loss": 1.9987, "step": 65214 }, { "epoch": 6.697165742452249, "grad_norm": 0.11088874191045761, "learning_rate": 0.01, "loss": 1.9561, "step": 65217 }, { "epoch": 6.69747381392483, "grad_norm": 0.05180731415748596, "learning_rate": 0.01, "loss": 2.0025, "step": 65220 }, { "epoch": 6.697781885397412, "grad_norm": 0.09849876910448074, "learning_rate": 0.01, "loss": 1.9838, "step": 65223 }, { "epoch": 6.698089956869994, "grad_norm": 0.07928299903869629, "learning_rate": 0.01, "loss": 1.974, "step": 65226 }, { "epoch": 6.6983980283425755, "grad_norm": 0.058122437447309494, "learning_rate": 0.01, "loss": 1.9843, "step": 65229 }, { "epoch": 6.698706099815157, "grad_norm": 0.059113189578056335, "learning_rate": 0.01, "loss": 1.987, "step": 65232 }, { "epoch": 6.699014171287739, "grad_norm": 0.03366167098283768, "learning_rate": 0.01, "loss": 1.9818, "step": 65235 }, { "epoch": 6.699322242760321, "grad_norm": 0.036344975233078, "learning_rate": 0.01, "loss": 1.9468, "step": 65238 }, { "epoch": 6.699630314232902, "grad_norm": 0.10664808005094528, "learning_rate": 0.01, "loss": 1.9469, "step": 65241 }, { "epoch": 6.699938385705484, "grad_norm": 0.09889326244592667, "learning_rate": 0.01, "loss": 1.9851, "step": 65244 }, { "epoch": 6.700246457178065, "grad_norm": 0.05460183694958687, "learning_rate": 0.01, "loss": 2.0019, "step": 65247 }, { "epoch": 6.700554528650647, "grad_norm": 0.08168340474367142, "learning_rate": 0.01, "loss": 1.9788, "step": 65250 }, { "epoch": 6.700862600123228, "grad_norm": 0.0726526603102684, "learning_rate": 0.01, "loss": 1.9659, "step": 65253 }, { "epoch": 6.70117067159581, "grad_norm": 0.0469973124563694, "learning_rate": 0.01, "loss": 1.9772, "step": 65256 }, { "epoch": 6.7014787430683915, "grad_norm": 0.05402790755033493, "learning_rate": 0.01, "loss": 1.9723, "step": 65259 }, { "epoch": 6.701786814540974, "grad_norm": 0.03723495826125145, "learning_rate": 0.01, "loss": 1.9975, "step": 65262 }, { "epoch": 6.702094886013555, "grad_norm": 0.1069960966706276, "learning_rate": 0.01, "loss": 1.9526, "step": 65265 }, { "epoch": 6.702402957486137, "grad_norm": 0.0806579738855362, "learning_rate": 0.01, "loss": 1.98, "step": 65268 }, { "epoch": 6.702711028958719, "grad_norm": 0.05724218115210533, "learning_rate": 0.01, "loss": 2.001, "step": 65271 }, { "epoch": 6.7030191004313, "grad_norm": 0.10128419101238251, "learning_rate": 0.01, "loss": 1.9824, "step": 65274 }, { "epoch": 6.703327171903882, "grad_norm": 0.1476512998342514, "learning_rate": 0.01, "loss": 2.0008, "step": 65277 }, { "epoch": 6.703635243376463, "grad_norm": 0.10202914476394653, "learning_rate": 0.01, "loss": 1.9957, "step": 65280 }, { "epoch": 6.703943314849045, "grad_norm": 0.042887743562459946, "learning_rate": 0.01, "loss": 1.9819, "step": 65283 }, { "epoch": 6.704251386321626, "grad_norm": 0.0640069991350174, "learning_rate": 0.01, "loss": 1.9784, "step": 65286 }, { "epoch": 6.7045594577942085, "grad_norm": 0.042582917958498, "learning_rate": 0.01, "loss": 1.9819, "step": 65289 }, { "epoch": 6.70486752926679, "grad_norm": 0.0463312529027462, "learning_rate": 0.01, "loss": 1.9802, "step": 65292 }, { "epoch": 6.705175600739372, "grad_norm": 0.05490421503782272, "learning_rate": 0.01, "loss": 1.9697, "step": 65295 }, { "epoch": 6.705483672211953, "grad_norm": 0.07986756414175034, "learning_rate": 0.01, "loss": 1.9504, "step": 65298 }, { "epoch": 6.705791743684535, "grad_norm": 0.06604810804128647, "learning_rate": 0.01, "loss": 1.9782, "step": 65301 }, { "epoch": 6.706099815157117, "grad_norm": 0.036626990884542465, "learning_rate": 0.01, "loss": 1.9685, "step": 65304 }, { "epoch": 6.706407886629698, "grad_norm": 0.03825300931930542, "learning_rate": 0.01, "loss": 1.9751, "step": 65307 }, { "epoch": 6.706715958102279, "grad_norm": 0.07136386632919312, "learning_rate": 0.01, "loss": 1.9652, "step": 65310 }, { "epoch": 6.707024029574861, "grad_norm": 0.059436917304992676, "learning_rate": 0.01, "loss": 2.0015, "step": 65313 }, { "epoch": 6.707332101047443, "grad_norm": 0.06798838824033737, "learning_rate": 0.01, "loss": 1.9803, "step": 65316 }, { "epoch": 6.7076401725200245, "grad_norm": 0.05605984479188919, "learning_rate": 0.01, "loss": 2.0175, "step": 65319 }, { "epoch": 6.707948243992607, "grad_norm": 0.039418842643499374, "learning_rate": 0.01, "loss": 1.9744, "step": 65322 }, { "epoch": 6.708256315465188, "grad_norm": 0.037476614117622375, "learning_rate": 0.01, "loss": 1.985, "step": 65325 }, { "epoch": 6.70856438693777, "grad_norm": 0.03878027945756912, "learning_rate": 0.01, "loss": 1.9927, "step": 65328 }, { "epoch": 6.708872458410351, "grad_norm": 0.14439933001995087, "learning_rate": 0.01, "loss": 1.9748, "step": 65331 }, { "epoch": 6.709180529882933, "grad_norm": 0.1026519313454628, "learning_rate": 0.01, "loss": 1.9874, "step": 65334 }, { "epoch": 6.709488601355514, "grad_norm": 0.059400398284196854, "learning_rate": 0.01, "loss": 1.9556, "step": 65337 }, { "epoch": 6.709796672828096, "grad_norm": 0.03667070344090462, "learning_rate": 0.01, "loss": 1.9706, "step": 65340 }, { "epoch": 6.710104744300677, "grad_norm": 0.03821050748229027, "learning_rate": 0.01, "loss": 1.9647, "step": 65343 }, { "epoch": 6.710412815773259, "grad_norm": 0.034569595009088516, "learning_rate": 0.01, "loss": 1.9783, "step": 65346 }, { "epoch": 6.7107208872458415, "grad_norm": 0.0894671380519867, "learning_rate": 0.01, "loss": 1.979, "step": 65349 }, { "epoch": 6.711028958718423, "grad_norm": 0.1035616546869278, "learning_rate": 0.01, "loss": 1.9808, "step": 65352 }, { "epoch": 6.711337030191005, "grad_norm": 0.07528524845838547, "learning_rate": 0.01, "loss": 1.9708, "step": 65355 }, { "epoch": 6.711645101663586, "grad_norm": 0.073489710688591, "learning_rate": 0.01, "loss": 1.9892, "step": 65358 }, { "epoch": 6.711953173136168, "grad_norm": 0.047937504947185516, "learning_rate": 0.01, "loss": 1.9775, "step": 65361 }, { "epoch": 6.712261244608749, "grad_norm": 0.049860548228025436, "learning_rate": 0.01, "loss": 1.99, "step": 65364 }, { "epoch": 6.712569316081331, "grad_norm": 0.04179536923766136, "learning_rate": 0.01, "loss": 1.9644, "step": 65367 }, { "epoch": 6.712877387553912, "grad_norm": 0.03599520027637482, "learning_rate": 0.01, "loss": 1.9819, "step": 65370 }, { "epoch": 6.713185459026494, "grad_norm": 0.03743865713477135, "learning_rate": 0.01, "loss": 1.9791, "step": 65373 }, { "epoch": 6.7134935304990755, "grad_norm": 0.04530680924654007, "learning_rate": 0.01, "loss": 1.9728, "step": 65376 }, { "epoch": 6.7138016019716575, "grad_norm": 0.05956900492310524, "learning_rate": 0.01, "loss": 2.0093, "step": 65379 }, { "epoch": 6.714109673444239, "grad_norm": 0.03879319131374359, "learning_rate": 0.01, "loss": 1.9913, "step": 65382 }, { "epoch": 6.714417744916821, "grad_norm": 0.08729801326990128, "learning_rate": 0.01, "loss": 1.979, "step": 65385 }, { "epoch": 6.714725816389402, "grad_norm": 0.06801248341798782, "learning_rate": 0.01, "loss": 1.9922, "step": 65388 }, { "epoch": 6.715033887861984, "grad_norm": 0.10621631890535355, "learning_rate": 0.01, "loss": 1.9631, "step": 65391 }, { "epoch": 6.715341959334566, "grad_norm": 0.06591764837503433, "learning_rate": 0.01, "loss": 1.9941, "step": 65394 }, { "epoch": 6.715650030807147, "grad_norm": 0.08169190585613251, "learning_rate": 0.01, "loss": 1.9712, "step": 65397 }, { "epoch": 6.715958102279729, "grad_norm": 0.06591983139514923, "learning_rate": 0.01, "loss": 1.987, "step": 65400 }, { "epoch": 6.71626617375231, "grad_norm": 0.102086141705513, "learning_rate": 0.01, "loss": 1.962, "step": 65403 }, { "epoch": 6.716574245224892, "grad_norm": 0.13108035922050476, "learning_rate": 0.01, "loss": 1.9915, "step": 65406 }, { "epoch": 6.716882316697474, "grad_norm": 0.09894704073667526, "learning_rate": 0.01, "loss": 2.0025, "step": 65409 }, { "epoch": 6.717190388170056, "grad_norm": 0.05907077714800835, "learning_rate": 0.01, "loss": 1.9726, "step": 65412 }, { "epoch": 6.717498459642637, "grad_norm": 0.044414445757865906, "learning_rate": 0.01, "loss": 1.9734, "step": 65415 }, { "epoch": 6.717806531115219, "grad_norm": 0.04454610496759415, "learning_rate": 0.01, "loss": 1.947, "step": 65418 }, { "epoch": 6.7181146025878, "grad_norm": 0.044069502502679825, "learning_rate": 0.01, "loss": 1.9755, "step": 65421 }, { "epoch": 6.718422674060382, "grad_norm": 0.05330649018287659, "learning_rate": 0.01, "loss": 2.0213, "step": 65424 }, { "epoch": 6.718730745532964, "grad_norm": 0.13220176100730896, "learning_rate": 0.01, "loss": 1.9773, "step": 65427 }, { "epoch": 6.719038817005545, "grad_norm": 0.05000099912285805, "learning_rate": 0.01, "loss": 1.9834, "step": 65430 }, { "epoch": 6.7193468884781264, "grad_norm": 0.053126510232686996, "learning_rate": 0.01, "loss": 1.9683, "step": 65433 }, { "epoch": 6.7196549599507085, "grad_norm": 0.050792306661605835, "learning_rate": 0.01, "loss": 1.9877, "step": 65436 }, { "epoch": 6.7199630314232905, "grad_norm": 0.05356533080339432, "learning_rate": 0.01, "loss": 1.9913, "step": 65439 }, { "epoch": 6.720271102895872, "grad_norm": 0.03830750659108162, "learning_rate": 0.01, "loss": 1.974, "step": 65442 }, { "epoch": 6.720579174368454, "grad_norm": 0.0419539138674736, "learning_rate": 0.01, "loss": 1.9891, "step": 65445 }, { "epoch": 6.720887245841035, "grad_norm": 0.04633840546011925, "learning_rate": 0.01, "loss": 1.9725, "step": 65448 }, { "epoch": 6.721195317313617, "grad_norm": 0.048049312084913254, "learning_rate": 0.01, "loss": 1.978, "step": 65451 }, { "epoch": 6.721503388786198, "grad_norm": 0.031329698860645294, "learning_rate": 0.01, "loss": 1.9681, "step": 65454 }, { "epoch": 6.72181146025878, "grad_norm": 0.03230907768011093, "learning_rate": 0.01, "loss": 1.9974, "step": 65457 }, { "epoch": 6.722119531731361, "grad_norm": 0.09823235124349594, "learning_rate": 0.01, "loss": 1.9824, "step": 65460 }, { "epoch": 6.722427603203943, "grad_norm": 0.11478835344314575, "learning_rate": 0.01, "loss": 1.9782, "step": 65463 }, { "epoch": 6.7227356746765246, "grad_norm": 0.040770623832941055, "learning_rate": 0.01, "loss": 1.9768, "step": 65466 }, { "epoch": 6.723043746149107, "grad_norm": 0.11709921061992645, "learning_rate": 0.01, "loss": 1.9885, "step": 65469 }, { "epoch": 6.723351817621689, "grad_norm": 0.041359901428222656, "learning_rate": 0.01, "loss": 1.9778, "step": 65472 }, { "epoch": 6.72365988909427, "grad_norm": 0.03594958409667015, "learning_rate": 0.01, "loss": 1.9651, "step": 65475 }, { "epoch": 6.723967960566852, "grad_norm": 0.03344079852104187, "learning_rate": 0.01, "loss": 1.9872, "step": 65478 }, { "epoch": 6.724276032039433, "grad_norm": 0.03400930017232895, "learning_rate": 0.01, "loss": 1.9618, "step": 65481 }, { "epoch": 6.724584103512015, "grad_norm": 0.11725469678640366, "learning_rate": 0.01, "loss": 1.9798, "step": 65484 }, { "epoch": 6.724892174984596, "grad_norm": 0.08199802041053772, "learning_rate": 0.01, "loss": 1.9765, "step": 65487 }, { "epoch": 6.725200246457178, "grad_norm": 0.04940160736441612, "learning_rate": 0.01, "loss": 1.9546, "step": 65490 }, { "epoch": 6.7255083179297594, "grad_norm": 0.08786390721797943, "learning_rate": 0.01, "loss": 1.9616, "step": 65493 }, { "epoch": 6.7258163894023415, "grad_norm": 0.04642069712281227, "learning_rate": 0.01, "loss": 1.9623, "step": 65496 }, { "epoch": 6.726124460874923, "grad_norm": 0.0684671550989151, "learning_rate": 0.01, "loss": 1.9626, "step": 65499 }, { "epoch": 6.726432532347505, "grad_norm": 0.05767513066530228, "learning_rate": 0.01, "loss": 2.001, "step": 65502 }, { "epoch": 6.726740603820086, "grad_norm": 0.04825710505247116, "learning_rate": 0.01, "loss": 1.9883, "step": 65505 }, { "epoch": 6.727048675292668, "grad_norm": 0.08460171520709991, "learning_rate": 0.01, "loss": 1.9801, "step": 65508 }, { "epoch": 6.727356746765249, "grad_norm": 0.08406694233417511, "learning_rate": 0.01, "loss": 1.9708, "step": 65511 }, { "epoch": 6.727664818237831, "grad_norm": 0.09590575098991394, "learning_rate": 0.01, "loss": 1.9723, "step": 65514 }, { "epoch": 6.727972889710413, "grad_norm": 0.05631608888506889, "learning_rate": 0.01, "loss": 1.9611, "step": 65517 }, { "epoch": 6.728280961182994, "grad_norm": 0.04405519738793373, "learning_rate": 0.01, "loss": 1.973, "step": 65520 }, { "epoch": 6.728589032655576, "grad_norm": 0.04463804140686989, "learning_rate": 0.01, "loss": 1.9702, "step": 65523 }, { "epoch": 6.728897104128158, "grad_norm": 0.048253465443849564, "learning_rate": 0.01, "loss": 1.9753, "step": 65526 }, { "epoch": 6.72920517560074, "grad_norm": 0.058578286319971085, "learning_rate": 0.01, "loss": 1.9872, "step": 65529 }, { "epoch": 6.729513247073321, "grad_norm": 0.0791950449347496, "learning_rate": 0.01, "loss": 1.962, "step": 65532 }, { "epoch": 6.729821318545903, "grad_norm": 0.112459696829319, "learning_rate": 0.01, "loss": 1.9797, "step": 65535 }, { "epoch": 6.730129390018484, "grad_norm": 0.03688877448439598, "learning_rate": 0.01, "loss": 1.98, "step": 65538 }, { "epoch": 6.730437461491066, "grad_norm": 0.041237879544496536, "learning_rate": 0.01, "loss": 1.9844, "step": 65541 }, { "epoch": 6.730745532963647, "grad_norm": 0.09671095013618469, "learning_rate": 0.01, "loss": 1.9889, "step": 65544 }, { "epoch": 6.731053604436229, "grad_norm": 0.05881296843290329, "learning_rate": 0.01, "loss": 2.0005, "step": 65547 }, { "epoch": 6.731361675908811, "grad_norm": 0.07665944844484329, "learning_rate": 0.01, "loss": 1.9668, "step": 65550 }, { "epoch": 6.7316697473813925, "grad_norm": 0.12369941920042038, "learning_rate": 0.01, "loss": 1.9712, "step": 65553 }, { "epoch": 6.7319778188539745, "grad_norm": 0.03289101645350456, "learning_rate": 0.01, "loss": 1.972, "step": 65556 }, { "epoch": 6.732285890326556, "grad_norm": 0.07767914235591888, "learning_rate": 0.01, "loss": 1.9751, "step": 65559 }, { "epoch": 6.732593961799138, "grad_norm": 0.16727425158023834, "learning_rate": 0.01, "loss": 1.9728, "step": 65562 }, { "epoch": 6.732902033271719, "grad_norm": 0.13580100238323212, "learning_rate": 0.01, "loss": 1.9805, "step": 65565 }, { "epoch": 6.733210104744301, "grad_norm": 0.06935961544513702, "learning_rate": 0.01, "loss": 1.985, "step": 65568 }, { "epoch": 6.733518176216882, "grad_norm": 0.07026053220033646, "learning_rate": 0.01, "loss": 1.9791, "step": 65571 }, { "epoch": 6.733826247689464, "grad_norm": 0.08974307775497437, "learning_rate": 0.01, "loss": 1.9715, "step": 65574 }, { "epoch": 6.734134319162045, "grad_norm": 0.04675333574414253, "learning_rate": 0.01, "loss": 1.9894, "step": 65577 }, { "epoch": 6.734442390634627, "grad_norm": 0.04853864386677742, "learning_rate": 0.01, "loss": 1.9679, "step": 65580 }, { "epoch": 6.7347504621072085, "grad_norm": 0.03581572324037552, "learning_rate": 0.01, "loss": 1.947, "step": 65583 }, { "epoch": 6.735058533579791, "grad_norm": 0.03860647231340408, "learning_rate": 0.01, "loss": 2.0082, "step": 65586 }, { "epoch": 6.735366605052372, "grad_norm": 0.0573217049241066, "learning_rate": 0.01, "loss": 1.9692, "step": 65589 }, { "epoch": 6.735674676524954, "grad_norm": 0.09368444234132767, "learning_rate": 0.01, "loss": 1.9789, "step": 65592 }, { "epoch": 6.735982747997536, "grad_norm": 0.21956096589565277, "learning_rate": 0.01, "loss": 1.9777, "step": 65595 }, { "epoch": 6.736290819470117, "grad_norm": 0.12725447118282318, "learning_rate": 0.01, "loss": 1.9808, "step": 65598 }, { "epoch": 6.736598890942699, "grad_norm": 0.06952683627605438, "learning_rate": 0.01, "loss": 1.9499, "step": 65601 }, { "epoch": 6.73690696241528, "grad_norm": 0.03885588422417641, "learning_rate": 0.01, "loss": 1.9634, "step": 65604 }, { "epoch": 6.737215033887862, "grad_norm": 0.06260085850954056, "learning_rate": 0.01, "loss": 1.9751, "step": 65607 }, { "epoch": 6.737523105360443, "grad_norm": 0.045172885060310364, "learning_rate": 0.01, "loss": 1.9778, "step": 65610 }, { "epoch": 6.7378311768330255, "grad_norm": 0.05928365886211395, "learning_rate": 0.01, "loss": 1.9915, "step": 65613 }, { "epoch": 6.738139248305607, "grad_norm": 0.039838556200265884, "learning_rate": 0.01, "loss": 1.9747, "step": 65616 }, { "epoch": 6.738447319778189, "grad_norm": 0.035764627158641815, "learning_rate": 0.01, "loss": 1.9774, "step": 65619 }, { "epoch": 6.73875539125077, "grad_norm": 0.05831705033779144, "learning_rate": 0.01, "loss": 1.9647, "step": 65622 }, { "epoch": 6.739063462723352, "grad_norm": 0.07349859923124313, "learning_rate": 0.01, "loss": 1.9753, "step": 65625 }, { "epoch": 6.739371534195934, "grad_norm": 0.05156675726175308, "learning_rate": 0.01, "loss": 1.972, "step": 65628 }, { "epoch": 6.739679605668515, "grad_norm": 0.09530221670866013, "learning_rate": 0.01, "loss": 1.9801, "step": 65631 }, { "epoch": 6.739987677141096, "grad_norm": 0.04756055027246475, "learning_rate": 0.01, "loss": 1.9879, "step": 65634 }, { "epoch": 6.740295748613678, "grad_norm": 0.038086969405412674, "learning_rate": 0.01, "loss": 1.972, "step": 65637 }, { "epoch": 6.74060382008626, "grad_norm": 0.06435366719961166, "learning_rate": 0.01, "loss": 1.9785, "step": 65640 }, { "epoch": 6.7409118915588415, "grad_norm": 0.12931565940380096, "learning_rate": 0.01, "loss": 1.9875, "step": 65643 }, { "epoch": 6.741219963031424, "grad_norm": 0.05568262189626694, "learning_rate": 0.01, "loss": 1.9713, "step": 65646 }, { "epoch": 6.741528034504005, "grad_norm": 0.06151677295565605, "learning_rate": 0.01, "loss": 1.9735, "step": 65649 }, { "epoch": 6.741836105976587, "grad_norm": 0.05449217930436134, "learning_rate": 0.01, "loss": 2.0004, "step": 65652 }, { "epoch": 6.742144177449168, "grad_norm": 0.05131798982620239, "learning_rate": 0.01, "loss": 1.9909, "step": 65655 }, { "epoch": 6.74245224892175, "grad_norm": 0.04320504143834114, "learning_rate": 0.01, "loss": 1.9984, "step": 65658 }, { "epoch": 6.742760320394331, "grad_norm": 0.06800030171871185, "learning_rate": 0.01, "loss": 1.9785, "step": 65661 }, { "epoch": 6.743068391866913, "grad_norm": 0.04916686192154884, "learning_rate": 0.01, "loss": 1.9932, "step": 65664 }, { "epoch": 6.743376463339494, "grad_norm": 0.10272326320409775, "learning_rate": 0.01, "loss": 1.9765, "step": 65667 }, { "epoch": 6.743684534812076, "grad_norm": 0.03326984494924545, "learning_rate": 0.01, "loss": 1.9685, "step": 65670 }, { "epoch": 6.7439926062846585, "grad_norm": 0.03591003641486168, "learning_rate": 0.01, "loss": 1.9751, "step": 65673 }, { "epoch": 6.74430067775724, "grad_norm": 0.0768413096666336, "learning_rate": 0.01, "loss": 1.951, "step": 65676 }, { "epoch": 6.744608749229822, "grad_norm": 0.04805091395974159, "learning_rate": 0.01, "loss": 1.9654, "step": 65679 }, { "epoch": 6.744916820702403, "grad_norm": 0.060281701385974884, "learning_rate": 0.01, "loss": 1.9986, "step": 65682 }, { "epoch": 6.745224892174985, "grad_norm": 0.08178987354040146, "learning_rate": 0.01, "loss": 1.9693, "step": 65685 }, { "epoch": 6.745532963647566, "grad_norm": 0.047426290810108185, "learning_rate": 0.01, "loss": 1.9985, "step": 65688 }, { "epoch": 6.745841035120148, "grad_norm": 0.06165017560124397, "learning_rate": 0.01, "loss": 1.9793, "step": 65691 }, { "epoch": 6.746149106592729, "grad_norm": 0.0794510692358017, "learning_rate": 0.01, "loss": 1.9537, "step": 65694 }, { "epoch": 6.746457178065311, "grad_norm": 0.1278935670852661, "learning_rate": 0.01, "loss": 1.9794, "step": 65697 }, { "epoch": 6.7467652495378925, "grad_norm": 0.09933564066886902, "learning_rate": 0.01, "loss": 1.9877, "step": 65700 }, { "epoch": 6.7470733210104745, "grad_norm": 0.06581506133079529, "learning_rate": 0.01, "loss": 1.9882, "step": 65703 }, { "epoch": 6.747381392483056, "grad_norm": 0.07437628507614136, "learning_rate": 0.01, "loss": 1.9723, "step": 65706 }, { "epoch": 6.747689463955638, "grad_norm": 0.06629917025566101, "learning_rate": 0.01, "loss": 1.9722, "step": 65709 }, { "epoch": 6.747997535428219, "grad_norm": 0.04644669219851494, "learning_rate": 0.01, "loss": 1.9623, "step": 65712 }, { "epoch": 6.748305606900801, "grad_norm": 0.042347557842731476, "learning_rate": 0.01, "loss": 1.9564, "step": 65715 }, { "epoch": 6.748613678373383, "grad_norm": 0.05482964962720871, "learning_rate": 0.01, "loss": 1.957, "step": 65718 }, { "epoch": 6.748921749845964, "grad_norm": 0.07373082637786865, "learning_rate": 0.01, "loss": 1.9562, "step": 65721 }, { "epoch": 6.749229821318546, "grad_norm": 0.08168603479862213, "learning_rate": 0.01, "loss": 1.9857, "step": 65724 }, { "epoch": 6.749537892791127, "grad_norm": 0.09342950582504272, "learning_rate": 0.01, "loss": 1.9536, "step": 65727 }, { "epoch": 6.749845964263709, "grad_norm": 0.05923538655042648, "learning_rate": 0.01, "loss": 1.9711, "step": 65730 }, { "epoch": 6.750154035736291, "grad_norm": 0.09214897453784943, "learning_rate": 0.01, "loss": 1.9879, "step": 65733 }, { "epoch": 6.750462107208873, "grad_norm": 0.034419383853673935, "learning_rate": 0.01, "loss": 1.9925, "step": 65736 }, { "epoch": 6.750770178681454, "grad_norm": 0.05979537591338158, "learning_rate": 0.01, "loss": 1.9595, "step": 65739 }, { "epoch": 6.751078250154036, "grad_norm": 0.11106430739164352, "learning_rate": 0.01, "loss": 1.9711, "step": 65742 }, { "epoch": 6.751386321626617, "grad_norm": 0.04116882011294365, "learning_rate": 0.01, "loss": 1.9743, "step": 65745 }, { "epoch": 6.751694393099199, "grad_norm": 0.0603414922952652, "learning_rate": 0.01, "loss": 2.0005, "step": 65748 }, { "epoch": 6.752002464571781, "grad_norm": 0.08985871076583862, "learning_rate": 0.01, "loss": 1.989, "step": 65751 }, { "epoch": 6.752310536044362, "grad_norm": 0.058961328119039536, "learning_rate": 0.01, "loss": 2.0145, "step": 65754 }, { "epoch": 6.752618607516944, "grad_norm": 0.0800008699297905, "learning_rate": 0.01, "loss": 1.9734, "step": 65757 }, { "epoch": 6.7529266789895255, "grad_norm": 0.06763100624084473, "learning_rate": 0.01, "loss": 1.9666, "step": 65760 }, { "epoch": 6.7532347504621075, "grad_norm": 0.08112933486700058, "learning_rate": 0.01, "loss": 1.9635, "step": 65763 }, { "epoch": 6.753542821934689, "grad_norm": 0.03964497894048691, "learning_rate": 0.01, "loss": 1.9907, "step": 65766 }, { "epoch": 6.753850893407271, "grad_norm": 0.1308111995458603, "learning_rate": 0.01, "loss": 1.9561, "step": 65769 }, { "epoch": 6.754158964879852, "grad_norm": 0.09832756221294403, "learning_rate": 0.01, "loss": 1.9777, "step": 65772 }, { "epoch": 6.754467036352434, "grad_norm": 0.08136483281850815, "learning_rate": 0.01, "loss": 1.9727, "step": 65775 }, { "epoch": 6.754775107825015, "grad_norm": 0.0976012796163559, "learning_rate": 0.01, "loss": 1.9719, "step": 65778 }, { "epoch": 6.755083179297597, "grad_norm": 0.14576563239097595, "learning_rate": 0.01, "loss": 1.9803, "step": 65781 }, { "epoch": 6.755391250770178, "grad_norm": 0.09006581455469131, "learning_rate": 0.01, "loss": 1.9583, "step": 65784 }, { "epoch": 6.75569932224276, "grad_norm": 0.07345472276210785, "learning_rate": 0.01, "loss": 2.003, "step": 65787 }, { "epoch": 6.7560073937153415, "grad_norm": 0.06007661297917366, "learning_rate": 0.01, "loss": 1.9676, "step": 65790 }, { "epoch": 6.756315465187924, "grad_norm": 0.07360505312681198, "learning_rate": 0.01, "loss": 1.991, "step": 65793 }, { "epoch": 6.756623536660506, "grad_norm": 0.07598039507865906, "learning_rate": 0.01, "loss": 1.9945, "step": 65796 }, { "epoch": 6.756931608133087, "grad_norm": 0.09206894040107727, "learning_rate": 0.01, "loss": 1.986, "step": 65799 }, { "epoch": 6.757239679605669, "grad_norm": 0.0724322646856308, "learning_rate": 0.01, "loss": 2.0015, "step": 65802 }, { "epoch": 6.75754775107825, "grad_norm": 0.06573556363582611, "learning_rate": 0.01, "loss": 1.9694, "step": 65805 }, { "epoch": 6.757855822550832, "grad_norm": 0.04363197833299637, "learning_rate": 0.01, "loss": 1.9615, "step": 65808 }, { "epoch": 6.758163894023413, "grad_norm": 0.03666981682181358, "learning_rate": 0.01, "loss": 1.9787, "step": 65811 }, { "epoch": 6.758471965495995, "grad_norm": 0.05076993256807327, "learning_rate": 0.01, "loss": 1.9997, "step": 65814 }, { "epoch": 6.758780036968576, "grad_norm": 0.07672358304262161, "learning_rate": 0.01, "loss": 1.9793, "step": 65817 }, { "epoch": 6.7590881084411585, "grad_norm": 0.1728837937116623, "learning_rate": 0.01, "loss": 1.9722, "step": 65820 }, { "epoch": 6.75939617991374, "grad_norm": 0.14630280435085297, "learning_rate": 0.01, "loss": 1.9864, "step": 65823 }, { "epoch": 6.759704251386322, "grad_norm": 0.08959884941577911, "learning_rate": 0.01, "loss": 1.9648, "step": 65826 }, { "epoch": 6.760012322858904, "grad_norm": 0.05487535893917084, "learning_rate": 0.01, "loss": 2.0093, "step": 65829 }, { "epoch": 6.760320394331485, "grad_norm": 0.05097530409693718, "learning_rate": 0.01, "loss": 1.9804, "step": 65832 }, { "epoch": 6.760628465804066, "grad_norm": 0.04289190098643303, "learning_rate": 0.01, "loss": 1.9502, "step": 65835 }, { "epoch": 6.760936537276648, "grad_norm": 0.054402586072683334, "learning_rate": 0.01, "loss": 1.9824, "step": 65838 }, { "epoch": 6.76124460874923, "grad_norm": 0.046257637441158295, "learning_rate": 0.01, "loss": 1.9838, "step": 65841 }, { "epoch": 6.761552680221811, "grad_norm": 0.033798422664403915, "learning_rate": 0.01, "loss": 1.9841, "step": 65844 }, { "epoch": 6.761860751694393, "grad_norm": 0.03898288682103157, "learning_rate": 0.01, "loss": 1.964, "step": 65847 }, { "epoch": 6.7621688231669745, "grad_norm": 0.061388492584228516, "learning_rate": 0.01, "loss": 1.953, "step": 65850 }, { "epoch": 6.762476894639557, "grad_norm": 0.03913845494389534, "learning_rate": 0.01, "loss": 1.9564, "step": 65853 }, { "epoch": 6.762784966112138, "grad_norm": 0.06032055988907814, "learning_rate": 0.01, "loss": 1.9701, "step": 65856 }, { "epoch": 6.76309303758472, "grad_norm": 0.11745115369558334, "learning_rate": 0.01, "loss": 1.982, "step": 65859 }, { "epoch": 6.763401109057301, "grad_norm": 0.10399042069911957, "learning_rate": 0.01, "loss": 1.9666, "step": 65862 }, { "epoch": 6.763709180529883, "grad_norm": 0.06745419651269913, "learning_rate": 0.01, "loss": 1.9733, "step": 65865 }, { "epoch": 6.764017252002464, "grad_norm": 0.036637041717767715, "learning_rate": 0.01, "loss": 1.9632, "step": 65868 }, { "epoch": 6.764325323475046, "grad_norm": 0.03688213601708412, "learning_rate": 0.01, "loss": 1.9647, "step": 65871 }, { "epoch": 6.764633394947628, "grad_norm": 0.041792191565036774, "learning_rate": 0.01, "loss": 1.9368, "step": 65874 }, { "epoch": 6.764941466420209, "grad_norm": 0.03558392450213432, "learning_rate": 0.01, "loss": 1.9553, "step": 65877 }, { "epoch": 6.7652495378927915, "grad_norm": 0.037666697055101395, "learning_rate": 0.01, "loss": 1.9519, "step": 65880 }, { "epoch": 6.765557609365373, "grad_norm": 0.03376712277531624, "learning_rate": 0.01, "loss": 1.9706, "step": 65883 }, { "epoch": 6.765865680837955, "grad_norm": 0.09008733928203583, "learning_rate": 0.01, "loss": 1.9788, "step": 65886 }, { "epoch": 6.766173752310536, "grad_norm": 0.10737526416778564, "learning_rate": 0.01, "loss": 1.9893, "step": 65889 }, { "epoch": 6.766481823783118, "grad_norm": 0.12039290368556976, "learning_rate": 0.01, "loss": 1.9967, "step": 65892 }, { "epoch": 6.766789895255699, "grad_norm": 0.10525926202535629, "learning_rate": 0.01, "loss": 1.9466, "step": 65895 }, { "epoch": 6.767097966728281, "grad_norm": 0.11887960135936737, "learning_rate": 0.01, "loss": 1.9659, "step": 65898 }, { "epoch": 6.767406038200862, "grad_norm": 0.06034785136580467, "learning_rate": 0.01, "loss": 1.9726, "step": 65901 }, { "epoch": 6.767714109673444, "grad_norm": 0.04543771967291832, "learning_rate": 0.01, "loss": 1.9848, "step": 65904 }, { "epoch": 6.7680221811460255, "grad_norm": 0.04256156459450722, "learning_rate": 0.01, "loss": 1.9842, "step": 65907 }, { "epoch": 6.7683302526186075, "grad_norm": 0.04168063402175903, "learning_rate": 0.01, "loss": 1.9641, "step": 65910 }, { "epoch": 6.768638324091189, "grad_norm": 0.05196443200111389, "learning_rate": 0.01, "loss": 1.9854, "step": 65913 }, { "epoch": 6.768946395563771, "grad_norm": 0.03945057466626167, "learning_rate": 0.01, "loss": 1.9682, "step": 65916 }, { "epoch": 6.769254467036353, "grad_norm": 0.09867379069328308, "learning_rate": 0.01, "loss": 1.9873, "step": 65919 }, { "epoch": 6.769562538508934, "grad_norm": 0.03700730949640274, "learning_rate": 0.01, "loss": 1.9814, "step": 65922 }, { "epoch": 6.769870609981516, "grad_norm": 0.05173708498477936, "learning_rate": 0.01, "loss": 1.9516, "step": 65925 }, { "epoch": 6.770178681454097, "grad_norm": 0.04833926633000374, "learning_rate": 0.01, "loss": 1.9616, "step": 65928 }, { "epoch": 6.770486752926679, "grad_norm": 0.05311394855380058, "learning_rate": 0.01, "loss": 1.9876, "step": 65931 }, { "epoch": 6.77079482439926, "grad_norm": 0.13476112484931946, "learning_rate": 0.01, "loss": 1.9678, "step": 65934 }, { "epoch": 6.771102895871842, "grad_norm": 0.05324669927358627, "learning_rate": 0.01, "loss": 1.9839, "step": 65937 }, { "epoch": 6.771410967344424, "grad_norm": 0.050452303141355515, "learning_rate": 0.01, "loss": 1.9709, "step": 65940 }, { "epoch": 6.771719038817006, "grad_norm": 0.03241961449384689, "learning_rate": 0.01, "loss": 1.997, "step": 65943 }, { "epoch": 6.772027110289587, "grad_norm": 0.1082826629281044, "learning_rate": 0.01, "loss": 1.9622, "step": 65946 }, { "epoch": 6.772335181762169, "grad_norm": 0.08889704197645187, "learning_rate": 0.01, "loss": 1.9578, "step": 65949 }, { "epoch": 6.772643253234751, "grad_norm": 0.08153937757015228, "learning_rate": 0.01, "loss": 1.9395, "step": 65952 }, { "epoch": 6.772951324707332, "grad_norm": 0.07284180819988251, "learning_rate": 0.01, "loss": 1.9858, "step": 65955 }, { "epoch": 6.773259396179914, "grad_norm": 0.049150798469781876, "learning_rate": 0.01, "loss": 1.9699, "step": 65958 }, { "epoch": 6.773567467652495, "grad_norm": 0.10383135825395584, "learning_rate": 0.01, "loss": 1.9728, "step": 65961 }, { "epoch": 6.773875539125077, "grad_norm": 0.05065099522471428, "learning_rate": 0.01, "loss": 1.9868, "step": 65964 }, { "epoch": 6.7741836105976585, "grad_norm": 0.05559004843235016, "learning_rate": 0.01, "loss": 2.0035, "step": 65967 }, { "epoch": 6.7744916820702406, "grad_norm": 0.050398439168930054, "learning_rate": 0.01, "loss": 1.979, "step": 65970 }, { "epoch": 6.774799753542822, "grad_norm": 0.05198364332318306, "learning_rate": 0.01, "loss": 1.9815, "step": 65973 }, { "epoch": 6.775107825015404, "grad_norm": 0.06122811511158943, "learning_rate": 0.01, "loss": 1.9957, "step": 65976 }, { "epoch": 6.775415896487985, "grad_norm": 0.09441248327493668, "learning_rate": 0.01, "loss": 2.0144, "step": 65979 }, { "epoch": 6.775723967960567, "grad_norm": 0.06502705812454224, "learning_rate": 0.01, "loss": 1.9775, "step": 65982 }, { "epoch": 6.776032039433148, "grad_norm": 0.07678141444921494, "learning_rate": 0.01, "loss": 1.958, "step": 65985 }, { "epoch": 6.77634011090573, "grad_norm": 0.05069658160209656, "learning_rate": 0.01, "loss": 1.9911, "step": 65988 }, { "epoch": 6.776648182378311, "grad_norm": 0.11829594522714615, "learning_rate": 0.01, "loss": 1.9692, "step": 65991 }, { "epoch": 6.776956253850893, "grad_norm": 0.036905381828546524, "learning_rate": 0.01, "loss": 1.9611, "step": 65994 }, { "epoch": 6.7772643253234754, "grad_norm": 0.04046548530459404, "learning_rate": 0.01, "loss": 1.9668, "step": 65997 }, { "epoch": 6.777572396796057, "grad_norm": 0.04027654603123665, "learning_rate": 0.01, "loss": 1.9871, "step": 66000 }, { "epoch": 6.777880468268639, "grad_norm": 0.044706303626298904, "learning_rate": 0.01, "loss": 1.9772, "step": 66003 }, { "epoch": 6.77818853974122, "grad_norm": 0.0753062292933464, "learning_rate": 0.01, "loss": 2.001, "step": 66006 }, { "epoch": 6.778496611213802, "grad_norm": 0.10951106995344162, "learning_rate": 0.01, "loss": 1.9561, "step": 66009 }, { "epoch": 6.778804682686383, "grad_norm": 0.053536590188741684, "learning_rate": 0.01, "loss": 1.9702, "step": 66012 }, { "epoch": 6.779112754158965, "grad_norm": 0.060393862426280975, "learning_rate": 0.01, "loss": 1.9826, "step": 66015 }, { "epoch": 6.779420825631546, "grad_norm": 0.049091193825006485, "learning_rate": 0.01, "loss": 1.9661, "step": 66018 }, { "epoch": 6.779728897104128, "grad_norm": 0.0649518296122551, "learning_rate": 0.01, "loss": 1.9752, "step": 66021 }, { "epoch": 6.7800369685767095, "grad_norm": 0.039447683840990067, "learning_rate": 0.01, "loss": 1.9757, "step": 66024 }, { "epoch": 6.7803450400492915, "grad_norm": 0.04264757037162781, "learning_rate": 0.01, "loss": 1.9537, "step": 66027 }, { "epoch": 6.7806531115218736, "grad_norm": 0.06174059584736824, "learning_rate": 0.01, "loss": 1.981, "step": 66030 }, { "epoch": 6.780961182994455, "grad_norm": 0.05155632272362709, "learning_rate": 0.01, "loss": 1.9752, "step": 66033 }, { "epoch": 6.781269254467036, "grad_norm": 0.08351369947195053, "learning_rate": 0.01, "loss": 1.9641, "step": 66036 }, { "epoch": 6.781577325939618, "grad_norm": 0.09187694638967514, "learning_rate": 0.01, "loss": 1.9735, "step": 66039 }, { "epoch": 6.7818853974122, "grad_norm": 0.21938036382198334, "learning_rate": 0.01, "loss": 1.985, "step": 66042 }, { "epoch": 6.782193468884781, "grad_norm": 0.051359206438064575, "learning_rate": 0.01, "loss": 1.9821, "step": 66045 }, { "epoch": 6.782501540357363, "grad_norm": 0.08765391260385513, "learning_rate": 0.01, "loss": 1.9927, "step": 66048 }, { "epoch": 6.782809611829944, "grad_norm": 0.03909270092844963, "learning_rate": 0.01, "loss": 1.9739, "step": 66051 }, { "epoch": 6.783117683302526, "grad_norm": 0.06215377524495125, "learning_rate": 0.01, "loss": 1.9658, "step": 66054 }, { "epoch": 6.783425754775108, "grad_norm": 0.05099578574299812, "learning_rate": 0.01, "loss": 1.9605, "step": 66057 }, { "epoch": 6.78373382624769, "grad_norm": 0.037330977618694305, "learning_rate": 0.01, "loss": 1.9744, "step": 66060 }, { "epoch": 6.784041897720271, "grad_norm": 0.034515704959630966, "learning_rate": 0.01, "loss": 1.9836, "step": 66063 }, { "epoch": 6.784349969192853, "grad_norm": 0.07824211567640305, "learning_rate": 0.01, "loss": 1.9861, "step": 66066 }, { "epoch": 6.784658040665434, "grad_norm": 0.13664306700229645, "learning_rate": 0.01, "loss": 1.9825, "step": 66069 }, { "epoch": 6.784966112138016, "grad_norm": 0.11008740961551666, "learning_rate": 0.01, "loss": 1.9748, "step": 66072 }, { "epoch": 6.785274183610598, "grad_norm": 0.07172096520662308, "learning_rate": 0.01, "loss": 2.0085, "step": 66075 }, { "epoch": 6.785582255083179, "grad_norm": 0.08354049175977707, "learning_rate": 0.01, "loss": 1.9607, "step": 66078 }, { "epoch": 6.785890326555761, "grad_norm": 0.04367983713746071, "learning_rate": 0.01, "loss": 1.9804, "step": 66081 }, { "epoch": 6.7861983980283425, "grad_norm": 0.03163396567106247, "learning_rate": 0.01, "loss": 1.9793, "step": 66084 }, { "epoch": 6.7865064695009245, "grad_norm": 0.0645727813243866, "learning_rate": 0.01, "loss": 1.991, "step": 66087 }, { "epoch": 6.786814540973506, "grad_norm": 0.04862186312675476, "learning_rate": 0.01, "loss": 1.9681, "step": 66090 }, { "epoch": 6.787122612446088, "grad_norm": 0.035945162177085876, "learning_rate": 0.01, "loss": 1.9657, "step": 66093 }, { "epoch": 6.787430683918669, "grad_norm": 0.10152018815279007, "learning_rate": 0.01, "loss": 1.9673, "step": 66096 }, { "epoch": 6.787738755391251, "grad_norm": 0.047067925333976746, "learning_rate": 0.01, "loss": 1.965, "step": 66099 }, { "epoch": 6.788046826863832, "grad_norm": 0.0677446648478508, "learning_rate": 0.01, "loss": 1.9553, "step": 66102 }, { "epoch": 6.788354898336414, "grad_norm": 0.03530653938651085, "learning_rate": 0.01, "loss": 1.9848, "step": 66105 }, { "epoch": 6.788662969808995, "grad_norm": 0.04466121271252632, "learning_rate": 0.01, "loss": 1.9825, "step": 66108 }, { "epoch": 6.788971041281577, "grad_norm": 0.036109503358602524, "learning_rate": 0.01, "loss": 1.9883, "step": 66111 }, { "epoch": 6.7892791127541585, "grad_norm": 0.03432103618979454, "learning_rate": 0.01, "loss": 1.9818, "step": 66114 }, { "epoch": 6.789587184226741, "grad_norm": 0.06858955323696136, "learning_rate": 0.01, "loss": 1.9798, "step": 66117 }, { "epoch": 6.789895255699323, "grad_norm": 0.053574852645397186, "learning_rate": 0.01, "loss": 1.9778, "step": 66120 }, { "epoch": 6.790203327171904, "grad_norm": 0.0781317800283432, "learning_rate": 0.01, "loss": 1.9848, "step": 66123 }, { "epoch": 6.790511398644486, "grad_norm": 0.0939275249838829, "learning_rate": 0.01, "loss": 1.979, "step": 66126 }, { "epoch": 6.790819470117067, "grad_norm": 0.1132221594452858, "learning_rate": 0.01, "loss": 1.9867, "step": 66129 }, { "epoch": 6.791127541589649, "grad_norm": 0.07932759076356888, "learning_rate": 0.01, "loss": 1.9893, "step": 66132 }, { "epoch": 6.79143561306223, "grad_norm": 0.04189478978514671, "learning_rate": 0.01, "loss": 1.961, "step": 66135 }, { "epoch": 6.791743684534812, "grad_norm": 0.06015237420797348, "learning_rate": 0.01, "loss": 1.9555, "step": 66138 }, { "epoch": 6.792051756007393, "grad_norm": 0.05187036469578743, "learning_rate": 0.01, "loss": 2.0005, "step": 66141 }, { "epoch": 6.7923598274799755, "grad_norm": 0.07512509822845459, "learning_rate": 0.01, "loss": 1.9791, "step": 66144 }, { "epoch": 6.792667898952557, "grad_norm": 0.10434945672750473, "learning_rate": 0.01, "loss": 1.9922, "step": 66147 }, { "epoch": 6.792975970425139, "grad_norm": 0.07873903959989548, "learning_rate": 0.01, "loss": 1.981, "step": 66150 }, { "epoch": 6.793284041897721, "grad_norm": 0.04768504202365875, "learning_rate": 0.01, "loss": 1.9568, "step": 66153 }, { "epoch": 6.793592113370302, "grad_norm": 0.09988034516572952, "learning_rate": 0.01, "loss": 1.9806, "step": 66156 }, { "epoch": 6.793900184842883, "grad_norm": 0.07220810651779175, "learning_rate": 0.01, "loss": 1.9664, "step": 66159 }, { "epoch": 6.794208256315465, "grad_norm": 0.036455634981393814, "learning_rate": 0.01, "loss": 2.0003, "step": 66162 }, { "epoch": 6.794516327788047, "grad_norm": 0.04097495973110199, "learning_rate": 0.01, "loss": 1.9836, "step": 66165 }, { "epoch": 6.794824399260628, "grad_norm": 0.03743165358901024, "learning_rate": 0.01, "loss": 1.9423, "step": 66168 }, { "epoch": 6.79513247073321, "grad_norm": 0.14310762286186218, "learning_rate": 0.01, "loss": 2.0118, "step": 66171 }, { "epoch": 6.7954405422057915, "grad_norm": 0.067170649766922, "learning_rate": 0.01, "loss": 1.9556, "step": 66174 }, { "epoch": 6.795748613678374, "grad_norm": 0.04540235176682472, "learning_rate": 0.01, "loss": 1.9537, "step": 66177 }, { "epoch": 6.796056685150955, "grad_norm": 0.035935305058956146, "learning_rate": 0.01, "loss": 1.9703, "step": 66180 }, { "epoch": 6.796364756623537, "grad_norm": 0.10116109251976013, "learning_rate": 0.01, "loss": 1.994, "step": 66183 }, { "epoch": 6.796672828096118, "grad_norm": 0.09814930707216263, "learning_rate": 0.01, "loss": 1.96, "step": 66186 }, { "epoch": 6.7969808995687, "grad_norm": 0.04029448702931404, "learning_rate": 0.01, "loss": 1.9701, "step": 66189 }, { "epoch": 6.797288971041281, "grad_norm": 0.07453613728284836, "learning_rate": 0.01, "loss": 1.9623, "step": 66192 }, { "epoch": 6.797597042513863, "grad_norm": 0.07255508750677109, "learning_rate": 0.01, "loss": 1.9744, "step": 66195 }, { "epoch": 6.797905113986445, "grad_norm": 0.07982856780290604, "learning_rate": 0.01, "loss": 1.9942, "step": 66198 }, { "epoch": 6.798213185459026, "grad_norm": 0.09988022595643997, "learning_rate": 0.01, "loss": 1.972, "step": 66201 }, { "epoch": 6.7985212569316085, "grad_norm": 0.09353185445070267, "learning_rate": 0.01, "loss": 1.9625, "step": 66204 }, { "epoch": 6.79882932840419, "grad_norm": 0.04059029743075371, "learning_rate": 0.01, "loss": 1.9812, "step": 66207 }, { "epoch": 6.799137399876772, "grad_norm": 0.04064788296818733, "learning_rate": 0.01, "loss": 1.9616, "step": 66210 }, { "epoch": 6.799445471349353, "grad_norm": 0.052321575582027435, "learning_rate": 0.01, "loss": 1.9848, "step": 66213 }, { "epoch": 6.799753542821935, "grad_norm": 0.13621580600738525, "learning_rate": 0.01, "loss": 1.9807, "step": 66216 }, { "epoch": 6.800061614294516, "grad_norm": 0.04013410210609436, "learning_rate": 0.01, "loss": 1.9933, "step": 66219 }, { "epoch": 6.800369685767098, "grad_norm": 0.04535072669386864, "learning_rate": 0.01, "loss": 1.9999, "step": 66222 }, { "epoch": 6.800677757239679, "grad_norm": 0.05595749244093895, "learning_rate": 0.01, "loss": 1.9841, "step": 66225 }, { "epoch": 6.800985828712261, "grad_norm": 0.03648608550429344, "learning_rate": 0.01, "loss": 2.0068, "step": 66228 }, { "epoch": 6.801293900184843, "grad_norm": 0.039576612412929535, "learning_rate": 0.01, "loss": 1.9796, "step": 66231 }, { "epoch": 6.8016019716574245, "grad_norm": 0.0474928542971611, "learning_rate": 0.01, "loss": 1.9933, "step": 66234 }, { "epoch": 6.801910043130006, "grad_norm": 0.04102933779358864, "learning_rate": 0.01, "loss": 1.9754, "step": 66237 }, { "epoch": 6.802218114602588, "grad_norm": 0.03818250820040703, "learning_rate": 0.01, "loss": 1.9846, "step": 66240 }, { "epoch": 6.80252618607517, "grad_norm": 0.038981273770332336, "learning_rate": 0.01, "loss": 1.9789, "step": 66243 }, { "epoch": 6.802834257547751, "grad_norm": 0.12116497755050659, "learning_rate": 0.01, "loss": 1.9555, "step": 66246 }, { "epoch": 6.803142329020333, "grad_norm": 0.05318591371178627, "learning_rate": 0.01, "loss": 1.993, "step": 66249 }, { "epoch": 6.803450400492914, "grad_norm": 0.05970082804560661, "learning_rate": 0.01, "loss": 1.9809, "step": 66252 }, { "epoch": 6.803758471965496, "grad_norm": 0.06618655472993851, "learning_rate": 0.01, "loss": 1.9836, "step": 66255 }, { "epoch": 6.804066543438077, "grad_norm": 0.05062811076641083, "learning_rate": 0.01, "loss": 1.9729, "step": 66258 }, { "epoch": 6.804374614910659, "grad_norm": 0.03643089160323143, "learning_rate": 0.01, "loss": 1.9736, "step": 66261 }, { "epoch": 6.804682686383241, "grad_norm": 0.051748260855674744, "learning_rate": 0.01, "loss": 1.9959, "step": 66264 }, { "epoch": 6.804990757855823, "grad_norm": 0.029979856684803963, "learning_rate": 0.01, "loss": 1.9828, "step": 66267 }, { "epoch": 6.805298829328404, "grad_norm": 0.03129233047366142, "learning_rate": 0.01, "loss": 1.9617, "step": 66270 }, { "epoch": 6.805606900800986, "grad_norm": 0.06324151903390884, "learning_rate": 0.01, "loss": 1.9882, "step": 66273 }, { "epoch": 6.805914972273568, "grad_norm": 0.13628403842449188, "learning_rate": 0.01, "loss": 2.0176, "step": 66276 }, { "epoch": 6.806223043746149, "grad_norm": 0.05685223639011383, "learning_rate": 0.01, "loss": 1.9704, "step": 66279 }, { "epoch": 6.806531115218731, "grad_norm": 0.0537252239882946, "learning_rate": 0.01, "loss": 1.9616, "step": 66282 }, { "epoch": 6.806839186691312, "grad_norm": 0.11015389859676361, "learning_rate": 0.01, "loss": 1.9703, "step": 66285 }, { "epoch": 6.807147258163894, "grad_norm": 0.04667497053742409, "learning_rate": 0.01, "loss": 1.9714, "step": 66288 }, { "epoch": 6.8074553296364755, "grad_norm": 0.03570316731929779, "learning_rate": 0.01, "loss": 1.9779, "step": 66291 }, { "epoch": 6.8077634011090575, "grad_norm": 0.0808587595820427, "learning_rate": 0.01, "loss": 1.9526, "step": 66294 }, { "epoch": 6.808071472581639, "grad_norm": 0.10974206030368805, "learning_rate": 0.01, "loss": 1.9517, "step": 66297 }, { "epoch": 6.808379544054221, "grad_norm": 0.10623728483915329, "learning_rate": 0.01, "loss": 1.9976, "step": 66300 }, { "epoch": 6.808687615526802, "grad_norm": 0.07686350494623184, "learning_rate": 0.01, "loss": 1.9786, "step": 66303 }, { "epoch": 6.808995686999384, "grad_norm": 0.0518915168941021, "learning_rate": 0.01, "loss": 1.9741, "step": 66306 }, { "epoch": 6.809303758471965, "grad_norm": 0.05631876736879349, "learning_rate": 0.01, "loss": 1.9879, "step": 66309 }, { "epoch": 6.809611829944547, "grad_norm": 0.07675952464342117, "learning_rate": 0.01, "loss": 1.9706, "step": 66312 }, { "epoch": 6.809919901417128, "grad_norm": 0.05211762338876724, "learning_rate": 0.01, "loss": 1.9705, "step": 66315 }, { "epoch": 6.81022797288971, "grad_norm": 0.07859452068805695, "learning_rate": 0.01, "loss": 1.9845, "step": 66318 }, { "epoch": 6.810536044362292, "grad_norm": 0.05693339928984642, "learning_rate": 0.01, "loss": 1.9648, "step": 66321 }, { "epoch": 6.810844115834874, "grad_norm": 0.03533701226115227, "learning_rate": 0.01, "loss": 1.9809, "step": 66324 }, { "epoch": 6.811152187307456, "grad_norm": 0.10251706093549728, "learning_rate": 0.01, "loss": 1.9536, "step": 66327 }, { "epoch": 6.811460258780037, "grad_norm": 0.06078473851084709, "learning_rate": 0.01, "loss": 1.9835, "step": 66330 }, { "epoch": 6.811768330252619, "grad_norm": 0.0667276531457901, "learning_rate": 0.01, "loss": 1.9462, "step": 66333 }, { "epoch": 6.8120764017252, "grad_norm": 0.08299112319946289, "learning_rate": 0.01, "loss": 2.012, "step": 66336 }, { "epoch": 6.812384473197782, "grad_norm": 0.10200154036283493, "learning_rate": 0.01, "loss": 1.9656, "step": 66339 }, { "epoch": 6.812692544670363, "grad_norm": 0.06807447969913483, "learning_rate": 0.01, "loss": 1.9781, "step": 66342 }, { "epoch": 6.813000616142945, "grad_norm": 0.12666809558868408, "learning_rate": 0.01, "loss": 1.9803, "step": 66345 }, { "epoch": 6.813308687615526, "grad_norm": 0.07474800199270248, "learning_rate": 0.01, "loss": 1.9846, "step": 66348 }, { "epoch": 6.8136167590881085, "grad_norm": 0.045581359416246414, "learning_rate": 0.01, "loss": 1.9718, "step": 66351 }, { "epoch": 6.8139248305606905, "grad_norm": 0.03591908887028694, "learning_rate": 0.01, "loss": 1.9882, "step": 66354 }, { "epoch": 6.814232902033272, "grad_norm": 0.0317840613424778, "learning_rate": 0.01, "loss": 1.9799, "step": 66357 }, { "epoch": 6.814540973505853, "grad_norm": 0.12169979512691498, "learning_rate": 0.01, "loss": 1.987, "step": 66360 }, { "epoch": 6.814849044978435, "grad_norm": 0.159201517701149, "learning_rate": 0.01, "loss": 1.9776, "step": 66363 }, { "epoch": 6.815157116451017, "grad_norm": 0.0912163183093071, "learning_rate": 0.01, "loss": 1.9631, "step": 66366 }, { "epoch": 6.815465187923598, "grad_norm": 0.05623574182391167, "learning_rate": 0.01, "loss": 1.9889, "step": 66369 }, { "epoch": 6.81577325939618, "grad_norm": 0.0320771262049675, "learning_rate": 0.01, "loss": 1.9731, "step": 66372 }, { "epoch": 6.816081330868761, "grad_norm": 0.033067572861909866, "learning_rate": 0.01, "loss": 2.0206, "step": 66375 }, { "epoch": 6.816389402341343, "grad_norm": 0.043655119836330414, "learning_rate": 0.01, "loss": 1.9751, "step": 66378 }, { "epoch": 6.8166974738139245, "grad_norm": 0.03852735459804535, "learning_rate": 0.01, "loss": 1.9829, "step": 66381 }, { "epoch": 6.817005545286507, "grad_norm": 0.06447228044271469, "learning_rate": 0.01, "loss": 1.9728, "step": 66384 }, { "epoch": 6.817313616759088, "grad_norm": 0.1129051074385643, "learning_rate": 0.01, "loss": 2.0049, "step": 66387 }, { "epoch": 6.81762168823167, "grad_norm": 0.09710655361413956, "learning_rate": 0.01, "loss": 1.9459, "step": 66390 }, { "epoch": 6.817929759704251, "grad_norm": 0.13679730892181396, "learning_rate": 0.01, "loss": 1.9943, "step": 66393 }, { "epoch": 6.818237831176833, "grad_norm": 0.07610119134187698, "learning_rate": 0.01, "loss": 1.9796, "step": 66396 }, { "epoch": 6.818545902649415, "grad_norm": 0.06364092975854874, "learning_rate": 0.01, "loss": 1.9905, "step": 66399 }, { "epoch": 6.818853974121996, "grad_norm": 0.06607314199209213, "learning_rate": 0.01, "loss": 1.9489, "step": 66402 }, { "epoch": 6.819162045594578, "grad_norm": 0.06625553220510483, "learning_rate": 0.01, "loss": 1.9667, "step": 66405 }, { "epoch": 6.819470117067159, "grad_norm": 0.052945904433727264, "learning_rate": 0.01, "loss": 1.9705, "step": 66408 }, { "epoch": 6.8197781885397415, "grad_norm": 0.0977964773774147, "learning_rate": 0.01, "loss": 1.9751, "step": 66411 }, { "epoch": 6.820086260012323, "grad_norm": 0.09723694622516632, "learning_rate": 0.01, "loss": 1.9607, "step": 66414 }, { "epoch": 6.820394331484905, "grad_norm": 0.05858434736728668, "learning_rate": 0.01, "loss": 1.9873, "step": 66417 }, { "epoch": 6.820702402957486, "grad_norm": 0.09421772509813309, "learning_rate": 0.01, "loss": 1.9741, "step": 66420 }, { "epoch": 6.821010474430068, "grad_norm": 0.040733322501182556, "learning_rate": 0.01, "loss": 1.9752, "step": 66423 }, { "epoch": 6.821318545902649, "grad_norm": 0.0649617463350296, "learning_rate": 0.01, "loss": 1.9717, "step": 66426 }, { "epoch": 6.821626617375231, "grad_norm": 0.10224436968564987, "learning_rate": 0.01, "loss": 2.0167, "step": 66429 }, { "epoch": 6.821934688847813, "grad_norm": 0.07691013067960739, "learning_rate": 0.01, "loss": 1.977, "step": 66432 }, { "epoch": 6.822242760320394, "grad_norm": 0.1012914851307869, "learning_rate": 0.01, "loss": 1.985, "step": 66435 }, { "epoch": 6.8225508317929755, "grad_norm": 0.04432602971792221, "learning_rate": 0.01, "loss": 1.9678, "step": 66438 }, { "epoch": 6.8228589032655576, "grad_norm": 0.0863911360502243, "learning_rate": 0.01, "loss": 1.9949, "step": 66441 }, { "epoch": 6.82316697473814, "grad_norm": 0.0747685432434082, "learning_rate": 0.01, "loss": 1.9806, "step": 66444 }, { "epoch": 6.823475046210721, "grad_norm": 0.04725578799843788, "learning_rate": 0.01, "loss": 1.9782, "step": 66447 }, { "epoch": 6.823783117683303, "grad_norm": 0.1259828805923462, "learning_rate": 0.01, "loss": 1.984, "step": 66450 }, { "epoch": 6.824091189155884, "grad_norm": 0.05945250764489174, "learning_rate": 0.01, "loss": 1.9617, "step": 66453 }, { "epoch": 6.824399260628466, "grad_norm": 0.03467526659369469, "learning_rate": 0.01, "loss": 1.9694, "step": 66456 }, { "epoch": 6.824707332101047, "grad_norm": 0.04494983330368996, "learning_rate": 0.01, "loss": 1.9891, "step": 66459 }, { "epoch": 6.825015403573629, "grad_norm": 0.042065590620040894, "learning_rate": 0.01, "loss": 2.0022, "step": 66462 }, { "epoch": 6.82532347504621, "grad_norm": 0.1278277486562729, "learning_rate": 0.01, "loss": 1.9866, "step": 66465 }, { "epoch": 6.8256315465187924, "grad_norm": 0.04210862144827843, "learning_rate": 0.01, "loss": 1.9512, "step": 66468 }, { "epoch": 6.825939617991374, "grad_norm": 0.10816308856010437, "learning_rate": 0.01, "loss": 1.9699, "step": 66471 }, { "epoch": 6.826247689463956, "grad_norm": 0.04535592719912529, "learning_rate": 0.01, "loss": 2.0036, "step": 66474 }, { "epoch": 6.826555760936538, "grad_norm": 0.06771806627511978, "learning_rate": 0.01, "loss": 1.9786, "step": 66477 }, { "epoch": 6.826863832409119, "grad_norm": 0.13082286715507507, "learning_rate": 0.01, "loss": 1.9638, "step": 66480 }, { "epoch": 6.827171903881701, "grad_norm": 0.06389307230710983, "learning_rate": 0.01, "loss": 1.9555, "step": 66483 }, { "epoch": 6.827479975354282, "grad_norm": 0.04055839404463768, "learning_rate": 0.01, "loss": 1.9751, "step": 66486 }, { "epoch": 6.827788046826864, "grad_norm": 0.03493233397603035, "learning_rate": 0.01, "loss": 1.9821, "step": 66489 }, { "epoch": 6.828096118299445, "grad_norm": 0.03460715711116791, "learning_rate": 0.01, "loss": 1.9932, "step": 66492 }, { "epoch": 6.828404189772027, "grad_norm": 0.04448126628994942, "learning_rate": 0.01, "loss": 1.9681, "step": 66495 }, { "epoch": 6.8287122612446085, "grad_norm": 0.14231470227241516, "learning_rate": 0.01, "loss": 1.9878, "step": 66498 }, { "epoch": 6.8290203327171906, "grad_norm": 0.13960252702236176, "learning_rate": 0.01, "loss": 2.0018, "step": 66501 }, { "epoch": 6.829328404189772, "grad_norm": 0.053078074008226395, "learning_rate": 0.01, "loss": 1.9801, "step": 66504 }, { "epoch": 6.829636475662354, "grad_norm": 0.048882726579904556, "learning_rate": 0.01, "loss": 1.9818, "step": 66507 }, { "epoch": 6.829944547134935, "grad_norm": 0.052697841078042984, "learning_rate": 0.01, "loss": 1.9751, "step": 66510 }, { "epoch": 6.830252618607517, "grad_norm": 0.07299689203500748, "learning_rate": 0.01, "loss": 1.9554, "step": 66513 }, { "epoch": 6.830560690080098, "grad_norm": 0.06163341924548149, "learning_rate": 0.01, "loss": 1.9859, "step": 66516 }, { "epoch": 6.83086876155268, "grad_norm": 0.11083027720451355, "learning_rate": 0.01, "loss": 2.0029, "step": 66519 }, { "epoch": 6.831176833025262, "grad_norm": 0.07942959666252136, "learning_rate": 0.01, "loss": 1.9785, "step": 66522 }, { "epoch": 6.831484904497843, "grad_norm": 0.048363711684942245, "learning_rate": 0.01, "loss": 1.9661, "step": 66525 }, { "epoch": 6.8317929759704255, "grad_norm": 0.03769001364707947, "learning_rate": 0.01, "loss": 1.9449, "step": 66528 }, { "epoch": 6.832101047443007, "grad_norm": 0.03747798129916191, "learning_rate": 0.01, "loss": 1.9727, "step": 66531 }, { "epoch": 6.832409118915589, "grad_norm": 0.05700261518359184, "learning_rate": 0.01, "loss": 1.9723, "step": 66534 }, { "epoch": 6.83271719038817, "grad_norm": 0.08550623804330826, "learning_rate": 0.01, "loss": 2.0129, "step": 66537 }, { "epoch": 6.833025261860752, "grad_norm": 0.05213450267910957, "learning_rate": 0.01, "loss": 1.9825, "step": 66540 }, { "epoch": 6.833333333333333, "grad_norm": 0.12226033955812454, "learning_rate": 0.01, "loss": 1.9628, "step": 66543 }, { "epoch": 6.833641404805915, "grad_norm": 0.06343870609998703, "learning_rate": 0.01, "loss": 1.9572, "step": 66546 }, { "epoch": 6.833949476278496, "grad_norm": 0.06879778206348419, "learning_rate": 0.01, "loss": 1.9975, "step": 66549 }, { "epoch": 6.834257547751078, "grad_norm": 0.04970945045351982, "learning_rate": 0.01, "loss": 1.979, "step": 66552 }, { "epoch": 6.83456561922366, "grad_norm": 0.04559726640582085, "learning_rate": 0.01, "loss": 1.9718, "step": 66555 }, { "epoch": 6.8348736906962415, "grad_norm": 0.08180249482393265, "learning_rate": 0.01, "loss": 1.9704, "step": 66558 }, { "epoch": 6.835181762168823, "grad_norm": 0.044129811227321625, "learning_rate": 0.01, "loss": 1.9591, "step": 66561 }, { "epoch": 6.835489833641405, "grad_norm": 0.038416411727666855, "learning_rate": 0.01, "loss": 1.9519, "step": 66564 }, { "epoch": 6.835797905113987, "grad_norm": 0.06428057700395584, "learning_rate": 0.01, "loss": 1.9855, "step": 66567 }, { "epoch": 6.836105976586568, "grad_norm": 0.07858021557331085, "learning_rate": 0.01, "loss": 1.9836, "step": 66570 }, { "epoch": 6.83641404805915, "grad_norm": 0.07772648334503174, "learning_rate": 0.01, "loss": 1.9543, "step": 66573 }, { "epoch": 6.836722119531731, "grad_norm": 0.03566915541887283, "learning_rate": 0.01, "loss": 1.9563, "step": 66576 }, { "epoch": 6.837030191004313, "grad_norm": 0.04193243384361267, "learning_rate": 0.01, "loss": 1.9472, "step": 66579 }, { "epoch": 6.837338262476894, "grad_norm": 0.058167651295661926, "learning_rate": 0.01, "loss": 1.9685, "step": 66582 }, { "epoch": 6.837646333949476, "grad_norm": 0.12235833704471588, "learning_rate": 0.01, "loss": 1.951, "step": 66585 }, { "epoch": 6.837954405422058, "grad_norm": 0.052113085985183716, "learning_rate": 0.01, "loss": 1.9835, "step": 66588 }, { "epoch": 6.83826247689464, "grad_norm": 0.0758160799741745, "learning_rate": 0.01, "loss": 1.9813, "step": 66591 }, { "epoch": 6.838570548367221, "grad_norm": 0.05963435024023056, "learning_rate": 0.01, "loss": 1.9687, "step": 66594 }, { "epoch": 6.838878619839803, "grad_norm": 0.040900785475969315, "learning_rate": 0.01, "loss": 1.9922, "step": 66597 }, { "epoch": 6.839186691312385, "grad_norm": 0.042031388729810715, "learning_rate": 0.01, "loss": 1.9826, "step": 66600 }, { "epoch": 6.839494762784966, "grad_norm": 0.056323789060115814, "learning_rate": 0.01, "loss": 1.9528, "step": 66603 }, { "epoch": 6.839802834257548, "grad_norm": 0.07060620188713074, "learning_rate": 0.01, "loss": 1.9808, "step": 66606 }, { "epoch": 6.840110905730129, "grad_norm": 0.05406404659152031, "learning_rate": 0.01, "loss": 1.9635, "step": 66609 }, { "epoch": 6.840418977202711, "grad_norm": 0.06324061751365662, "learning_rate": 0.01, "loss": 1.9947, "step": 66612 }, { "epoch": 6.8407270486752925, "grad_norm": 0.04893625155091286, "learning_rate": 0.01, "loss": 2.0007, "step": 66615 }, { "epoch": 6.8410351201478745, "grad_norm": 0.062072839587926865, "learning_rate": 0.01, "loss": 1.9802, "step": 66618 }, { "epoch": 6.841343191620456, "grad_norm": 0.12954573333263397, "learning_rate": 0.01, "loss": 1.9708, "step": 66621 }, { "epoch": 6.841651263093038, "grad_norm": 0.06360910087823868, "learning_rate": 0.01, "loss": 1.9877, "step": 66624 }, { "epoch": 6.841959334565619, "grad_norm": 0.0654262900352478, "learning_rate": 0.01, "loss": 1.9869, "step": 66627 }, { "epoch": 6.842267406038201, "grad_norm": 0.12571407854557037, "learning_rate": 0.01, "loss": 1.9901, "step": 66630 }, { "epoch": 6.842575477510783, "grad_norm": 0.09581385552883148, "learning_rate": 0.01, "loss": 1.9594, "step": 66633 }, { "epoch": 6.842883548983364, "grad_norm": 0.08289510756731033, "learning_rate": 0.01, "loss": 1.9766, "step": 66636 }, { "epoch": 6.843191620455945, "grad_norm": 0.043533485382795334, "learning_rate": 0.01, "loss": 1.9793, "step": 66639 }, { "epoch": 6.843499691928527, "grad_norm": 0.04672224447131157, "learning_rate": 0.01, "loss": 1.9816, "step": 66642 }, { "epoch": 6.843807763401109, "grad_norm": 0.03264494985342026, "learning_rate": 0.01, "loss": 1.9976, "step": 66645 }, { "epoch": 6.844115834873691, "grad_norm": 0.10711422562599182, "learning_rate": 0.01, "loss": 1.9952, "step": 66648 }, { "epoch": 6.844423906346273, "grad_norm": 0.08725102990865707, "learning_rate": 0.01, "loss": 1.9728, "step": 66651 }, { "epoch": 6.844731977818854, "grad_norm": 0.03802330791950226, "learning_rate": 0.01, "loss": 1.9667, "step": 66654 }, { "epoch": 6.845040049291436, "grad_norm": 0.0540679395198822, "learning_rate": 0.01, "loss": 1.9887, "step": 66657 }, { "epoch": 6.845348120764017, "grad_norm": 0.10413230210542679, "learning_rate": 0.01, "loss": 1.9451, "step": 66660 }, { "epoch": 6.845656192236599, "grad_norm": 0.07961270213127136, "learning_rate": 0.01, "loss": 1.9462, "step": 66663 }, { "epoch": 6.84596426370918, "grad_norm": 0.06600063294172287, "learning_rate": 0.01, "loss": 1.9298, "step": 66666 }, { "epoch": 6.846272335181762, "grad_norm": 0.10893017053604126, "learning_rate": 0.01, "loss": 2.0068, "step": 66669 }, { "epoch": 6.846580406654343, "grad_norm": 0.07856502383947372, "learning_rate": 0.01, "loss": 1.9899, "step": 66672 }, { "epoch": 6.8468884781269255, "grad_norm": 0.06285273283720016, "learning_rate": 0.01, "loss": 1.9413, "step": 66675 }, { "epoch": 6.8471965495995075, "grad_norm": 0.07361593842506409, "learning_rate": 0.01, "loss": 1.9881, "step": 66678 }, { "epoch": 6.847504621072089, "grad_norm": 0.07996716350317001, "learning_rate": 0.01, "loss": 1.985, "step": 66681 }, { "epoch": 6.847812692544671, "grad_norm": 0.05737863853573799, "learning_rate": 0.01, "loss": 1.9702, "step": 66684 }, { "epoch": 6.848120764017252, "grad_norm": 0.05021853744983673, "learning_rate": 0.01, "loss": 1.9744, "step": 66687 }, { "epoch": 6.848428835489834, "grad_norm": 0.033845316618680954, "learning_rate": 0.01, "loss": 1.9695, "step": 66690 }, { "epoch": 6.848736906962415, "grad_norm": 0.10472722351551056, "learning_rate": 0.01, "loss": 1.973, "step": 66693 }, { "epoch": 6.849044978434997, "grad_norm": 0.07523515820503235, "learning_rate": 0.01, "loss": 1.9599, "step": 66696 }, { "epoch": 6.849353049907578, "grad_norm": 0.08648163080215454, "learning_rate": 0.01, "loss": 1.9795, "step": 66699 }, { "epoch": 6.84966112138016, "grad_norm": 0.05042044818401337, "learning_rate": 0.01, "loss": 1.9494, "step": 66702 }, { "epoch": 6.8499691928527415, "grad_norm": 0.04480702057480812, "learning_rate": 0.01, "loss": 1.9699, "step": 66705 }, { "epoch": 6.850277264325324, "grad_norm": 0.07494408637285233, "learning_rate": 0.01, "loss": 1.9765, "step": 66708 }, { "epoch": 6.850585335797905, "grad_norm": 0.061382777988910675, "learning_rate": 0.01, "loss": 1.9695, "step": 66711 }, { "epoch": 6.850893407270487, "grad_norm": 0.04542558267712593, "learning_rate": 0.01, "loss": 1.9928, "step": 66714 }, { "epoch": 6.851201478743068, "grad_norm": 0.039626192301511765, "learning_rate": 0.01, "loss": 1.9384, "step": 66717 }, { "epoch": 6.85150955021565, "grad_norm": 0.04274001717567444, "learning_rate": 0.01, "loss": 1.9762, "step": 66720 }, { "epoch": 6.851817621688232, "grad_norm": 0.16487199068069458, "learning_rate": 0.01, "loss": 1.9808, "step": 66723 }, { "epoch": 6.852125693160813, "grad_norm": 0.04666552320122719, "learning_rate": 0.01, "loss": 1.9914, "step": 66726 }, { "epoch": 6.852433764633395, "grad_norm": 0.04287717863917351, "learning_rate": 0.01, "loss": 1.9981, "step": 66729 }, { "epoch": 6.852741836105976, "grad_norm": 0.04304524138569832, "learning_rate": 0.01, "loss": 1.961, "step": 66732 }, { "epoch": 6.8530499075785585, "grad_norm": 0.06935624033212662, "learning_rate": 0.01, "loss": 1.9709, "step": 66735 }, { "epoch": 6.85335797905114, "grad_norm": 0.06261073797941208, "learning_rate": 0.01, "loss": 1.9558, "step": 66738 }, { "epoch": 6.853666050523722, "grad_norm": 0.07173455506563187, "learning_rate": 0.01, "loss": 1.9756, "step": 66741 }, { "epoch": 6.853974121996303, "grad_norm": 0.08861742913722992, "learning_rate": 0.01, "loss": 1.991, "step": 66744 }, { "epoch": 6.854282193468885, "grad_norm": 0.05243033543229103, "learning_rate": 0.01, "loss": 1.9649, "step": 66747 }, { "epoch": 6.854590264941466, "grad_norm": 0.03281958028674126, "learning_rate": 0.01, "loss": 1.9917, "step": 66750 }, { "epoch": 6.854898336414048, "grad_norm": 0.10798471421003342, "learning_rate": 0.01, "loss": 2.0004, "step": 66753 }, { "epoch": 6.85520640788663, "grad_norm": 0.10174170881509781, "learning_rate": 0.01, "loss": 1.9616, "step": 66756 }, { "epoch": 6.855514479359211, "grad_norm": 0.03573278710246086, "learning_rate": 0.01, "loss": 1.9906, "step": 66759 }, { "epoch": 6.8558225508317925, "grad_norm": 0.12209411710500717, "learning_rate": 0.01, "loss": 1.9913, "step": 66762 }, { "epoch": 6.8561306223043745, "grad_norm": 0.06323892623186111, "learning_rate": 0.01, "loss": 1.9882, "step": 66765 }, { "epoch": 6.856438693776957, "grad_norm": 0.03758067637681961, "learning_rate": 0.01, "loss": 1.9725, "step": 66768 }, { "epoch": 6.856746765249538, "grad_norm": 0.0766826942563057, "learning_rate": 0.01, "loss": 1.974, "step": 66771 }, { "epoch": 6.85705483672212, "grad_norm": 0.08893147110939026, "learning_rate": 0.01, "loss": 1.9959, "step": 66774 }, { "epoch": 6.857362908194701, "grad_norm": 0.06650066375732422, "learning_rate": 0.01, "loss": 1.9714, "step": 66777 }, { "epoch": 6.857670979667283, "grad_norm": 0.04757289960980415, "learning_rate": 0.01, "loss": 1.9913, "step": 66780 }, { "epoch": 6.857979051139864, "grad_norm": 0.0641031265258789, "learning_rate": 0.01, "loss": 1.9593, "step": 66783 }, { "epoch": 6.858287122612446, "grad_norm": 0.04178297147154808, "learning_rate": 0.01, "loss": 1.9613, "step": 66786 }, { "epoch": 6.858595194085027, "grad_norm": 0.042551252990961075, "learning_rate": 0.01, "loss": 2.0181, "step": 66789 }, { "epoch": 6.858903265557609, "grad_norm": 0.05980202183127403, "learning_rate": 0.01, "loss": 1.9742, "step": 66792 }, { "epoch": 6.859211337030191, "grad_norm": 0.11070612818002701, "learning_rate": 0.01, "loss": 1.953, "step": 66795 }, { "epoch": 6.859519408502773, "grad_norm": 0.03422870859503746, "learning_rate": 0.01, "loss": 1.9798, "step": 66798 }, { "epoch": 6.859827479975355, "grad_norm": 0.04661295935511589, "learning_rate": 0.01, "loss": 1.9874, "step": 66801 }, { "epoch": 6.860135551447936, "grad_norm": 0.04450797662138939, "learning_rate": 0.01, "loss": 2.0017, "step": 66804 }, { "epoch": 6.860443622920518, "grad_norm": 0.03987280651926994, "learning_rate": 0.01, "loss": 1.9817, "step": 66807 }, { "epoch": 6.860751694393099, "grad_norm": 0.0804753229022026, "learning_rate": 0.01, "loss": 1.9825, "step": 66810 }, { "epoch": 6.861059765865681, "grad_norm": 0.09321844577789307, "learning_rate": 0.01, "loss": 1.9841, "step": 66813 }, { "epoch": 6.861367837338262, "grad_norm": 0.128792867064476, "learning_rate": 0.01, "loss": 1.9654, "step": 66816 }, { "epoch": 6.861675908810844, "grad_norm": 0.0815480500459671, "learning_rate": 0.01, "loss": 1.9951, "step": 66819 }, { "epoch": 6.8619839802834255, "grad_norm": 0.047995518893003464, "learning_rate": 0.01, "loss": 1.9672, "step": 66822 }, { "epoch": 6.8622920517560075, "grad_norm": 0.03637409210205078, "learning_rate": 0.01, "loss": 1.9617, "step": 66825 }, { "epoch": 6.862600123228589, "grad_norm": 0.03809746354818344, "learning_rate": 0.01, "loss": 1.978, "step": 66828 }, { "epoch": 6.862908194701171, "grad_norm": 0.03449690714478493, "learning_rate": 0.01, "loss": 1.9781, "step": 66831 }, { "epoch": 6.863216266173753, "grad_norm": 0.09826438128948212, "learning_rate": 0.01, "loss": 1.9755, "step": 66834 }, { "epoch": 6.863524337646334, "grad_norm": 0.09669643640518188, "learning_rate": 0.01, "loss": 1.9931, "step": 66837 }, { "epoch": 6.863832409118915, "grad_norm": 0.03975251317024231, "learning_rate": 0.01, "loss": 1.9816, "step": 66840 }, { "epoch": 6.864140480591497, "grad_norm": 0.045158423483371735, "learning_rate": 0.01, "loss": 1.9833, "step": 66843 }, { "epoch": 6.864448552064079, "grad_norm": 0.08310145139694214, "learning_rate": 0.01, "loss": 1.9594, "step": 66846 }, { "epoch": 6.86475662353666, "grad_norm": 0.068028524518013, "learning_rate": 0.01, "loss": 1.9548, "step": 66849 }, { "epoch": 6.865064695009242, "grad_norm": 0.02985006757080555, "learning_rate": 0.01, "loss": 1.9847, "step": 66852 }, { "epoch": 6.865372766481824, "grad_norm": 0.03493596985936165, "learning_rate": 0.01, "loss": 1.9608, "step": 66855 }, { "epoch": 6.865680837954406, "grad_norm": 0.04760652035474777, "learning_rate": 0.01, "loss": 1.9665, "step": 66858 }, { "epoch": 6.865988909426987, "grad_norm": 0.08393906056880951, "learning_rate": 0.01, "loss": 1.9609, "step": 66861 }, { "epoch": 6.866296980899569, "grad_norm": 0.05745869129896164, "learning_rate": 0.01, "loss": 1.9851, "step": 66864 }, { "epoch": 6.86660505237215, "grad_norm": 0.04294672608375549, "learning_rate": 0.01, "loss": 1.9912, "step": 66867 }, { "epoch": 6.866913123844732, "grad_norm": 0.052304986864328384, "learning_rate": 0.01, "loss": 1.9808, "step": 66870 }, { "epoch": 6.867221195317313, "grad_norm": 0.04665624350309372, "learning_rate": 0.01, "loss": 1.9593, "step": 66873 }, { "epoch": 6.867529266789895, "grad_norm": 0.037743669003248215, "learning_rate": 0.01, "loss": 1.9874, "step": 66876 }, { "epoch": 6.867837338262477, "grad_norm": 0.19228719174861908, "learning_rate": 0.01, "loss": 1.9825, "step": 66879 }, { "epoch": 6.8681454097350585, "grad_norm": 0.044259946793317795, "learning_rate": 0.01, "loss": 2.0154, "step": 66882 }, { "epoch": 6.8684534812076405, "grad_norm": 0.03942949324846268, "learning_rate": 0.01, "loss": 1.981, "step": 66885 }, { "epoch": 6.868761552680222, "grad_norm": 0.03812559321522713, "learning_rate": 0.01, "loss": 1.9677, "step": 66888 }, { "epoch": 6.869069624152804, "grad_norm": 0.03519313409924507, "learning_rate": 0.01, "loss": 1.9776, "step": 66891 }, { "epoch": 6.869377695625385, "grad_norm": 0.04644708335399628, "learning_rate": 0.01, "loss": 1.9763, "step": 66894 }, { "epoch": 6.869685767097967, "grad_norm": 0.059009701013565063, "learning_rate": 0.01, "loss": 1.9847, "step": 66897 }, { "epoch": 6.869993838570548, "grad_norm": 0.08703511953353882, "learning_rate": 0.01, "loss": 1.9553, "step": 66900 }, { "epoch": 6.87030191004313, "grad_norm": 0.06761835515499115, "learning_rate": 0.01, "loss": 1.9753, "step": 66903 }, { "epoch": 6.870609981515711, "grad_norm": 0.12400759011507034, "learning_rate": 0.01, "loss": 1.9843, "step": 66906 }, { "epoch": 6.870918052988293, "grad_norm": 0.13963590562343597, "learning_rate": 0.01, "loss": 1.9987, "step": 66909 }, { "epoch": 6.8712261244608746, "grad_norm": 0.1004619374871254, "learning_rate": 0.01, "loss": 1.9759, "step": 66912 }, { "epoch": 6.871534195933457, "grad_norm": 0.05865470692515373, "learning_rate": 0.01, "loss": 2.0019, "step": 66915 }, { "epoch": 6.871842267406038, "grad_norm": 0.06006789952516556, "learning_rate": 0.01, "loss": 1.9681, "step": 66918 }, { "epoch": 6.87215033887862, "grad_norm": 0.03859030827879906, "learning_rate": 0.01, "loss": 1.9733, "step": 66921 }, { "epoch": 6.872458410351202, "grad_norm": 0.03140386939048767, "learning_rate": 0.01, "loss": 1.9827, "step": 66924 }, { "epoch": 6.872766481823783, "grad_norm": 0.030816826969385147, "learning_rate": 0.01, "loss": 1.9746, "step": 66927 }, { "epoch": 6.873074553296365, "grad_norm": 0.031165439635515213, "learning_rate": 0.01, "loss": 1.9777, "step": 66930 }, { "epoch": 6.873382624768946, "grad_norm": 0.1848057508468628, "learning_rate": 0.01, "loss": 1.9849, "step": 66933 }, { "epoch": 6.873690696241528, "grad_norm": 0.11120212823152542, "learning_rate": 0.01, "loss": 1.9846, "step": 66936 }, { "epoch": 6.8739987677141094, "grad_norm": 0.051300667226314545, "learning_rate": 0.01, "loss": 1.9601, "step": 66939 }, { "epoch": 6.8743068391866915, "grad_norm": 0.03566049784421921, "learning_rate": 0.01, "loss": 1.9589, "step": 66942 }, { "epoch": 6.874614910659273, "grad_norm": 0.0652051493525505, "learning_rate": 0.01, "loss": 1.9625, "step": 66945 }, { "epoch": 6.874922982131855, "grad_norm": 0.06834497302770615, "learning_rate": 0.01, "loss": 1.967, "step": 66948 }, { "epoch": 6.875231053604436, "grad_norm": 0.058571867644786835, "learning_rate": 0.01, "loss": 1.9778, "step": 66951 }, { "epoch": 6.875539125077018, "grad_norm": 0.05274929851293564, "learning_rate": 0.01, "loss": 1.999, "step": 66954 }, { "epoch": 6.8758471965496, "grad_norm": 0.044645778834819794, "learning_rate": 0.01, "loss": 1.9787, "step": 66957 }, { "epoch": 6.876155268022181, "grad_norm": 0.05121855065226555, "learning_rate": 0.01, "loss": 1.9773, "step": 66960 }, { "epoch": 6.876463339494762, "grad_norm": 0.0632731169462204, "learning_rate": 0.01, "loss": 1.991, "step": 66963 }, { "epoch": 6.876771410967344, "grad_norm": 0.0810575783252716, "learning_rate": 0.01, "loss": 1.9805, "step": 66966 }, { "epoch": 6.877079482439926, "grad_norm": 0.06958875060081482, "learning_rate": 0.01, "loss": 1.9784, "step": 66969 }, { "epoch": 6.877387553912508, "grad_norm": 0.06824204325675964, "learning_rate": 0.01, "loss": 2.0009, "step": 66972 }, { "epoch": 6.87769562538509, "grad_norm": 0.05225484445691109, "learning_rate": 0.01, "loss": 1.9467, "step": 66975 }, { "epoch": 6.878003696857671, "grad_norm": 0.05065394937992096, "learning_rate": 0.01, "loss": 1.9945, "step": 66978 }, { "epoch": 6.878311768330253, "grad_norm": 0.13061989843845367, "learning_rate": 0.01, "loss": 1.9895, "step": 66981 }, { "epoch": 6.878619839802834, "grad_norm": 0.12182870507240295, "learning_rate": 0.01, "loss": 1.9567, "step": 66984 }, { "epoch": 6.878927911275416, "grad_norm": 0.048450104892253876, "learning_rate": 0.01, "loss": 1.978, "step": 66987 }, { "epoch": 6.879235982747997, "grad_norm": 0.055096838623285294, "learning_rate": 0.01, "loss": 1.9648, "step": 66990 }, { "epoch": 6.879544054220579, "grad_norm": 0.05567490682005882, "learning_rate": 0.01, "loss": 1.9797, "step": 66993 }, { "epoch": 6.87985212569316, "grad_norm": 0.05405956879258156, "learning_rate": 0.01, "loss": 1.9811, "step": 66996 }, { "epoch": 6.8801601971657425, "grad_norm": 0.08444008976221085, "learning_rate": 0.01, "loss": 1.9848, "step": 66999 }, { "epoch": 6.8804682686383245, "grad_norm": 0.08067743480205536, "learning_rate": 0.01, "loss": 1.9636, "step": 67002 }, { "epoch": 6.880776340110906, "grad_norm": 0.042782243341207504, "learning_rate": 0.01, "loss": 1.9727, "step": 67005 }, { "epoch": 6.881084411583488, "grad_norm": 0.036419086158275604, "learning_rate": 0.01, "loss": 1.9638, "step": 67008 }, { "epoch": 6.881392483056069, "grad_norm": 0.05873832479119301, "learning_rate": 0.01, "loss": 1.9845, "step": 67011 }, { "epoch": 6.881700554528651, "grad_norm": 0.06187519058585167, "learning_rate": 0.01, "loss": 1.9953, "step": 67014 }, { "epoch": 6.882008626001232, "grad_norm": 0.049339089542627335, "learning_rate": 0.01, "loss": 1.966, "step": 67017 }, { "epoch": 6.882316697473814, "grad_norm": 0.037161026149988174, "learning_rate": 0.01, "loss": 1.9465, "step": 67020 }, { "epoch": 6.882624768946395, "grad_norm": 0.03601270541548729, "learning_rate": 0.01, "loss": 1.98, "step": 67023 }, { "epoch": 6.882932840418977, "grad_norm": 0.07066098600625992, "learning_rate": 0.01, "loss": 1.9797, "step": 67026 }, { "epoch": 6.8832409118915585, "grad_norm": 0.1363425999879837, "learning_rate": 0.01, "loss": 1.9648, "step": 67029 }, { "epoch": 6.883548983364141, "grad_norm": 0.060069505125284195, "learning_rate": 0.01, "loss": 1.9898, "step": 67032 }, { "epoch": 6.883857054836723, "grad_norm": 0.08887060731649399, "learning_rate": 0.01, "loss": 1.9947, "step": 67035 }, { "epoch": 6.884165126309304, "grad_norm": 0.03954588621854782, "learning_rate": 0.01, "loss": 1.9815, "step": 67038 }, { "epoch": 6.884473197781885, "grad_norm": 0.04933086037635803, "learning_rate": 0.01, "loss": 1.9766, "step": 67041 }, { "epoch": 6.884781269254467, "grad_norm": 0.05626452714204788, "learning_rate": 0.01, "loss": 1.9614, "step": 67044 }, { "epoch": 6.885089340727049, "grad_norm": 0.04893886297941208, "learning_rate": 0.01, "loss": 1.9646, "step": 67047 }, { "epoch": 6.88539741219963, "grad_norm": 0.10105928778648376, "learning_rate": 0.01, "loss": 1.971, "step": 67050 }, { "epoch": 6.885705483672212, "grad_norm": 0.08832778036594391, "learning_rate": 0.01, "loss": 1.979, "step": 67053 }, { "epoch": 6.886013555144793, "grad_norm": 0.059674110263586044, "learning_rate": 0.01, "loss": 1.9806, "step": 67056 }, { "epoch": 6.8863216266173755, "grad_norm": 0.06233122944831848, "learning_rate": 0.01, "loss": 1.9851, "step": 67059 }, { "epoch": 6.886629698089957, "grad_norm": 0.04024443402886391, "learning_rate": 0.01, "loss": 1.9768, "step": 67062 }, { "epoch": 6.886937769562539, "grad_norm": 0.03950473666191101, "learning_rate": 0.01, "loss": 1.9756, "step": 67065 }, { "epoch": 6.88724584103512, "grad_norm": 0.11595027148723602, "learning_rate": 0.01, "loss": 1.9688, "step": 67068 }, { "epoch": 6.887553912507702, "grad_norm": 0.10208969563245773, "learning_rate": 0.01, "loss": 1.9763, "step": 67071 }, { "epoch": 6.887861983980283, "grad_norm": 0.1006581112742424, "learning_rate": 0.01, "loss": 1.9783, "step": 67074 }, { "epoch": 6.888170055452865, "grad_norm": 0.06794067472219467, "learning_rate": 0.01, "loss": 1.9676, "step": 67077 }, { "epoch": 6.888478126925447, "grad_norm": 0.06897623836994171, "learning_rate": 0.01, "loss": 1.9688, "step": 67080 }, { "epoch": 6.888786198398028, "grad_norm": 0.07438699901103973, "learning_rate": 0.01, "loss": 1.9884, "step": 67083 }, { "epoch": 6.88909426987061, "grad_norm": 0.057675886899232864, "learning_rate": 0.01, "loss": 1.978, "step": 67086 }, { "epoch": 6.8894023413431915, "grad_norm": 0.0770813375711441, "learning_rate": 0.01, "loss": 1.9841, "step": 67089 }, { "epoch": 6.889710412815774, "grad_norm": 0.07523828744888306, "learning_rate": 0.01, "loss": 1.9515, "step": 67092 }, { "epoch": 6.890018484288355, "grad_norm": 0.11752041429281235, "learning_rate": 0.01, "loss": 1.9666, "step": 67095 }, { "epoch": 6.890326555760937, "grad_norm": 0.07361220568418503, "learning_rate": 0.01, "loss": 1.9475, "step": 67098 }, { "epoch": 6.890634627233518, "grad_norm": 0.1421179324388504, "learning_rate": 0.01, "loss": 1.96, "step": 67101 }, { "epoch": 6.8909426987061, "grad_norm": 0.10113172978162766, "learning_rate": 0.01, "loss": 1.9745, "step": 67104 }, { "epoch": 6.891250770178681, "grad_norm": 0.07775873690843582, "learning_rate": 0.01, "loss": 2.0027, "step": 67107 }, { "epoch": 6.891558841651263, "grad_norm": 0.06705240905284882, "learning_rate": 0.01, "loss": 1.9841, "step": 67110 }, { "epoch": 6.891866913123844, "grad_norm": 0.09598524123430252, "learning_rate": 0.01, "loss": 1.9638, "step": 67113 }, { "epoch": 6.892174984596426, "grad_norm": 0.04524253308773041, "learning_rate": 0.01, "loss": 1.9562, "step": 67116 }, { "epoch": 6.892483056069008, "grad_norm": 0.041274599730968475, "learning_rate": 0.01, "loss": 1.9733, "step": 67119 }, { "epoch": 6.89279112754159, "grad_norm": 0.09016481041908264, "learning_rate": 0.01, "loss": 1.9749, "step": 67122 }, { "epoch": 6.893099199014172, "grad_norm": 0.06743042171001434, "learning_rate": 0.01, "loss": 1.9766, "step": 67125 }, { "epoch": 6.893407270486753, "grad_norm": 0.08920291066169739, "learning_rate": 0.01, "loss": 2.0093, "step": 67128 }, { "epoch": 6.893715341959335, "grad_norm": 0.08669997751712799, "learning_rate": 0.01, "loss": 1.9628, "step": 67131 }, { "epoch": 6.894023413431916, "grad_norm": 0.03727005049586296, "learning_rate": 0.01, "loss": 1.9641, "step": 67134 }, { "epoch": 6.894331484904498, "grad_norm": 0.047707851976156235, "learning_rate": 0.01, "loss": 1.9626, "step": 67137 }, { "epoch": 6.894639556377079, "grad_norm": 0.03828176483511925, "learning_rate": 0.01, "loss": 1.9597, "step": 67140 }, { "epoch": 6.894947627849661, "grad_norm": 0.04043647646903992, "learning_rate": 0.01, "loss": 1.9949, "step": 67143 }, { "epoch": 6.8952556993222425, "grad_norm": 0.03885728865861893, "learning_rate": 0.01, "loss": 1.9679, "step": 67146 }, { "epoch": 6.8955637707948245, "grad_norm": 0.041276995092630386, "learning_rate": 0.01, "loss": 1.9662, "step": 67149 }, { "epoch": 6.895871842267406, "grad_norm": 0.04943333566188812, "learning_rate": 0.01, "loss": 1.9549, "step": 67152 }, { "epoch": 6.896179913739988, "grad_norm": 0.04476524516940117, "learning_rate": 0.01, "loss": 1.9485, "step": 67155 }, { "epoch": 6.89648798521257, "grad_norm": 0.10550431907176971, "learning_rate": 0.01, "loss": 1.9662, "step": 67158 }, { "epoch": 6.896796056685151, "grad_norm": 0.09191994369029999, "learning_rate": 0.01, "loss": 1.9677, "step": 67161 }, { "epoch": 6.897104128157732, "grad_norm": 0.05506186559796333, "learning_rate": 0.01, "loss": 1.9712, "step": 67164 }, { "epoch": 6.897412199630314, "grad_norm": 0.06780190020799637, "learning_rate": 0.01, "loss": 1.9831, "step": 67167 }, { "epoch": 6.897720271102896, "grad_norm": 0.04398869350552559, "learning_rate": 0.01, "loss": 1.9709, "step": 67170 }, { "epoch": 6.898028342575477, "grad_norm": 0.05016426742076874, "learning_rate": 0.01, "loss": 1.9821, "step": 67173 }, { "epoch": 6.898336414048059, "grad_norm": 0.03760296478867531, "learning_rate": 0.01, "loss": 1.9833, "step": 67176 }, { "epoch": 6.898644485520641, "grad_norm": 0.06771603971719742, "learning_rate": 0.01, "loss": 1.9924, "step": 67179 }, { "epoch": 6.898952556993223, "grad_norm": 0.07435780018568039, "learning_rate": 0.01, "loss": 1.9733, "step": 67182 }, { "epoch": 6.899260628465804, "grad_norm": 0.05153966695070267, "learning_rate": 0.01, "loss": 1.9968, "step": 67185 }, { "epoch": 6.899568699938386, "grad_norm": 0.06906291097402573, "learning_rate": 0.01, "loss": 1.9666, "step": 67188 }, { "epoch": 6.899876771410967, "grad_norm": 0.08245383203029633, "learning_rate": 0.01, "loss": 1.9731, "step": 67191 }, { "epoch": 6.900184842883549, "grad_norm": 0.06429754942655563, "learning_rate": 0.01, "loss": 1.9942, "step": 67194 }, { "epoch": 6.90049291435613, "grad_norm": 0.03510384261608124, "learning_rate": 0.01, "loss": 1.9801, "step": 67197 }, { "epoch": 6.900800985828712, "grad_norm": 0.063795305788517, "learning_rate": 0.01, "loss": 1.976, "step": 67200 }, { "epoch": 6.901109057301294, "grad_norm": 0.07294676452875137, "learning_rate": 0.01, "loss": 1.9611, "step": 67203 }, { "epoch": 6.9014171287738755, "grad_norm": 0.04569892957806587, "learning_rate": 0.01, "loss": 1.9775, "step": 67206 }, { "epoch": 6.9017252002464575, "grad_norm": 0.0556999109685421, "learning_rate": 0.01, "loss": 1.9949, "step": 67209 }, { "epoch": 6.902033271719039, "grad_norm": 0.038515105843544006, "learning_rate": 0.01, "loss": 2.0006, "step": 67212 }, { "epoch": 6.902341343191621, "grad_norm": 0.08272943645715714, "learning_rate": 0.01, "loss": 1.9549, "step": 67215 }, { "epoch": 6.902649414664202, "grad_norm": 0.08796203136444092, "learning_rate": 0.01, "loss": 1.9787, "step": 67218 }, { "epoch": 6.902957486136784, "grad_norm": 0.0925317257642746, "learning_rate": 0.01, "loss": 1.9775, "step": 67221 }, { "epoch": 6.903265557609365, "grad_norm": 0.04561033099889755, "learning_rate": 0.01, "loss": 1.9481, "step": 67224 }, { "epoch": 6.903573629081947, "grad_norm": 0.10068716108798981, "learning_rate": 0.01, "loss": 1.9978, "step": 67227 }, { "epoch": 6.903881700554528, "grad_norm": 0.06739984452724457, "learning_rate": 0.01, "loss": 1.9716, "step": 67230 }, { "epoch": 6.90418977202711, "grad_norm": 0.05066872388124466, "learning_rate": 0.01, "loss": 1.9969, "step": 67233 }, { "epoch": 6.904497843499692, "grad_norm": 0.047137267887592316, "learning_rate": 0.01, "loss": 1.9838, "step": 67236 }, { "epoch": 6.904805914972274, "grad_norm": 0.10280490666627884, "learning_rate": 0.01, "loss": 1.9662, "step": 67239 }, { "epoch": 6.905113986444855, "grad_norm": 0.08560338616371155, "learning_rate": 0.01, "loss": 1.9644, "step": 67242 }, { "epoch": 6.905422057917437, "grad_norm": 0.13022232055664062, "learning_rate": 0.01, "loss": 1.9786, "step": 67245 }, { "epoch": 6.905730129390019, "grad_norm": 0.04727041721343994, "learning_rate": 0.01, "loss": 1.9905, "step": 67248 }, { "epoch": 6.9060382008626, "grad_norm": 0.04549260064959526, "learning_rate": 0.01, "loss": 1.9749, "step": 67251 }, { "epoch": 6.906346272335182, "grad_norm": 0.14286646246910095, "learning_rate": 0.01, "loss": 1.9913, "step": 67254 }, { "epoch": 6.906654343807763, "grad_norm": 0.04126777872443199, "learning_rate": 0.01, "loss": 1.9516, "step": 67257 }, { "epoch": 6.906962415280345, "grad_norm": 0.04667791351675987, "learning_rate": 0.01, "loss": 1.9566, "step": 67260 }, { "epoch": 6.907270486752926, "grad_norm": 0.11388275027275085, "learning_rate": 0.01, "loss": 1.9958, "step": 67263 }, { "epoch": 6.9075785582255085, "grad_norm": 0.06364165991544724, "learning_rate": 0.01, "loss": 1.9731, "step": 67266 }, { "epoch": 6.90788662969809, "grad_norm": 0.09224580973386765, "learning_rate": 0.01, "loss": 1.9657, "step": 67269 }, { "epoch": 6.908194701170672, "grad_norm": 0.07214430719614029, "learning_rate": 0.01, "loss": 1.9633, "step": 67272 }, { "epoch": 6.908502772643253, "grad_norm": 0.04083709791302681, "learning_rate": 0.01, "loss": 1.9735, "step": 67275 }, { "epoch": 6.908810844115835, "grad_norm": 0.034071508795022964, "learning_rate": 0.01, "loss": 1.9854, "step": 67278 }, { "epoch": 6.909118915588417, "grad_norm": 0.06297708302736282, "learning_rate": 0.01, "loss": 1.9562, "step": 67281 }, { "epoch": 6.909426987060998, "grad_norm": 0.0424063615500927, "learning_rate": 0.01, "loss": 1.9717, "step": 67284 }, { "epoch": 6.90973505853358, "grad_norm": 0.04424364119768143, "learning_rate": 0.01, "loss": 1.9686, "step": 67287 }, { "epoch": 6.910043130006161, "grad_norm": 0.03503073751926422, "learning_rate": 0.01, "loss": 1.9708, "step": 67290 }, { "epoch": 6.910351201478743, "grad_norm": 0.04922670125961304, "learning_rate": 0.01, "loss": 1.9692, "step": 67293 }, { "epoch": 6.9106592729513245, "grad_norm": 0.0473727248609066, "learning_rate": 0.01, "loss": 1.9652, "step": 67296 }, { "epoch": 6.910967344423907, "grad_norm": 0.09901537001132965, "learning_rate": 0.01, "loss": 1.9813, "step": 67299 }, { "epoch": 6.911275415896488, "grad_norm": 0.08178123086690903, "learning_rate": 0.01, "loss": 1.9955, "step": 67302 }, { "epoch": 6.91158348736907, "grad_norm": 0.06637603044509888, "learning_rate": 0.01, "loss": 1.978, "step": 67305 }, { "epoch": 6.911891558841651, "grad_norm": 0.043472740799188614, "learning_rate": 0.01, "loss": 1.9641, "step": 67308 }, { "epoch": 6.912199630314233, "grad_norm": 0.03270704671740532, "learning_rate": 0.01, "loss": 1.9642, "step": 67311 }, { "epoch": 6.912507701786814, "grad_norm": 0.08558196574449539, "learning_rate": 0.01, "loss": 1.9978, "step": 67314 }, { "epoch": 6.912815773259396, "grad_norm": 0.09473107755184174, "learning_rate": 0.01, "loss": 1.9886, "step": 67317 }, { "epoch": 6.913123844731977, "grad_norm": 0.041321370750665665, "learning_rate": 0.01, "loss": 1.9751, "step": 67320 }, { "epoch": 6.913431916204559, "grad_norm": 0.10107487440109253, "learning_rate": 0.01, "loss": 1.977, "step": 67323 }, { "epoch": 6.9137399876771415, "grad_norm": 0.13234496116638184, "learning_rate": 0.01, "loss": 1.9821, "step": 67326 }, { "epoch": 6.914048059149723, "grad_norm": 0.05052580311894417, "learning_rate": 0.01, "loss": 1.9625, "step": 67329 }, { "epoch": 6.914356130622305, "grad_norm": 0.03741452470421791, "learning_rate": 0.01, "loss": 1.9709, "step": 67332 }, { "epoch": 6.914664202094886, "grad_norm": 0.03609738498926163, "learning_rate": 0.01, "loss": 1.9614, "step": 67335 }, { "epoch": 6.914972273567468, "grad_norm": 0.045461494475603104, "learning_rate": 0.01, "loss": 1.9906, "step": 67338 }, { "epoch": 6.915280345040049, "grad_norm": 0.07647386193275452, "learning_rate": 0.01, "loss": 1.9496, "step": 67341 }, { "epoch": 6.915588416512631, "grad_norm": 0.0546477772295475, "learning_rate": 0.01, "loss": 1.9828, "step": 67344 }, { "epoch": 6.915896487985212, "grad_norm": 0.05500864237546921, "learning_rate": 0.01, "loss": 1.9853, "step": 67347 }, { "epoch": 6.916204559457794, "grad_norm": 0.06956373900175095, "learning_rate": 0.01, "loss": 1.9697, "step": 67350 }, { "epoch": 6.9165126309303755, "grad_norm": 0.13182616233825684, "learning_rate": 0.01, "loss": 1.9938, "step": 67353 }, { "epoch": 6.9168207024029575, "grad_norm": 0.06322868913412094, "learning_rate": 0.01, "loss": 1.9599, "step": 67356 }, { "epoch": 6.91712877387554, "grad_norm": 0.053960807621479034, "learning_rate": 0.01, "loss": 1.9484, "step": 67359 }, { "epoch": 6.917436845348121, "grad_norm": 0.03774489462375641, "learning_rate": 0.01, "loss": 1.9874, "step": 67362 }, { "epoch": 6.917744916820702, "grad_norm": 0.04947468638420105, "learning_rate": 0.01, "loss": 1.9725, "step": 67365 }, { "epoch": 6.918052988293284, "grad_norm": 0.09014926850795746, "learning_rate": 0.01, "loss": 1.9956, "step": 67368 }, { "epoch": 6.918361059765866, "grad_norm": 0.047003794461488724, "learning_rate": 0.01, "loss": 2.0002, "step": 67371 }, { "epoch": 6.918669131238447, "grad_norm": 0.04293885454535484, "learning_rate": 0.01, "loss": 2.0253, "step": 67374 }, { "epoch": 6.918977202711029, "grad_norm": 0.08349283039569855, "learning_rate": 0.01, "loss": 1.9856, "step": 67377 }, { "epoch": 6.91928527418361, "grad_norm": 0.10643094033002853, "learning_rate": 0.01, "loss": 2.0004, "step": 67380 }, { "epoch": 6.919593345656192, "grad_norm": 0.05152636021375656, "learning_rate": 0.01, "loss": 1.9818, "step": 67383 }, { "epoch": 6.919901417128774, "grad_norm": 0.045483458787202835, "learning_rate": 0.01, "loss": 1.9808, "step": 67386 }, { "epoch": 6.920209488601356, "grad_norm": 0.10970417410135269, "learning_rate": 0.01, "loss": 1.9907, "step": 67389 }, { "epoch": 6.920517560073937, "grad_norm": 0.0620252899825573, "learning_rate": 0.01, "loss": 1.9623, "step": 67392 }, { "epoch": 6.920825631546519, "grad_norm": 0.042169239372015, "learning_rate": 0.01, "loss": 1.9682, "step": 67395 }, { "epoch": 6.9211337030191, "grad_norm": 0.07429879903793335, "learning_rate": 0.01, "loss": 1.9528, "step": 67398 }, { "epoch": 6.921441774491682, "grad_norm": 0.16096588969230652, "learning_rate": 0.01, "loss": 1.9932, "step": 67401 }, { "epoch": 6.921749845964264, "grad_norm": 0.06262468546628952, "learning_rate": 0.01, "loss": 1.9846, "step": 67404 }, { "epoch": 6.922057917436845, "grad_norm": 0.08196594566106796, "learning_rate": 0.01, "loss": 1.9497, "step": 67407 }, { "epoch": 6.922365988909427, "grad_norm": 0.07014669477939606, "learning_rate": 0.01, "loss": 1.9577, "step": 67410 }, { "epoch": 6.9226740603820085, "grad_norm": 0.048560310155153275, "learning_rate": 0.01, "loss": 1.9628, "step": 67413 }, { "epoch": 6.9229821318545905, "grad_norm": 0.053055111318826675, "learning_rate": 0.01, "loss": 1.9825, "step": 67416 }, { "epoch": 6.923290203327172, "grad_norm": 0.0406053327023983, "learning_rate": 0.01, "loss": 1.9764, "step": 67419 }, { "epoch": 6.923598274799754, "grad_norm": 0.05388790741562843, "learning_rate": 0.01, "loss": 1.9868, "step": 67422 }, { "epoch": 6.923906346272335, "grad_norm": 0.03949485719203949, "learning_rate": 0.01, "loss": 1.9719, "step": 67425 }, { "epoch": 6.924214417744917, "grad_norm": 0.04466044530272484, "learning_rate": 0.01, "loss": 1.9614, "step": 67428 }, { "epoch": 6.924522489217498, "grad_norm": 0.08420266211032867, "learning_rate": 0.01, "loss": 1.9817, "step": 67431 }, { "epoch": 6.92483056069008, "grad_norm": 0.06232386827468872, "learning_rate": 0.01, "loss": 1.9781, "step": 67434 }, { "epoch": 6.925138632162662, "grad_norm": 0.10322209447622299, "learning_rate": 0.01, "loss": 1.9775, "step": 67437 }, { "epoch": 6.925446703635243, "grad_norm": 0.04155166074633598, "learning_rate": 0.01, "loss": 1.9779, "step": 67440 }, { "epoch": 6.925754775107825, "grad_norm": 0.08774439245462418, "learning_rate": 0.01, "loss": 1.9807, "step": 67443 }, { "epoch": 6.926062846580407, "grad_norm": 0.08442433178424835, "learning_rate": 0.01, "loss": 1.9903, "step": 67446 }, { "epoch": 6.926370918052989, "grad_norm": 0.0743742361664772, "learning_rate": 0.01, "loss": 1.9641, "step": 67449 }, { "epoch": 6.92667898952557, "grad_norm": 0.09326713532209396, "learning_rate": 0.01, "loss": 1.9493, "step": 67452 }, { "epoch": 6.926987060998152, "grad_norm": 0.05275671184062958, "learning_rate": 0.01, "loss": 1.9797, "step": 67455 }, { "epoch": 6.927295132470733, "grad_norm": 0.0862322598695755, "learning_rate": 0.01, "loss": 1.9653, "step": 67458 }, { "epoch": 6.927603203943315, "grad_norm": 0.06256649643182755, "learning_rate": 0.01, "loss": 1.9526, "step": 67461 }, { "epoch": 6.927911275415896, "grad_norm": 0.08660640567541122, "learning_rate": 0.01, "loss": 1.9502, "step": 67464 }, { "epoch": 6.928219346888478, "grad_norm": 0.05134911462664604, "learning_rate": 0.01, "loss": 1.9894, "step": 67467 }, { "epoch": 6.9285274183610595, "grad_norm": 0.09808807820081711, "learning_rate": 0.01, "loss": 1.9675, "step": 67470 }, { "epoch": 6.9288354898336415, "grad_norm": 0.04677712544798851, "learning_rate": 0.01, "loss": 1.98, "step": 67473 }, { "epoch": 6.929143561306223, "grad_norm": 0.040743302553892136, "learning_rate": 0.01, "loss": 1.9689, "step": 67476 }, { "epoch": 6.929451632778805, "grad_norm": 0.04844764620065689, "learning_rate": 0.01, "loss": 2.0035, "step": 67479 }, { "epoch": 6.929759704251387, "grad_norm": 0.042815107852220535, "learning_rate": 0.01, "loss": 1.9861, "step": 67482 }, { "epoch": 6.930067775723968, "grad_norm": 0.040781863033771515, "learning_rate": 0.01, "loss": 1.9828, "step": 67485 }, { "epoch": 6.93037584719655, "grad_norm": 0.07171075791120529, "learning_rate": 0.01, "loss": 1.9699, "step": 67488 }, { "epoch": 6.930683918669131, "grad_norm": 0.03687571734189987, "learning_rate": 0.01, "loss": 1.9648, "step": 67491 }, { "epoch": 6.930991990141713, "grad_norm": 0.04326467588543892, "learning_rate": 0.01, "loss": 1.9819, "step": 67494 }, { "epoch": 6.931300061614294, "grad_norm": 0.24775917828083038, "learning_rate": 0.01, "loss": 1.9658, "step": 67497 }, { "epoch": 6.931608133086876, "grad_norm": 0.09830349683761597, "learning_rate": 0.01, "loss": 1.9743, "step": 67500 }, { "epoch": 6.931916204559458, "grad_norm": 0.05593564361333847, "learning_rate": 0.01, "loss": 1.9755, "step": 67503 }, { "epoch": 6.93222427603204, "grad_norm": 0.05278097838163376, "learning_rate": 0.01, "loss": 1.9906, "step": 67506 }, { "epoch": 6.932532347504621, "grad_norm": 0.06709492206573486, "learning_rate": 0.01, "loss": 1.9707, "step": 67509 }, { "epoch": 6.932840418977203, "grad_norm": 0.08978651463985443, "learning_rate": 0.01, "loss": 1.983, "step": 67512 }, { "epoch": 6.933148490449784, "grad_norm": 0.10322022438049316, "learning_rate": 0.01, "loss": 1.9676, "step": 67515 }, { "epoch": 6.933456561922366, "grad_norm": 0.08842363208532333, "learning_rate": 0.01, "loss": 1.9886, "step": 67518 }, { "epoch": 6.933764633394947, "grad_norm": 0.1034180223941803, "learning_rate": 0.01, "loss": 1.9953, "step": 67521 }, { "epoch": 6.934072704867529, "grad_norm": 0.05395280942320824, "learning_rate": 0.01, "loss": 1.9868, "step": 67524 }, { "epoch": 6.934380776340111, "grad_norm": 0.04033922404050827, "learning_rate": 0.01, "loss": 1.9848, "step": 67527 }, { "epoch": 6.9346888478126925, "grad_norm": 0.10645033419132233, "learning_rate": 0.01, "loss": 1.9801, "step": 67530 }, { "epoch": 6.9349969192852745, "grad_norm": 0.03000570461153984, "learning_rate": 0.01, "loss": 1.9612, "step": 67533 }, { "epoch": 6.935304990757856, "grad_norm": 0.06463263183832169, "learning_rate": 0.01, "loss": 1.977, "step": 67536 }, { "epoch": 6.935613062230438, "grad_norm": 0.0651232898235321, "learning_rate": 0.01, "loss": 1.9687, "step": 67539 }, { "epoch": 6.935921133703019, "grad_norm": 0.048303090035915375, "learning_rate": 0.01, "loss": 1.9926, "step": 67542 }, { "epoch": 6.936229205175601, "grad_norm": 0.09571239352226257, "learning_rate": 0.01, "loss": 1.9753, "step": 67545 }, { "epoch": 6.936537276648182, "grad_norm": 0.04778117686510086, "learning_rate": 0.01, "loss": 1.9806, "step": 67548 }, { "epoch": 6.936845348120764, "grad_norm": 0.16268859803676605, "learning_rate": 0.01, "loss": 1.967, "step": 67551 }, { "epoch": 6.937153419593345, "grad_norm": 0.08549009263515472, "learning_rate": 0.01, "loss": 1.9649, "step": 67554 }, { "epoch": 6.937461491065927, "grad_norm": 0.0720728412270546, "learning_rate": 0.01, "loss": 1.9589, "step": 67557 }, { "epoch": 6.937769562538509, "grad_norm": 0.04524467885494232, "learning_rate": 0.01, "loss": 1.9752, "step": 67560 }, { "epoch": 6.938077634011091, "grad_norm": 0.0630386620759964, "learning_rate": 0.01, "loss": 1.9725, "step": 67563 }, { "epoch": 6.938385705483672, "grad_norm": 0.07228325307369232, "learning_rate": 0.01, "loss": 2.0101, "step": 67566 }, { "epoch": 6.938693776956254, "grad_norm": 0.0827326700091362, "learning_rate": 0.01, "loss": 1.9615, "step": 67569 }, { "epoch": 6.939001848428836, "grad_norm": 0.0640362948179245, "learning_rate": 0.01, "loss": 1.9779, "step": 67572 }, { "epoch": 6.939309919901417, "grad_norm": 0.0734630599617958, "learning_rate": 0.01, "loss": 1.9541, "step": 67575 }, { "epoch": 6.939617991373999, "grad_norm": 0.09056416898965836, "learning_rate": 0.01, "loss": 1.9831, "step": 67578 }, { "epoch": 6.93992606284658, "grad_norm": 0.04308030381798744, "learning_rate": 0.01, "loss": 1.9791, "step": 67581 }, { "epoch": 6.940234134319162, "grad_norm": 0.038212817162275314, "learning_rate": 0.01, "loss": 1.9769, "step": 67584 }, { "epoch": 6.940542205791743, "grad_norm": 0.06669415533542633, "learning_rate": 0.01, "loss": 1.9813, "step": 67587 }, { "epoch": 6.9408502772643255, "grad_norm": 0.10573708266019821, "learning_rate": 0.01, "loss": 1.9772, "step": 67590 }, { "epoch": 6.941158348736907, "grad_norm": 0.13396821916103363, "learning_rate": 0.01, "loss": 1.9422, "step": 67593 }, { "epoch": 6.941466420209489, "grad_norm": 0.0890778973698616, "learning_rate": 0.01, "loss": 1.9934, "step": 67596 }, { "epoch": 6.94177449168207, "grad_norm": 0.06729544699192047, "learning_rate": 0.01, "loss": 1.9731, "step": 67599 }, { "epoch": 6.942082563154652, "grad_norm": 0.04166124016046524, "learning_rate": 0.01, "loss": 1.9587, "step": 67602 }, { "epoch": 6.942390634627234, "grad_norm": 0.06778619438409805, "learning_rate": 0.01, "loss": 1.9923, "step": 67605 }, { "epoch": 6.942698706099815, "grad_norm": 0.052519191056489944, "learning_rate": 0.01, "loss": 1.983, "step": 67608 }, { "epoch": 6.943006777572397, "grad_norm": 0.051862701773643494, "learning_rate": 0.01, "loss": 1.9657, "step": 67611 }, { "epoch": 6.943314849044978, "grad_norm": 0.04436833783984184, "learning_rate": 0.01, "loss": 1.975, "step": 67614 }, { "epoch": 6.94362292051756, "grad_norm": 0.08768297731876373, "learning_rate": 0.01, "loss": 1.995, "step": 67617 }, { "epoch": 6.9439309919901415, "grad_norm": 0.10748183727264404, "learning_rate": 0.01, "loss": 1.9836, "step": 67620 }, { "epoch": 6.944239063462724, "grad_norm": 0.06303614377975464, "learning_rate": 0.01, "loss": 2.0053, "step": 67623 }, { "epoch": 6.944547134935305, "grad_norm": 0.0495496429502964, "learning_rate": 0.01, "loss": 2.0075, "step": 67626 }, { "epoch": 6.944855206407887, "grad_norm": 0.07309143245220184, "learning_rate": 0.01, "loss": 1.9995, "step": 67629 }, { "epoch": 6.945163277880468, "grad_norm": 0.09533344954252243, "learning_rate": 0.01, "loss": 1.9682, "step": 67632 }, { "epoch": 6.94547134935305, "grad_norm": 0.07221339643001556, "learning_rate": 0.01, "loss": 1.9699, "step": 67635 }, { "epoch": 6.945779420825632, "grad_norm": 0.0772852674126625, "learning_rate": 0.01, "loss": 1.942, "step": 67638 }, { "epoch": 6.946087492298213, "grad_norm": 0.03566101938486099, "learning_rate": 0.01, "loss": 1.9413, "step": 67641 }, { "epoch": 6.946395563770794, "grad_norm": 0.0641225203871727, "learning_rate": 0.01, "loss": 2.0036, "step": 67644 }, { "epoch": 6.946703635243376, "grad_norm": 0.11845903843641281, "learning_rate": 0.01, "loss": 1.9814, "step": 67647 }, { "epoch": 6.9470117067159585, "grad_norm": 0.1297493278980255, "learning_rate": 0.01, "loss": 1.9722, "step": 67650 }, { "epoch": 6.94731977818854, "grad_norm": 0.0657927542924881, "learning_rate": 0.01, "loss": 1.9527, "step": 67653 }, { "epoch": 6.947627849661122, "grad_norm": 0.0618324875831604, "learning_rate": 0.01, "loss": 1.9936, "step": 67656 }, { "epoch": 6.947935921133703, "grad_norm": 0.04335208609700203, "learning_rate": 0.01, "loss": 1.9779, "step": 67659 }, { "epoch": 6.948243992606285, "grad_norm": 0.03921616077423096, "learning_rate": 0.01, "loss": 1.9908, "step": 67662 }, { "epoch": 6.948552064078866, "grad_norm": 0.05289353057742119, "learning_rate": 0.01, "loss": 1.9806, "step": 67665 }, { "epoch": 6.948860135551448, "grad_norm": 0.037309400737285614, "learning_rate": 0.01, "loss": 1.97, "step": 67668 }, { "epoch": 6.949168207024029, "grad_norm": 0.07012271881103516, "learning_rate": 0.01, "loss": 1.9718, "step": 67671 }, { "epoch": 6.949476278496611, "grad_norm": 0.08038291335105896, "learning_rate": 0.01, "loss": 1.9542, "step": 67674 }, { "epoch": 6.9497843499691925, "grad_norm": 0.08565990626811981, "learning_rate": 0.01, "loss": 1.9578, "step": 67677 }, { "epoch": 6.9500924214417745, "grad_norm": 0.13325315713882446, "learning_rate": 0.01, "loss": 1.9594, "step": 67680 }, { "epoch": 6.950400492914357, "grad_norm": 0.06681843101978302, "learning_rate": 0.01, "loss": 1.9903, "step": 67683 }, { "epoch": 6.950708564386938, "grad_norm": 0.051343731582164764, "learning_rate": 0.01, "loss": 2.0182, "step": 67686 }, { "epoch": 6.95101663585952, "grad_norm": 0.031108930706977844, "learning_rate": 0.01, "loss": 1.9567, "step": 67689 }, { "epoch": 6.951324707332101, "grad_norm": 0.06201615929603577, "learning_rate": 0.01, "loss": 1.9724, "step": 67692 }, { "epoch": 6.951632778804683, "grad_norm": 0.059839654713869095, "learning_rate": 0.01, "loss": 1.9728, "step": 67695 }, { "epoch": 6.951940850277264, "grad_norm": 0.04882393404841423, "learning_rate": 0.01, "loss": 1.9678, "step": 67698 }, { "epoch": 6.952248921749846, "grad_norm": 0.031460147351026535, "learning_rate": 0.01, "loss": 1.9395, "step": 67701 }, { "epoch": 6.952556993222427, "grad_norm": 0.09777851402759552, "learning_rate": 0.01, "loss": 2.0074, "step": 67704 }, { "epoch": 6.952865064695009, "grad_norm": 0.0367160402238369, "learning_rate": 0.01, "loss": 1.9671, "step": 67707 }, { "epoch": 6.953173136167591, "grad_norm": 0.032731834799051285, "learning_rate": 0.01, "loss": 1.9556, "step": 67710 }, { "epoch": 6.953481207640173, "grad_norm": 0.07224103063344955, "learning_rate": 0.01, "loss": 1.978, "step": 67713 }, { "epoch": 6.953789279112754, "grad_norm": 0.062362559139728546, "learning_rate": 0.01, "loss": 1.9805, "step": 67716 }, { "epoch": 6.954097350585336, "grad_norm": 0.05006725341081619, "learning_rate": 0.01, "loss": 1.9899, "step": 67719 }, { "epoch": 6.954405422057917, "grad_norm": 0.03857605531811714, "learning_rate": 0.01, "loss": 1.9778, "step": 67722 }, { "epoch": 6.954713493530499, "grad_norm": 0.10484008491039276, "learning_rate": 0.01, "loss": 1.9787, "step": 67725 }, { "epoch": 6.955021565003081, "grad_norm": 0.09993112832307816, "learning_rate": 0.01, "loss": 1.9794, "step": 67728 }, { "epoch": 6.955329636475662, "grad_norm": 0.08906539529561996, "learning_rate": 0.01, "loss": 1.966, "step": 67731 }, { "epoch": 6.955637707948244, "grad_norm": 0.04950569570064545, "learning_rate": 0.01, "loss": 1.9736, "step": 67734 }, { "epoch": 6.9559457794208255, "grad_norm": 0.04009339585900307, "learning_rate": 0.01, "loss": 1.9712, "step": 67737 }, { "epoch": 6.9562538508934075, "grad_norm": 0.03402652591466904, "learning_rate": 0.01, "loss": 1.9692, "step": 67740 }, { "epoch": 6.956561922365989, "grad_norm": 0.036231786012649536, "learning_rate": 0.01, "loss": 1.975, "step": 67743 }, { "epoch": 6.956869993838571, "grad_norm": 0.06914620101451874, "learning_rate": 0.01, "loss": 1.9675, "step": 67746 }, { "epoch": 6.957178065311152, "grad_norm": 0.12120147049427032, "learning_rate": 0.01, "loss": 1.9943, "step": 67749 }, { "epoch": 6.957486136783734, "grad_norm": 0.04155363887548447, "learning_rate": 0.01, "loss": 1.968, "step": 67752 }, { "epoch": 6.957794208256315, "grad_norm": 0.03857012465596199, "learning_rate": 0.01, "loss": 1.9986, "step": 67755 }, { "epoch": 6.958102279728897, "grad_norm": 0.05268177390098572, "learning_rate": 0.01, "loss": 1.9617, "step": 67758 }, { "epoch": 6.958410351201479, "grad_norm": 0.14224505424499512, "learning_rate": 0.01, "loss": 1.9735, "step": 67761 }, { "epoch": 6.95871842267406, "grad_norm": 0.10331138968467712, "learning_rate": 0.01, "loss": 1.9663, "step": 67764 }, { "epoch": 6.9590264941466415, "grad_norm": 0.13701403141021729, "learning_rate": 0.01, "loss": 1.9779, "step": 67767 }, { "epoch": 6.959334565619224, "grad_norm": 0.09614556282758713, "learning_rate": 0.01, "loss": 1.9599, "step": 67770 }, { "epoch": 6.959642637091806, "grad_norm": 0.07385848462581635, "learning_rate": 0.01, "loss": 1.9619, "step": 67773 }, { "epoch": 6.959950708564387, "grad_norm": 0.05891263857483864, "learning_rate": 0.01, "loss": 1.9567, "step": 67776 }, { "epoch": 6.960258780036969, "grad_norm": 0.06532695889472961, "learning_rate": 0.01, "loss": 1.9592, "step": 67779 }, { "epoch": 6.96056685150955, "grad_norm": 0.048335302621126175, "learning_rate": 0.01, "loss": 1.9872, "step": 67782 }, { "epoch": 6.960874922982132, "grad_norm": 0.0490678995847702, "learning_rate": 0.01, "loss": 1.9964, "step": 67785 }, { "epoch": 6.961182994454713, "grad_norm": 0.057322729378938675, "learning_rate": 0.01, "loss": 1.9868, "step": 67788 }, { "epoch": 6.961491065927295, "grad_norm": 0.09200582653284073, "learning_rate": 0.01, "loss": 1.9913, "step": 67791 }, { "epoch": 6.961799137399876, "grad_norm": 0.08140811324119568, "learning_rate": 0.01, "loss": 1.9687, "step": 67794 }, { "epoch": 6.9621072088724585, "grad_norm": 0.08386159688234329, "learning_rate": 0.01, "loss": 1.9775, "step": 67797 }, { "epoch": 6.96241528034504, "grad_norm": 0.09306125342845917, "learning_rate": 0.01, "loss": 1.9717, "step": 67800 }, { "epoch": 6.962723351817622, "grad_norm": 0.0702492967247963, "learning_rate": 0.01, "loss": 1.9634, "step": 67803 }, { "epoch": 6.963031423290204, "grad_norm": 0.07399389147758484, "learning_rate": 0.01, "loss": 1.9792, "step": 67806 }, { "epoch": 6.963339494762785, "grad_norm": 0.05269570276141167, "learning_rate": 0.01, "loss": 1.9827, "step": 67809 }, { "epoch": 6.963647566235367, "grad_norm": 0.0840226262807846, "learning_rate": 0.01, "loss": 1.9654, "step": 67812 }, { "epoch": 6.963955637707948, "grad_norm": 0.06911315023899078, "learning_rate": 0.01, "loss": 1.9819, "step": 67815 }, { "epoch": 6.96426370918053, "grad_norm": 0.04625274986028671, "learning_rate": 0.01, "loss": 1.9673, "step": 67818 }, { "epoch": 6.964571780653111, "grad_norm": 0.033663198351860046, "learning_rate": 0.01, "loss": 1.9811, "step": 67821 }, { "epoch": 6.964879852125693, "grad_norm": 0.035202909260988235, "learning_rate": 0.01, "loss": 1.9427, "step": 67824 }, { "epoch": 6.9651879235982745, "grad_norm": 0.04282594099640846, "learning_rate": 0.01, "loss": 1.9854, "step": 67827 }, { "epoch": 6.965495995070857, "grad_norm": 0.07154384255409241, "learning_rate": 0.01, "loss": 2.0019, "step": 67830 }, { "epoch": 6.965804066543438, "grad_norm": 0.09028030931949615, "learning_rate": 0.01, "loss": 1.9761, "step": 67833 }, { "epoch": 6.96611213801602, "grad_norm": 0.046586085110902786, "learning_rate": 0.01, "loss": 1.9904, "step": 67836 }, { "epoch": 6.966420209488601, "grad_norm": 0.03928957134485245, "learning_rate": 0.01, "loss": 1.9578, "step": 67839 }, { "epoch": 6.966728280961183, "grad_norm": 0.04739971458911896, "learning_rate": 0.01, "loss": 1.9665, "step": 67842 }, { "epoch": 6.967036352433764, "grad_norm": 0.06463371962308884, "learning_rate": 0.01, "loss": 1.978, "step": 67845 }, { "epoch": 6.967344423906346, "grad_norm": 0.03283145651221275, "learning_rate": 0.01, "loss": 1.9722, "step": 67848 }, { "epoch": 6.967652495378928, "grad_norm": 0.15586607158184052, "learning_rate": 0.01, "loss": 1.9614, "step": 67851 }, { "epoch": 6.967960566851509, "grad_norm": 0.08082199841737747, "learning_rate": 0.01, "loss": 1.9538, "step": 67854 }, { "epoch": 6.9682686383240915, "grad_norm": 0.06927336007356644, "learning_rate": 0.01, "loss": 1.9578, "step": 67857 }, { "epoch": 6.968576709796673, "grad_norm": 0.03947534039616585, "learning_rate": 0.01, "loss": 1.9882, "step": 67860 }, { "epoch": 6.968884781269255, "grad_norm": 0.059715401381254196, "learning_rate": 0.01, "loss": 1.9768, "step": 67863 }, { "epoch": 6.969192852741836, "grad_norm": 0.056624263525009155, "learning_rate": 0.01, "loss": 1.9577, "step": 67866 }, { "epoch": 6.969500924214418, "grad_norm": 0.0887855812907219, "learning_rate": 0.01, "loss": 1.9986, "step": 67869 }, { "epoch": 6.969808995686999, "grad_norm": 0.08432299643754959, "learning_rate": 0.01, "loss": 1.9656, "step": 67872 }, { "epoch": 6.970117067159581, "grad_norm": 0.0985739529132843, "learning_rate": 0.01, "loss": 1.984, "step": 67875 }, { "epoch": 6.970425138632162, "grad_norm": 0.0615275539457798, "learning_rate": 0.01, "loss": 2.0046, "step": 67878 }, { "epoch": 6.970733210104744, "grad_norm": 0.04872935637831688, "learning_rate": 0.01, "loss": 1.9751, "step": 67881 }, { "epoch": 6.971041281577326, "grad_norm": 0.04057471454143524, "learning_rate": 0.01, "loss": 1.9835, "step": 67884 }, { "epoch": 6.9713493530499075, "grad_norm": 0.09079626202583313, "learning_rate": 0.01, "loss": 1.9851, "step": 67887 }, { "epoch": 6.97165742452249, "grad_norm": 0.12390229851007462, "learning_rate": 0.01, "loss": 2.0097, "step": 67890 }, { "epoch": 6.971965495995071, "grad_norm": 0.08013787120580673, "learning_rate": 0.01, "loss": 1.9916, "step": 67893 }, { "epoch": 6.972273567467653, "grad_norm": 0.10543009638786316, "learning_rate": 0.01, "loss": 1.9595, "step": 67896 }, { "epoch": 6.972581638940234, "grad_norm": 0.06962577998638153, "learning_rate": 0.01, "loss": 1.9368, "step": 67899 }, { "epoch": 6.972889710412816, "grad_norm": 0.05179775133728981, "learning_rate": 0.01, "loss": 1.9784, "step": 67902 }, { "epoch": 6.973197781885397, "grad_norm": 0.04999608173966408, "learning_rate": 0.01, "loss": 1.9589, "step": 67905 }, { "epoch": 6.973505853357979, "grad_norm": 0.037668853998184204, "learning_rate": 0.01, "loss": 1.974, "step": 67908 }, { "epoch": 6.97381392483056, "grad_norm": 0.04385687783360481, "learning_rate": 0.01, "loss": 1.9713, "step": 67911 }, { "epoch": 6.9741219963031424, "grad_norm": 0.03812088444828987, "learning_rate": 0.01, "loss": 1.9791, "step": 67914 }, { "epoch": 6.974430067775724, "grad_norm": 0.08592873066663742, "learning_rate": 0.01, "loss": 1.9476, "step": 67917 }, { "epoch": 6.974738139248306, "grad_norm": 0.07441151887178421, "learning_rate": 0.01, "loss": 1.9734, "step": 67920 }, { "epoch": 6.975046210720887, "grad_norm": 0.055950410664081573, "learning_rate": 0.01, "loss": 1.965, "step": 67923 }, { "epoch": 6.975354282193469, "grad_norm": 0.07224567234516144, "learning_rate": 0.01, "loss": 1.9649, "step": 67926 }, { "epoch": 6.975662353666051, "grad_norm": 0.17487280070781708, "learning_rate": 0.01, "loss": 1.9706, "step": 67929 }, { "epoch": 6.975970425138632, "grad_norm": 0.040765196084976196, "learning_rate": 0.01, "loss": 1.9787, "step": 67932 }, { "epoch": 6.976278496611214, "grad_norm": 0.05561240762472153, "learning_rate": 0.01, "loss": 1.9699, "step": 67935 }, { "epoch": 6.976586568083795, "grad_norm": 0.07443460822105408, "learning_rate": 0.01, "loss": 1.9553, "step": 67938 }, { "epoch": 6.976894639556377, "grad_norm": 0.0669572651386261, "learning_rate": 0.01, "loss": 1.9887, "step": 67941 }, { "epoch": 6.9772027110289585, "grad_norm": 0.05161336436867714, "learning_rate": 0.01, "loss": 1.9854, "step": 67944 }, { "epoch": 6.9775107825015406, "grad_norm": 0.0417034886777401, "learning_rate": 0.01, "loss": 1.9739, "step": 67947 }, { "epoch": 6.977818853974122, "grad_norm": 0.06474210321903229, "learning_rate": 0.01, "loss": 1.9796, "step": 67950 }, { "epoch": 6.978126925446704, "grad_norm": 0.07746025174856186, "learning_rate": 0.01, "loss": 1.9753, "step": 67953 }, { "epoch": 6.978434996919285, "grad_norm": 0.045474573969841, "learning_rate": 0.01, "loss": 1.9518, "step": 67956 }, { "epoch": 6.978743068391867, "grad_norm": 0.03205155208706856, "learning_rate": 0.01, "loss": 1.9783, "step": 67959 }, { "epoch": 6.979051139864449, "grad_norm": 0.03780858963727951, "learning_rate": 0.01, "loss": 1.9759, "step": 67962 }, { "epoch": 6.97935921133703, "grad_norm": 0.04689191281795502, "learning_rate": 0.01, "loss": 1.9845, "step": 67965 }, { "epoch": 6.979667282809611, "grad_norm": 0.05307697504758835, "learning_rate": 0.01, "loss": 1.9462, "step": 67968 }, { "epoch": 6.979975354282193, "grad_norm": 0.19176854193210602, "learning_rate": 0.01, "loss": 1.9786, "step": 67971 }, { "epoch": 6.9802834257547755, "grad_norm": 0.13577581942081451, "learning_rate": 0.01, "loss": 1.9874, "step": 67974 }, { "epoch": 6.980591497227357, "grad_norm": 0.10115823894739151, "learning_rate": 0.01, "loss": 2.0068, "step": 67977 }, { "epoch": 6.980899568699939, "grad_norm": 0.08148372173309326, "learning_rate": 0.01, "loss": 1.9795, "step": 67980 }, { "epoch": 6.98120764017252, "grad_norm": 0.05191081017255783, "learning_rate": 0.01, "loss": 1.9597, "step": 67983 }, { "epoch": 6.981515711645102, "grad_norm": 0.06080775707960129, "learning_rate": 0.01, "loss": 1.978, "step": 67986 }, { "epoch": 6.981823783117683, "grad_norm": 0.07841724157333374, "learning_rate": 0.01, "loss": 2.0001, "step": 67989 }, { "epoch": 6.982131854590265, "grad_norm": 0.04652271047234535, "learning_rate": 0.01, "loss": 1.989, "step": 67992 }, { "epoch": 6.982439926062846, "grad_norm": 0.10853028297424316, "learning_rate": 0.01, "loss": 1.9663, "step": 67995 }, { "epoch": 6.982747997535428, "grad_norm": 0.03634324297308922, "learning_rate": 0.01, "loss": 1.9614, "step": 67998 }, { "epoch": 6.9830560690080095, "grad_norm": 0.13538099825382233, "learning_rate": 0.01, "loss": 1.9805, "step": 68001 }, { "epoch": 6.9833641404805915, "grad_norm": 0.06273335963487625, "learning_rate": 0.01, "loss": 1.9564, "step": 68004 }, { "epoch": 6.983672211953174, "grad_norm": 0.08126869797706604, "learning_rate": 0.01, "loss": 2.0036, "step": 68007 }, { "epoch": 6.983980283425755, "grad_norm": 0.05156498774886131, "learning_rate": 0.01, "loss": 1.9956, "step": 68010 }, { "epoch": 6.984288354898337, "grad_norm": 0.05055593326687813, "learning_rate": 0.01, "loss": 1.9717, "step": 68013 }, { "epoch": 6.984596426370918, "grad_norm": 0.04435109347105026, "learning_rate": 0.01, "loss": 1.9627, "step": 68016 }, { "epoch": 6.9849044978435, "grad_norm": 0.13580620288848877, "learning_rate": 0.01, "loss": 1.9931, "step": 68019 }, { "epoch": 6.985212569316081, "grad_norm": 0.03387708216905594, "learning_rate": 0.01, "loss": 1.9527, "step": 68022 }, { "epoch": 6.985520640788663, "grad_norm": 0.09359924495220184, "learning_rate": 0.01, "loss": 1.9677, "step": 68025 }, { "epoch": 6.985828712261244, "grad_norm": 0.09586784243583679, "learning_rate": 0.01, "loss": 1.958, "step": 68028 }, { "epoch": 6.986136783733826, "grad_norm": 0.08071450144052505, "learning_rate": 0.01, "loss": 1.9878, "step": 68031 }, { "epoch": 6.986444855206408, "grad_norm": 0.04609205946326256, "learning_rate": 0.01, "loss": 1.9778, "step": 68034 }, { "epoch": 6.98675292667899, "grad_norm": 0.05660049244761467, "learning_rate": 0.01, "loss": 1.9692, "step": 68037 }, { "epoch": 6.987060998151571, "grad_norm": 0.04224297031760216, "learning_rate": 0.01, "loss": 1.9708, "step": 68040 }, { "epoch": 6.987369069624153, "grad_norm": 0.056567490100860596, "learning_rate": 0.01, "loss": 1.9471, "step": 68043 }, { "epoch": 6.987677141096734, "grad_norm": 0.10390833020210266, "learning_rate": 0.01, "loss": 1.9526, "step": 68046 }, { "epoch": 6.987985212569316, "grad_norm": 0.04906808212399483, "learning_rate": 0.01, "loss": 1.9883, "step": 68049 }, { "epoch": 6.988293284041898, "grad_norm": 0.0763496458530426, "learning_rate": 0.01, "loss": 1.9625, "step": 68052 }, { "epoch": 6.988601355514479, "grad_norm": 0.042168740183115005, "learning_rate": 0.01, "loss": 1.971, "step": 68055 }, { "epoch": 6.988909426987061, "grad_norm": 0.044075168669223785, "learning_rate": 0.01, "loss": 1.9538, "step": 68058 }, { "epoch": 6.9892174984596425, "grad_norm": 0.06443364918231964, "learning_rate": 0.01, "loss": 1.9974, "step": 68061 }, { "epoch": 6.9895255699322245, "grad_norm": 0.06181558594107628, "learning_rate": 0.01, "loss": 1.9731, "step": 68064 }, { "epoch": 6.989833641404806, "grad_norm": 0.061378926038742065, "learning_rate": 0.01, "loss": 1.9591, "step": 68067 }, { "epoch": 6.990141712877388, "grad_norm": 0.11718502640724182, "learning_rate": 0.01, "loss": 1.9726, "step": 68070 }, { "epoch": 6.990449784349969, "grad_norm": 0.10844454169273376, "learning_rate": 0.01, "loss": 1.9943, "step": 68073 }, { "epoch": 6.990757855822551, "grad_norm": 0.04362885281443596, "learning_rate": 0.01, "loss": 1.9694, "step": 68076 }, { "epoch": 6.991065927295132, "grad_norm": 0.04131380096077919, "learning_rate": 0.01, "loss": 1.969, "step": 68079 }, { "epoch": 6.991373998767714, "grad_norm": 0.03312958776950836, "learning_rate": 0.01, "loss": 1.9731, "step": 68082 }, { "epoch": 6.991682070240296, "grad_norm": 0.05648859217762947, "learning_rate": 0.01, "loss": 1.9942, "step": 68085 }, { "epoch": 6.991990141712877, "grad_norm": 0.07927577197551727, "learning_rate": 0.01, "loss": 1.9604, "step": 68088 }, { "epoch": 6.992298213185459, "grad_norm": 0.081300288438797, "learning_rate": 0.01, "loss": 1.9668, "step": 68091 }, { "epoch": 6.992606284658041, "grad_norm": 0.08239664137363434, "learning_rate": 0.01, "loss": 1.991, "step": 68094 }, { "epoch": 6.992914356130623, "grad_norm": 0.14173032343387604, "learning_rate": 0.01, "loss": 1.9795, "step": 68097 }, { "epoch": 6.993222427603204, "grad_norm": 0.039897531270980835, "learning_rate": 0.01, "loss": 1.9652, "step": 68100 }, { "epoch": 6.993530499075786, "grad_norm": 0.08170229941606522, "learning_rate": 0.01, "loss": 1.9603, "step": 68103 }, { "epoch": 6.993838570548367, "grad_norm": 0.09108921140432358, "learning_rate": 0.01, "loss": 1.9484, "step": 68106 }, { "epoch": 6.994146642020949, "grad_norm": 0.048272643238306046, "learning_rate": 0.01, "loss": 1.9744, "step": 68109 }, { "epoch": 6.99445471349353, "grad_norm": 0.06137422099709511, "learning_rate": 0.01, "loss": 1.9776, "step": 68112 }, { "epoch": 6.994762784966112, "grad_norm": 0.060723815113306046, "learning_rate": 0.01, "loss": 1.9579, "step": 68115 }, { "epoch": 6.995070856438693, "grad_norm": 0.11094491183757782, "learning_rate": 0.01, "loss": 1.9974, "step": 68118 }, { "epoch": 6.9953789279112755, "grad_norm": 0.041866280138492584, "learning_rate": 0.01, "loss": 1.9696, "step": 68121 }, { "epoch": 6.995686999383857, "grad_norm": 0.032954033464193344, "learning_rate": 0.01, "loss": 1.9706, "step": 68124 }, { "epoch": 6.995995070856439, "grad_norm": 0.034947719424963, "learning_rate": 0.01, "loss": 1.975, "step": 68127 }, { "epoch": 6.996303142329021, "grad_norm": 0.03938392922282219, "learning_rate": 0.01, "loss": 1.994, "step": 68130 }, { "epoch": 6.996611213801602, "grad_norm": 0.1045796275138855, "learning_rate": 0.01, "loss": 1.9709, "step": 68133 }, { "epoch": 6.996919285274184, "grad_norm": 0.10855524986982346, "learning_rate": 0.01, "loss": 1.9603, "step": 68136 }, { "epoch": 6.997227356746765, "grad_norm": 0.08628056198358536, "learning_rate": 0.01, "loss": 1.9827, "step": 68139 }, { "epoch": 6.997535428219347, "grad_norm": 0.0505918487906456, "learning_rate": 0.01, "loss": 1.9752, "step": 68142 }, { "epoch": 6.997843499691928, "grad_norm": 0.04251931607723236, "learning_rate": 0.01, "loss": 1.9555, "step": 68145 }, { "epoch": 6.99815157116451, "grad_norm": 0.05464445427060127, "learning_rate": 0.01, "loss": 1.97, "step": 68148 }, { "epoch": 6.9984596426370915, "grad_norm": 0.04259735345840454, "learning_rate": 0.01, "loss": 1.9922, "step": 68151 }, { "epoch": 6.998767714109674, "grad_norm": 0.06472709774971008, "learning_rate": 0.01, "loss": 1.9549, "step": 68154 }, { "epoch": 6.999075785582255, "grad_norm": 0.0695829764008522, "learning_rate": 0.01, "loss": 1.9776, "step": 68157 }, { "epoch": 6.999383857054837, "grad_norm": 0.09822755306959152, "learning_rate": 0.01, "loss": 1.9376, "step": 68160 }, { "epoch": 6.999691928527419, "grad_norm": 0.10002230852842331, "learning_rate": 0.01, "loss": 1.9992, "step": 68163 }, { "epoch": 7.0, "grad_norm": 0.05069267004728317, "learning_rate": 0.01, "loss": 1.9642, "step": 68166 } ], "logging_steps": 3, "max_steps": 68166, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.45178225692133e+21, "train_batch_size": 18, "trial_name": null, "trial_params": null }