{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.87349397590361, "eval_steps": 500, "global_step": 10300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00963855421686747, "grad_norm": 3.041989326477051, "learning_rate": 4.999514563106796e-05, "loss": 0.3103, "step": 1 }, { "epoch": 0.01927710843373494, "grad_norm": 4.0105366706848145, "learning_rate": 4.9990291262135925e-05, "loss": 0.4418, "step": 2 }, { "epoch": 0.02891566265060241, "grad_norm": 5.507172107696533, "learning_rate": 4.998543689320388e-05, "loss": 0.4649, "step": 3 }, { "epoch": 0.03855421686746988, "grad_norm": 4.181346893310547, "learning_rate": 4.998058252427185e-05, "loss": 0.4272, "step": 4 }, { "epoch": 0.04819277108433735, "grad_norm": 2.9665005207061768, "learning_rate": 4.9975728155339805e-05, "loss": 0.373, "step": 5 }, { "epoch": 0.05783132530120482, "grad_norm": 5.162627220153809, "learning_rate": 4.997087378640777e-05, "loss": 0.6008, "step": 6 }, { "epoch": 0.06746987951807229, "grad_norm": 4.46385383605957, "learning_rate": 4.996601941747573e-05, "loss": 0.3327, "step": 7 }, { "epoch": 0.07710843373493977, "grad_norm": 4.564752578735352, "learning_rate": 4.996116504854369e-05, "loss": 0.3101, "step": 8 }, { "epoch": 0.08674698795180723, "grad_norm": 3.910353422164917, "learning_rate": 4.995631067961165e-05, "loss": 0.6689, "step": 9 }, { "epoch": 0.0963855421686747, "grad_norm": 6.103231906890869, "learning_rate": 4.9951456310679614e-05, "loss": 0.3963, "step": 10 }, { "epoch": 0.10602409638554217, "grad_norm": 4.763515472412109, "learning_rate": 4.994660194174757e-05, "loss": 0.3332, "step": 11 }, { "epoch": 0.11566265060240964, "grad_norm": 4.097593784332275, "learning_rate": 4.9941747572815536e-05, "loss": 0.3287, "step": 12 }, { "epoch": 0.12530120481927712, "grad_norm": 6.789514064788818, "learning_rate": 4.99368932038835e-05, "loss": 0.4718, "step": 13 }, { "epoch": 0.13493975903614458, "grad_norm": 3.2843971252441406, "learning_rate": 4.993203883495146e-05, "loss": 0.4499, "step": 14 }, { "epoch": 0.14457831325301204, "grad_norm": 3.794027090072632, "learning_rate": 4.992718446601942e-05, "loss": 0.3701, "step": 15 }, { "epoch": 0.15421686746987953, "grad_norm": 5.1562180519104, "learning_rate": 4.992233009708738e-05, "loss": 0.3626, "step": 16 }, { "epoch": 0.163855421686747, "grad_norm": 8.213271141052246, "learning_rate": 4.9917475728155345e-05, "loss": 0.4486, "step": 17 }, { "epoch": 0.17349397590361446, "grad_norm": 7.212838172912598, "learning_rate": 4.99126213592233e-05, "loss": 0.4439, "step": 18 }, { "epoch": 0.18313253012048192, "grad_norm": 4.129086971282959, "learning_rate": 4.990776699029127e-05, "loss": 0.3822, "step": 19 }, { "epoch": 0.1927710843373494, "grad_norm": 4.607855796813965, "learning_rate": 4.9902912621359225e-05, "loss": 0.3394, "step": 20 }, { "epoch": 0.20240963855421687, "grad_norm": 2.8675599098205566, "learning_rate": 4.989805825242719e-05, "loss": 0.2533, "step": 21 }, { "epoch": 0.21204819277108433, "grad_norm": 3.890111207962036, "learning_rate": 4.989320388349515e-05, "loss": 0.3061, "step": 22 }, { "epoch": 0.2216867469879518, "grad_norm": 4.288766860961914, "learning_rate": 4.988834951456311e-05, "loss": 0.2601, "step": 23 }, { "epoch": 0.23132530120481928, "grad_norm": 5.433402061462402, "learning_rate": 4.988349514563107e-05, "loss": 0.4958, "step": 24 }, { "epoch": 0.24096385542168675, "grad_norm": 4.627699375152588, "learning_rate": 4.9878640776699034e-05, "loss": 0.3588, "step": 25 }, { "epoch": 0.25060240963855424, "grad_norm": 3.1267292499542236, "learning_rate": 4.987378640776699e-05, "loss": 0.3505, "step": 26 }, { "epoch": 0.26024096385542167, "grad_norm": 3.7748382091522217, "learning_rate": 4.986893203883496e-05, "loss": 0.3656, "step": 27 }, { "epoch": 0.26987951807228916, "grad_norm": 5.67606258392334, "learning_rate": 4.9864077669902914e-05, "loss": 0.3824, "step": 28 }, { "epoch": 0.27951807228915665, "grad_norm": 5.525846004486084, "learning_rate": 4.985922330097087e-05, "loss": 0.5561, "step": 29 }, { "epoch": 0.2891566265060241, "grad_norm": 4.6780595779418945, "learning_rate": 4.985436893203884e-05, "loss": 0.3481, "step": 30 }, { "epoch": 0.2987951807228916, "grad_norm": 3.3279879093170166, "learning_rate": 4.9849514563106795e-05, "loss": 0.368, "step": 31 }, { "epoch": 0.30843373493975906, "grad_norm": 6.731245517730713, "learning_rate": 4.984466019417476e-05, "loss": 0.376, "step": 32 }, { "epoch": 0.3180722891566265, "grad_norm": 5.8013434410095215, "learning_rate": 4.983980582524272e-05, "loss": 0.405, "step": 33 }, { "epoch": 0.327710843373494, "grad_norm": 6.942627429962158, "learning_rate": 4.983495145631068e-05, "loss": 0.2417, "step": 34 }, { "epoch": 0.3373493975903614, "grad_norm": 4.623160362243652, "learning_rate": 4.983009708737864e-05, "loss": 0.3738, "step": 35 }, { "epoch": 0.3469879518072289, "grad_norm": 5.5930681228637695, "learning_rate": 4.9825242718446604e-05, "loss": 0.3559, "step": 36 }, { "epoch": 0.3566265060240964, "grad_norm": 3.3385379314422607, "learning_rate": 4.982038834951456e-05, "loss": 0.3076, "step": 37 }, { "epoch": 0.36626506024096384, "grad_norm": 7.084656238555908, "learning_rate": 4.981553398058253e-05, "loss": 0.5206, "step": 38 }, { "epoch": 0.3759036144578313, "grad_norm": 4.753252029418945, "learning_rate": 4.981067961165049e-05, "loss": 0.2533, "step": 39 }, { "epoch": 0.3855421686746988, "grad_norm": 5.297796726226807, "learning_rate": 4.980582524271845e-05, "loss": 0.3591, "step": 40 }, { "epoch": 0.39518072289156625, "grad_norm": 4.459458827972412, "learning_rate": 4.980097087378641e-05, "loss": 0.3897, "step": 41 }, { "epoch": 0.40481927710843374, "grad_norm": 4.648364543914795, "learning_rate": 4.979611650485437e-05, "loss": 0.3451, "step": 42 }, { "epoch": 0.41445783132530123, "grad_norm": 4.197800636291504, "learning_rate": 4.9791262135922335e-05, "loss": 0.4456, "step": 43 }, { "epoch": 0.42409638554216866, "grad_norm": 3.9615325927734375, "learning_rate": 4.978640776699029e-05, "loss": 0.3475, "step": 44 }, { "epoch": 0.43373493975903615, "grad_norm": 4.834892749786377, "learning_rate": 4.978155339805826e-05, "loss": 0.209, "step": 45 }, { "epoch": 0.4433734939759036, "grad_norm": 10.037736892700195, "learning_rate": 4.9776699029126215e-05, "loss": 0.3974, "step": 46 }, { "epoch": 0.4530120481927711, "grad_norm": 3.257591962814331, "learning_rate": 4.977184466019418e-05, "loss": 0.3491, "step": 47 }, { "epoch": 0.46265060240963857, "grad_norm": 3.897066593170166, "learning_rate": 4.976699029126214e-05, "loss": 0.4563, "step": 48 }, { "epoch": 0.472289156626506, "grad_norm": 6.287714004516602, "learning_rate": 4.97621359223301e-05, "loss": 0.2903, "step": 49 }, { "epoch": 0.4819277108433735, "grad_norm": 5.9175872802734375, "learning_rate": 4.975728155339806e-05, "loss": 0.4342, "step": 50 }, { "epoch": 0.491566265060241, "grad_norm": 5.247127532958984, "learning_rate": 4.9752427184466024e-05, "loss": 0.3251, "step": 51 }, { "epoch": 0.5012048192771085, "grad_norm": 4.772215366363525, "learning_rate": 4.974757281553398e-05, "loss": 0.3903, "step": 52 }, { "epoch": 0.5108433734939759, "grad_norm": 2.509791612625122, "learning_rate": 4.9742718446601947e-05, "loss": 0.1988, "step": 53 }, { "epoch": 0.5204819277108433, "grad_norm": 6.531315326690674, "learning_rate": 4.9737864077669904e-05, "loss": 0.4796, "step": 54 }, { "epoch": 0.5301204819277109, "grad_norm": 3.2526941299438477, "learning_rate": 4.973300970873787e-05, "loss": 0.3017, "step": 55 }, { "epoch": 0.5397590361445783, "grad_norm": 2.8929591178894043, "learning_rate": 4.9728155339805827e-05, "loss": 0.2134, "step": 56 }, { "epoch": 0.5493975903614458, "grad_norm": 8.145646095275879, "learning_rate": 4.9723300970873784e-05, "loss": 0.3157, "step": 57 }, { "epoch": 0.5590361445783133, "grad_norm": 4.986313819885254, "learning_rate": 4.971844660194175e-05, "loss": 0.253, "step": 58 }, { "epoch": 0.5686746987951807, "grad_norm": 6.5913310050964355, "learning_rate": 4.9713592233009707e-05, "loss": 0.4097, "step": 59 }, { "epoch": 0.5783132530120482, "grad_norm": 4.570411205291748, "learning_rate": 4.970873786407767e-05, "loss": 0.2776, "step": 60 }, { "epoch": 0.5879518072289157, "grad_norm": 4.85597562789917, "learning_rate": 4.970388349514563e-05, "loss": 0.2812, "step": 61 }, { "epoch": 0.5975903614457831, "grad_norm": 8.386072158813477, "learning_rate": 4.9699029126213593e-05, "loss": 0.5213, "step": 62 }, { "epoch": 0.6072289156626506, "grad_norm": 4.076353549957275, "learning_rate": 4.969417475728155e-05, "loss": 0.2822, "step": 63 }, { "epoch": 0.6168674698795181, "grad_norm": 2.5952796936035156, "learning_rate": 4.968932038834952e-05, "loss": 0.1891, "step": 64 }, { "epoch": 0.6265060240963856, "grad_norm": 3.5527873039245605, "learning_rate": 4.968446601941748e-05, "loss": 0.3143, "step": 65 }, { "epoch": 0.636144578313253, "grad_norm": 3.0175695419311523, "learning_rate": 4.9679611650485445e-05, "loss": 0.2783, "step": 66 }, { "epoch": 0.6457831325301204, "grad_norm": 10.41678524017334, "learning_rate": 4.96747572815534e-05, "loss": 0.1709, "step": 67 }, { "epoch": 0.655421686746988, "grad_norm": 3.185204267501831, "learning_rate": 4.966990291262136e-05, "loss": 0.2855, "step": 68 }, { "epoch": 0.6650602409638554, "grad_norm": 4.778441429138184, "learning_rate": 4.9665048543689325e-05, "loss": 0.4171, "step": 69 }, { "epoch": 0.6746987951807228, "grad_norm": 3.6435158252716064, "learning_rate": 4.966019417475728e-05, "loss": 0.4036, "step": 70 }, { "epoch": 0.6843373493975904, "grad_norm": 6.590524673461914, "learning_rate": 4.965533980582525e-05, "loss": 0.2405, "step": 71 }, { "epoch": 0.6939759036144578, "grad_norm": 3.496969699859619, "learning_rate": 4.9650485436893205e-05, "loss": 0.2323, "step": 72 }, { "epoch": 0.7036144578313253, "grad_norm": 4.630599021911621, "learning_rate": 4.964563106796117e-05, "loss": 0.4026, "step": 73 }, { "epoch": 0.7132530120481928, "grad_norm": 2.70885968208313, "learning_rate": 4.964077669902913e-05, "loss": 0.0922, "step": 74 }, { "epoch": 0.7228915662650602, "grad_norm": 4.13625955581665, "learning_rate": 4.963592233009709e-05, "loss": 0.5761, "step": 75 }, { "epoch": 0.7325301204819277, "grad_norm": 6.3974127769470215, "learning_rate": 4.963106796116505e-05, "loss": 0.3358, "step": 76 }, { "epoch": 0.7421686746987952, "grad_norm": 5.156192302703857, "learning_rate": 4.9626213592233014e-05, "loss": 0.3513, "step": 77 }, { "epoch": 0.7518072289156627, "grad_norm": 4.392216205596924, "learning_rate": 4.962135922330097e-05, "loss": 0.4988, "step": 78 }, { "epoch": 0.7614457831325301, "grad_norm": 4.40324068069458, "learning_rate": 4.9616504854368936e-05, "loss": 0.3051, "step": 79 }, { "epoch": 0.7710843373493976, "grad_norm": 4.078240871429443, "learning_rate": 4.9611650485436894e-05, "loss": 0.2126, "step": 80 }, { "epoch": 0.7807228915662651, "grad_norm": 5.342082500457764, "learning_rate": 4.960679611650486e-05, "loss": 0.348, "step": 81 }, { "epoch": 0.7903614457831325, "grad_norm": 6.321410655975342, "learning_rate": 4.9601941747572816e-05, "loss": 0.1834, "step": 82 }, { "epoch": 0.8, "grad_norm": 2.0763497352600098, "learning_rate": 4.9597087378640774e-05, "loss": 0.1028, "step": 83 }, { "epoch": 0.8096385542168675, "grad_norm": 4.324068069458008, "learning_rate": 4.959223300970874e-05, "loss": 0.3079, "step": 84 }, { "epoch": 0.8192771084337349, "grad_norm": 3.0537025928497314, "learning_rate": 4.9587378640776696e-05, "loss": 0.2299, "step": 85 }, { "epoch": 0.8289156626506025, "grad_norm": 3.318640947341919, "learning_rate": 4.958252427184466e-05, "loss": 0.2913, "step": 86 }, { "epoch": 0.8385542168674699, "grad_norm": 6.559309005737305, "learning_rate": 4.957766990291262e-05, "loss": 0.7576, "step": 87 }, { "epoch": 0.8481927710843373, "grad_norm": 4.64049768447876, "learning_rate": 4.957281553398058e-05, "loss": 0.3131, "step": 88 }, { "epoch": 0.8578313253012049, "grad_norm": 1.9431257247924805, "learning_rate": 4.956796116504855e-05, "loss": 0.0998, "step": 89 }, { "epoch": 0.8674698795180723, "grad_norm": 5.275748252868652, "learning_rate": 4.956310679611651e-05, "loss": 0.3976, "step": 90 }, { "epoch": 0.8771084337349397, "grad_norm": 5.866461753845215, "learning_rate": 4.955825242718447e-05, "loss": 0.3276, "step": 91 }, { "epoch": 0.8867469879518072, "grad_norm": 5.155888557434082, "learning_rate": 4.9553398058252435e-05, "loss": 0.4489, "step": 92 }, { "epoch": 0.8963855421686747, "grad_norm": 5.958845615386963, "learning_rate": 4.954854368932039e-05, "loss": 0.2508, "step": 93 }, { "epoch": 0.9060240963855422, "grad_norm": 6.993139266967773, "learning_rate": 4.954368932038835e-05, "loss": 0.578, "step": 94 }, { "epoch": 0.9156626506024096, "grad_norm": 3.445073127746582, "learning_rate": 4.9538834951456315e-05, "loss": 0.2314, "step": 95 }, { "epoch": 0.9253012048192771, "grad_norm": 4.6569623947143555, "learning_rate": 4.953398058252427e-05, "loss": 0.3835, "step": 96 }, { "epoch": 0.9349397590361446, "grad_norm": 6.608811855316162, "learning_rate": 4.952912621359224e-05, "loss": 0.4451, "step": 97 }, { "epoch": 0.944578313253012, "grad_norm": 7.3629841804504395, "learning_rate": 4.9524271844660195e-05, "loss": 0.48, "step": 98 }, { "epoch": 0.9542168674698795, "grad_norm": 5.241456985473633, "learning_rate": 4.951941747572816e-05, "loss": 0.2665, "step": 99 }, { "epoch": 0.963855421686747, "grad_norm": 5.332890033721924, "learning_rate": 4.951456310679612e-05, "loss": 0.1973, "step": 100 }, { "epoch": 0.9734939759036144, "grad_norm": 4.698935031890869, "learning_rate": 4.950970873786408e-05, "loss": 0.5683, "step": 101 }, { "epoch": 0.983132530120482, "grad_norm": 4.515371322631836, "learning_rate": 4.950485436893204e-05, "loss": 0.2223, "step": 102 }, { "epoch": 0.9927710843373494, "grad_norm": 6.110688209533691, "learning_rate": 4.9500000000000004e-05, "loss": 0.4055, "step": 103 }, { "epoch": 1.008433734939759, "grad_norm": 15.725619316101074, "learning_rate": 4.949514563106796e-05, "loss": 0.3719, "step": 104 }, { "epoch": 1.0180722891566265, "grad_norm": 2.8734893798828125, "learning_rate": 4.9490291262135926e-05, "loss": 0.2104, "step": 105 }, { "epoch": 1.027710843373494, "grad_norm": 3.3540632724761963, "learning_rate": 4.9485436893203884e-05, "loss": 0.3031, "step": 106 }, { "epoch": 1.0373493975903614, "grad_norm": 8.294384956359863, "learning_rate": 4.948058252427185e-05, "loss": 0.3037, "step": 107 }, { "epoch": 1.046987951807229, "grad_norm": 12.567138671875, "learning_rate": 4.9475728155339806e-05, "loss": 0.2864, "step": 108 }, { "epoch": 1.0566265060240965, "grad_norm": 22.55906867980957, "learning_rate": 4.947087378640777e-05, "loss": 0.2517, "step": 109 }, { "epoch": 1.0662650602409638, "grad_norm": 8.584589004516602, "learning_rate": 4.946601941747573e-05, "loss": 0.2844, "step": 110 }, { "epoch": 1.0759036144578313, "grad_norm": 10.08066177368164, "learning_rate": 4.9461165048543686e-05, "loss": 0.326, "step": 111 }, { "epoch": 1.0855421686746989, "grad_norm": 1.7596691846847534, "learning_rate": 4.945631067961165e-05, "loss": 0.481, "step": 112 }, { "epoch": 1.0951807228915662, "grad_norm": 5.7214155197143555, "learning_rate": 4.945145631067961e-05, "loss": 0.2502, "step": 113 }, { "epoch": 1.1048192771084338, "grad_norm": 1.5307161808013916, "learning_rate": 4.944660194174758e-05, "loss": 0.3641, "step": 114 }, { "epoch": 1.1144578313253013, "grad_norm": 2.6252524852752686, "learning_rate": 4.944174757281554e-05, "loss": 0.3454, "step": 115 }, { "epoch": 1.1240963855421686, "grad_norm": 3.5030617713928223, "learning_rate": 4.94368932038835e-05, "loss": 0.4317, "step": 116 }, { "epoch": 1.1337349397590362, "grad_norm": 11.596004486083984, "learning_rate": 4.943203883495146e-05, "loss": 0.2121, "step": 117 }, { "epoch": 1.1433734939759037, "grad_norm": 16.691661834716797, "learning_rate": 4.9427184466019424e-05, "loss": 0.2338, "step": 118 }, { "epoch": 1.153012048192771, "grad_norm": 2.9958200454711914, "learning_rate": 4.942233009708738e-05, "loss": 0.258, "step": 119 }, { "epoch": 1.1626506024096386, "grad_norm": 2.343837261199951, "learning_rate": 4.941747572815535e-05, "loss": 0.2786, "step": 120 }, { "epoch": 1.1722891566265061, "grad_norm": 8.101180076599121, "learning_rate": 4.9412621359223304e-05, "loss": 0.3732, "step": 121 }, { "epoch": 1.1819277108433734, "grad_norm": 6.390224456787109, "learning_rate": 4.940776699029126e-05, "loss": 0.3351, "step": 122 }, { "epoch": 1.191566265060241, "grad_norm": 15.108895301818848, "learning_rate": 4.940291262135923e-05, "loss": 0.2458, "step": 123 }, { "epoch": 1.2012048192771085, "grad_norm": 11.687868118286133, "learning_rate": 4.9398058252427184e-05, "loss": 0.2508, "step": 124 }, { "epoch": 1.2108433734939759, "grad_norm": 9.82097053527832, "learning_rate": 4.939320388349515e-05, "loss": 0.2214, "step": 125 }, { "epoch": 1.2204819277108434, "grad_norm": 7.318925380706787, "learning_rate": 4.938834951456311e-05, "loss": 0.3301, "step": 126 }, { "epoch": 1.230120481927711, "grad_norm": 6.230999946594238, "learning_rate": 4.938349514563107e-05, "loss": 0.256, "step": 127 }, { "epoch": 1.2397590361445783, "grad_norm": 2.0459160804748535, "learning_rate": 4.937864077669903e-05, "loss": 0.1629, "step": 128 }, { "epoch": 1.2493975903614458, "grad_norm": 3.2358787059783936, "learning_rate": 4.9373786407766994e-05, "loss": 0.2641, "step": 129 }, { "epoch": 1.2590361445783134, "grad_norm": 8.844782829284668, "learning_rate": 4.936893203883495e-05, "loss": 0.2022, "step": 130 }, { "epoch": 1.2686746987951807, "grad_norm": 4.701797962188721, "learning_rate": 4.9364077669902916e-05, "loss": 0.1458, "step": 131 }, { "epoch": 1.2783132530120482, "grad_norm": 6.029572486877441, "learning_rate": 4.9359223300970874e-05, "loss": 0.4588, "step": 132 }, { "epoch": 1.2879518072289158, "grad_norm": 5.736677646636963, "learning_rate": 4.935436893203884e-05, "loss": 0.3159, "step": 133 }, { "epoch": 1.297590361445783, "grad_norm": 9.074060440063477, "learning_rate": 4.9349514563106796e-05, "loss": 0.2607, "step": 134 }, { "epoch": 1.3072289156626506, "grad_norm": 16.03469467163086, "learning_rate": 4.934466019417476e-05, "loss": 0.4602, "step": 135 }, { "epoch": 1.3168674698795182, "grad_norm": 4.5732855796813965, "learning_rate": 4.933980582524272e-05, "loss": 0.2461, "step": 136 }, { "epoch": 1.3265060240963855, "grad_norm": 4.980825424194336, "learning_rate": 4.933495145631068e-05, "loss": 0.4133, "step": 137 }, { "epoch": 1.336144578313253, "grad_norm": 20.003965377807617, "learning_rate": 4.933009708737864e-05, "loss": 0.4925, "step": 138 }, { "epoch": 1.3457831325301206, "grad_norm": 19.45616912841797, "learning_rate": 4.9325242718446605e-05, "loss": 0.2793, "step": 139 }, { "epoch": 1.355421686746988, "grad_norm": 20.614852905273438, "learning_rate": 4.932038834951457e-05, "loss": 0.2814, "step": 140 }, { "epoch": 1.3650602409638555, "grad_norm": 2.4331717491149902, "learning_rate": 4.931553398058253e-05, "loss": 0.3424, "step": 141 }, { "epoch": 1.374698795180723, "grad_norm": 14.668139457702637, "learning_rate": 4.931067961165049e-05, "loss": 0.3485, "step": 142 }, { "epoch": 1.3843373493975903, "grad_norm": 4.745983123779297, "learning_rate": 4.930582524271845e-05, "loss": 0.2241, "step": 143 }, { "epoch": 1.393975903614458, "grad_norm": 39.34345626831055, "learning_rate": 4.9300970873786414e-05, "loss": 0.3174, "step": 144 }, { "epoch": 1.4036144578313254, "grad_norm": 4.316334247589111, "learning_rate": 4.929611650485437e-05, "loss": 0.1636, "step": 145 }, { "epoch": 1.4132530120481928, "grad_norm": 8.434325218200684, "learning_rate": 4.9291262135922336e-05, "loss": 0.3752, "step": 146 }, { "epoch": 1.4228915662650603, "grad_norm": 17.779144287109375, "learning_rate": 4.9286407766990294e-05, "loss": 0.3782, "step": 147 }, { "epoch": 1.4325301204819278, "grad_norm": 5.278225898742676, "learning_rate": 4.928155339805826e-05, "loss": 0.2831, "step": 148 }, { "epoch": 1.4421686746987952, "grad_norm": 33.951332092285156, "learning_rate": 4.9276699029126216e-05, "loss": 0.4006, "step": 149 }, { "epoch": 1.4518072289156627, "grad_norm": 3.4480018615722656, "learning_rate": 4.9271844660194174e-05, "loss": 0.1737, "step": 150 }, { "epoch": 1.46144578313253, "grad_norm": 4.670155048370361, "learning_rate": 4.926699029126214e-05, "loss": 0.1807, "step": 151 }, { "epoch": 1.4710843373493976, "grad_norm": 15.04736042022705, "learning_rate": 4.9262135922330097e-05, "loss": 0.3227, "step": 152 }, { "epoch": 1.4807228915662651, "grad_norm": 8.030580520629883, "learning_rate": 4.925728155339806e-05, "loss": 0.3489, "step": 153 }, { "epoch": 1.4903614457831325, "grad_norm": 7.050323486328125, "learning_rate": 4.925242718446602e-05, "loss": 0.2165, "step": 154 }, { "epoch": 1.5, "grad_norm": 1.9770101308822632, "learning_rate": 4.924757281553398e-05, "loss": 0.3198, "step": 155 }, { "epoch": 1.5096385542168673, "grad_norm": 6.540473461151123, "learning_rate": 4.924271844660194e-05, "loss": 0.2652, "step": 156 }, { "epoch": 1.519277108433735, "grad_norm": 3.9413344860076904, "learning_rate": 4.9237864077669906e-05, "loss": 0.3652, "step": 157 }, { "epoch": 1.5289156626506024, "grad_norm": 1.5650358200073242, "learning_rate": 4.9233009708737863e-05, "loss": 0.3745, "step": 158 }, { "epoch": 1.5385542168674697, "grad_norm": 2.1920645236968994, "learning_rate": 4.922815533980583e-05, "loss": 0.3788, "step": 159 }, { "epoch": 1.5481927710843375, "grad_norm": 8.255963325500488, "learning_rate": 4.9223300970873786e-05, "loss": 0.3126, "step": 160 }, { "epoch": 1.5578313253012048, "grad_norm": 7.690483570098877, "learning_rate": 4.921844660194175e-05, "loss": 0.2764, "step": 161 }, { "epoch": 1.5674698795180722, "grad_norm": 17.896501541137695, "learning_rate": 4.921359223300971e-05, "loss": 0.456, "step": 162 }, { "epoch": 1.57710843373494, "grad_norm": 2.46951961517334, "learning_rate": 4.920873786407767e-05, "loss": 0.3116, "step": 163 }, { "epoch": 1.5867469879518072, "grad_norm": 7.176329135894775, "learning_rate": 4.920388349514563e-05, "loss": 0.2532, "step": 164 }, { "epoch": 1.5963855421686746, "grad_norm": 7.509214878082275, "learning_rate": 4.9199029126213595e-05, "loss": 0.4828, "step": 165 }, { "epoch": 1.6060240963855423, "grad_norm": 3.6190664768218994, "learning_rate": 4.919417475728156e-05, "loss": 0.2889, "step": 166 }, { "epoch": 1.6156626506024097, "grad_norm": 6.364062786102295, "learning_rate": 4.918932038834952e-05, "loss": 0.2613, "step": 167 }, { "epoch": 1.625301204819277, "grad_norm": 19.052656173706055, "learning_rate": 4.918446601941748e-05, "loss": 0.2479, "step": 168 }, { "epoch": 1.6349397590361445, "grad_norm": 14.350653648376465, "learning_rate": 4.917961165048544e-05, "loss": 0.354, "step": 169 }, { "epoch": 1.644578313253012, "grad_norm": 6.910058498382568, "learning_rate": 4.9174757281553404e-05, "loss": 0.2114, "step": 170 }, { "epoch": 1.6542168674698794, "grad_norm": 5.5534467697143555, "learning_rate": 4.916990291262136e-05, "loss": 0.3484, "step": 171 }, { "epoch": 1.663855421686747, "grad_norm": 13.177319526672363, "learning_rate": 4.9165048543689326e-05, "loss": 0.2346, "step": 172 }, { "epoch": 1.6734939759036145, "grad_norm": 11.107186317443848, "learning_rate": 4.9160194174757284e-05, "loss": 0.5454, "step": 173 }, { "epoch": 1.6831325301204818, "grad_norm": 2.8228461742401123, "learning_rate": 4.915533980582525e-05, "loss": 0.3584, "step": 174 }, { "epoch": 1.6927710843373494, "grad_norm": 11.182879447937012, "learning_rate": 4.9150485436893206e-05, "loss": 0.2141, "step": 175 }, { "epoch": 1.702409638554217, "grad_norm": 7.72999906539917, "learning_rate": 4.914563106796117e-05, "loss": 0.5345, "step": 176 }, { "epoch": 1.7120481927710842, "grad_norm": 10.895652770996094, "learning_rate": 4.914077669902913e-05, "loss": 0.3984, "step": 177 }, { "epoch": 1.7216867469879518, "grad_norm": 26.709976196289062, "learning_rate": 4.9135922330097086e-05, "loss": 0.1543, "step": 178 }, { "epoch": 1.7313253012048193, "grad_norm": 2.1014726161956787, "learning_rate": 4.913106796116505e-05, "loss": 0.3785, "step": 179 }, { "epoch": 1.7409638554216866, "grad_norm": 6.007961273193359, "learning_rate": 4.912621359223301e-05, "loss": 0.4561, "step": 180 }, { "epoch": 1.7506024096385542, "grad_norm": 14.90966796875, "learning_rate": 4.912135922330097e-05, "loss": 0.268, "step": 181 }, { "epoch": 1.7602409638554217, "grad_norm": 30.75738525390625, "learning_rate": 4.911650485436893e-05, "loss": 0.3619, "step": 182 }, { "epoch": 1.769879518072289, "grad_norm": 2.744455099105835, "learning_rate": 4.9111650485436895e-05, "loss": 0.4327, "step": 183 }, { "epoch": 1.7795180722891566, "grad_norm": 8.176011085510254, "learning_rate": 4.910679611650485e-05, "loss": 0.3974, "step": 184 }, { "epoch": 1.7891566265060241, "grad_norm": 13.19811725616455, "learning_rate": 4.910194174757282e-05, "loss": 0.5166, "step": 185 }, { "epoch": 1.7987951807228915, "grad_norm": 5.908742427825928, "learning_rate": 4.9097087378640776e-05, "loss": 0.1131, "step": 186 }, { "epoch": 1.808433734939759, "grad_norm": 19.400938034057617, "learning_rate": 4.909223300970874e-05, "loss": 0.2994, "step": 187 }, { "epoch": 1.8180722891566266, "grad_norm": 6.470232963562012, "learning_rate": 4.90873786407767e-05, "loss": 0.2387, "step": 188 }, { "epoch": 1.8277108433734939, "grad_norm": 1.2153048515319824, "learning_rate": 4.908252427184466e-05, "loss": 0.2254, "step": 189 }, { "epoch": 1.8373493975903614, "grad_norm": 7.900503158569336, "learning_rate": 4.907766990291263e-05, "loss": 0.4513, "step": 190 }, { "epoch": 1.846987951807229, "grad_norm": 3.0783004760742188, "learning_rate": 4.9072815533980585e-05, "loss": 0.4082, "step": 191 }, { "epoch": 1.8566265060240963, "grad_norm": 17.84771728515625, "learning_rate": 4.906796116504855e-05, "loss": 0.4534, "step": 192 }, { "epoch": 1.8662650602409638, "grad_norm": 6.539433002471924, "learning_rate": 4.906310679611651e-05, "loss": 0.2514, "step": 193 }, { "epoch": 1.8759036144578314, "grad_norm": 11.290789604187012, "learning_rate": 4.905825242718447e-05, "loss": 0.2745, "step": 194 }, { "epoch": 1.8855421686746987, "grad_norm": 47.274452209472656, "learning_rate": 4.905339805825243e-05, "loss": 0.5843, "step": 195 }, { "epoch": 1.8951807228915662, "grad_norm": 7.504261016845703, "learning_rate": 4.9048543689320394e-05, "loss": 0.3095, "step": 196 }, { "epoch": 1.9048192771084338, "grad_norm": 12.961978912353516, "learning_rate": 4.904368932038835e-05, "loss": 0.488, "step": 197 }, { "epoch": 1.9144578313253011, "grad_norm": 4.256357192993164, "learning_rate": 4.9038834951456316e-05, "loss": 0.2775, "step": 198 }, { "epoch": 1.9240963855421687, "grad_norm": 8.69863510131836, "learning_rate": 4.9033980582524274e-05, "loss": 0.2412, "step": 199 }, { "epoch": 1.9337349397590362, "grad_norm": 10.119851112365723, "learning_rate": 4.902912621359224e-05, "loss": 0.3733, "step": 200 }, { "epoch": 1.9433734939759035, "grad_norm": 4.402695655822754, "learning_rate": 4.9024271844660196e-05, "loss": 0.2637, "step": 201 }, { "epoch": 1.953012048192771, "grad_norm": 10.955796241760254, "learning_rate": 4.901941747572816e-05, "loss": 0.428, "step": 202 }, { "epoch": 1.9626506024096386, "grad_norm": 9.667986869812012, "learning_rate": 4.901456310679612e-05, "loss": 0.3269, "step": 203 }, { "epoch": 1.972289156626506, "grad_norm": 11.471282958984375, "learning_rate": 4.900970873786408e-05, "loss": 0.2764, "step": 204 }, { "epoch": 1.9819277108433735, "grad_norm": 12.558530807495117, "learning_rate": 4.900485436893204e-05, "loss": 0.3452, "step": 205 }, { "epoch": 1.991566265060241, "grad_norm": 13.613166809082031, "learning_rate": 4.9e-05, "loss": 0.4822, "step": 206 }, { "epoch": 2.0072289156626506, "grad_norm": 6.274798393249512, "learning_rate": 4.899514563106796e-05, "loss": 0.1682, "step": 207 }, { "epoch": 2.016867469879518, "grad_norm": 6.998697280883789, "learning_rate": 4.899029126213592e-05, "loss": 0.2592, "step": 208 }, { "epoch": 2.0265060240963857, "grad_norm": 13.206299781799316, "learning_rate": 4.8985436893203885e-05, "loss": 0.4093, "step": 209 }, { "epoch": 2.036144578313253, "grad_norm": 8.973857879638672, "learning_rate": 4.898058252427184e-05, "loss": 0.3242, "step": 210 }, { "epoch": 2.0457831325301203, "grad_norm": 12.741914749145508, "learning_rate": 4.897572815533981e-05, "loss": 0.1394, "step": 211 }, { "epoch": 2.055421686746988, "grad_norm": 3.6222760677337646, "learning_rate": 4.8970873786407765e-05, "loss": 0.1874, "step": 212 }, { "epoch": 2.0650602409638554, "grad_norm": 2.9758031368255615, "learning_rate": 4.896601941747573e-05, "loss": 0.1019, "step": 213 }, { "epoch": 2.0746987951807228, "grad_norm": 11.02209186553955, "learning_rate": 4.896116504854369e-05, "loss": 0.3443, "step": 214 }, { "epoch": 2.0843373493975905, "grad_norm": 8.036227226257324, "learning_rate": 4.895631067961166e-05, "loss": 0.2912, "step": 215 }, { "epoch": 2.093975903614458, "grad_norm": 16.067045211791992, "learning_rate": 4.895145631067962e-05, "loss": 0.3622, "step": 216 }, { "epoch": 2.103614457831325, "grad_norm": 3.9156429767608643, "learning_rate": 4.8946601941747574e-05, "loss": 0.2042, "step": 217 }, { "epoch": 2.113253012048193, "grad_norm": 0.9984110593795776, "learning_rate": 4.894174757281554e-05, "loss": 0.226, "step": 218 }, { "epoch": 2.1228915662650603, "grad_norm": 20.342836380004883, "learning_rate": 4.89368932038835e-05, "loss": 0.2828, "step": 219 }, { "epoch": 2.1325301204819276, "grad_norm": 3.1645607948303223, "learning_rate": 4.893203883495146e-05, "loss": 0.3226, "step": 220 }, { "epoch": 2.1421686746987953, "grad_norm": 3.0275161266326904, "learning_rate": 4.892718446601942e-05, "loss": 0.2457, "step": 221 }, { "epoch": 2.1518072289156627, "grad_norm": 3.876610279083252, "learning_rate": 4.8922330097087384e-05, "loss": 0.1715, "step": 222 }, { "epoch": 2.16144578313253, "grad_norm": 14.552804946899414, "learning_rate": 4.891747572815534e-05, "loss": 0.1908, "step": 223 }, { "epoch": 2.1710843373493978, "grad_norm": 10.864977836608887, "learning_rate": 4.8912621359223306e-05, "loss": 0.3427, "step": 224 }, { "epoch": 2.180722891566265, "grad_norm": 3.972689390182495, "learning_rate": 4.8907766990291264e-05, "loss": 0.2821, "step": 225 }, { "epoch": 2.1903614457831324, "grad_norm": 3.522738218307495, "learning_rate": 4.890291262135923e-05, "loss": 0.2765, "step": 226 }, { "epoch": 2.2, "grad_norm": 2.871262311935425, "learning_rate": 4.8898058252427186e-05, "loss": 0.3216, "step": 227 }, { "epoch": 2.2096385542168675, "grad_norm": 10.792874336242676, "learning_rate": 4.889320388349515e-05, "loss": 0.1611, "step": 228 }, { "epoch": 2.219277108433735, "grad_norm": 9.869952201843262, "learning_rate": 4.888834951456311e-05, "loss": 0.3651, "step": 229 }, { "epoch": 2.2289156626506026, "grad_norm": 5.6764373779296875, "learning_rate": 4.888349514563107e-05, "loss": 0.2461, "step": 230 }, { "epoch": 2.23855421686747, "grad_norm": 3.657252073287964, "learning_rate": 4.887864077669903e-05, "loss": 0.6616, "step": 231 }, { "epoch": 2.2481927710843372, "grad_norm": 55.44496536254883, "learning_rate": 4.887378640776699e-05, "loss": 0.3775, "step": 232 }, { "epoch": 2.257831325301205, "grad_norm": 3.4450442790985107, "learning_rate": 4.886893203883495e-05, "loss": 0.2935, "step": 233 }, { "epoch": 2.2674698795180723, "grad_norm": 9.721818923950195, "learning_rate": 4.886407766990291e-05, "loss": 0.4474, "step": 234 }, { "epoch": 2.2771084337349397, "grad_norm": 20.61795997619629, "learning_rate": 4.8859223300970875e-05, "loss": 0.3916, "step": 235 }, { "epoch": 2.2867469879518074, "grad_norm": 7.548231601715088, "learning_rate": 4.885436893203883e-05, "loss": 0.2279, "step": 236 }, { "epoch": 2.2963855421686747, "grad_norm": 6.752506256103516, "learning_rate": 4.88495145631068e-05, "loss": 0.3517, "step": 237 }, { "epoch": 2.306024096385542, "grad_norm": 33.941749572753906, "learning_rate": 4.8844660194174755e-05, "loss": 0.3055, "step": 238 }, { "epoch": 2.31566265060241, "grad_norm": 3.714153528213501, "learning_rate": 4.883980582524272e-05, "loss": 0.1702, "step": 239 }, { "epoch": 2.325301204819277, "grad_norm": 11.285866737365723, "learning_rate": 4.8834951456310684e-05, "loss": 0.4677, "step": 240 }, { "epoch": 2.3349397590361445, "grad_norm": 3.8346710205078125, "learning_rate": 4.883009708737865e-05, "loss": 0.2868, "step": 241 }, { "epoch": 2.3445783132530122, "grad_norm": 21.44188690185547, "learning_rate": 4.8825242718446606e-05, "loss": 0.2658, "step": 242 }, { "epoch": 2.3542168674698796, "grad_norm": 2.993797779083252, "learning_rate": 4.8820388349514564e-05, "loss": 0.1677, "step": 243 }, { "epoch": 2.363855421686747, "grad_norm": 12.544434547424316, "learning_rate": 4.881553398058253e-05, "loss": 0.3745, "step": 244 }, { "epoch": 2.3734939759036147, "grad_norm": 9.597868919372559, "learning_rate": 4.8810679611650486e-05, "loss": 0.2657, "step": 245 }, { "epoch": 2.383132530120482, "grad_norm": 10.340269088745117, "learning_rate": 4.880582524271845e-05, "loss": 0.268, "step": 246 }, { "epoch": 2.3927710843373493, "grad_norm": 2.9151694774627686, "learning_rate": 4.880097087378641e-05, "loss": 0.2691, "step": 247 }, { "epoch": 2.402409638554217, "grad_norm": 5.148952484130859, "learning_rate": 4.879611650485437e-05, "loss": 0.241, "step": 248 }, { "epoch": 2.4120481927710844, "grad_norm": 2.7053420543670654, "learning_rate": 4.879126213592233e-05, "loss": 0.5545, "step": 249 }, { "epoch": 2.4216867469879517, "grad_norm": 7.3540873527526855, "learning_rate": 4.8786407766990296e-05, "loss": 0.2774, "step": 250 }, { "epoch": 2.4313253012048195, "grad_norm": 4.422383785247803, "learning_rate": 4.878155339805825e-05, "loss": 0.2047, "step": 251 }, { "epoch": 2.440963855421687, "grad_norm": 14.173503875732422, "learning_rate": 4.877669902912622e-05, "loss": 0.3962, "step": 252 }, { "epoch": 2.450602409638554, "grad_norm": 11.374469757080078, "learning_rate": 4.8771844660194176e-05, "loss": 0.4419, "step": 253 }, { "epoch": 2.460240963855422, "grad_norm": 2.496692180633545, "learning_rate": 4.876699029126214e-05, "loss": 0.3967, "step": 254 }, { "epoch": 2.4698795180722892, "grad_norm": 10.10113525390625, "learning_rate": 4.87621359223301e-05, "loss": 0.2948, "step": 255 }, { "epoch": 2.4795180722891565, "grad_norm": 4.080567836761475, "learning_rate": 4.875728155339806e-05, "loss": 0.3614, "step": 256 }, { "epoch": 2.4891566265060243, "grad_norm": 11.169478416442871, "learning_rate": 4.875242718446602e-05, "loss": 0.2903, "step": 257 }, { "epoch": 2.4987951807228916, "grad_norm": 3.96718430519104, "learning_rate": 4.8747572815533985e-05, "loss": 0.4681, "step": 258 }, { "epoch": 2.508433734939759, "grad_norm": 3.495339870452881, "learning_rate": 4.874271844660194e-05, "loss": 0.3059, "step": 259 }, { "epoch": 2.5180722891566267, "grad_norm": 10.91677474975586, "learning_rate": 4.87378640776699e-05, "loss": 0.4235, "step": 260 }, { "epoch": 2.527710843373494, "grad_norm": 18.68159294128418, "learning_rate": 4.8733009708737865e-05, "loss": 0.2671, "step": 261 }, { "epoch": 2.5373493975903614, "grad_norm": 3.249129056930542, "learning_rate": 4.872815533980582e-05, "loss": 0.5433, "step": 262 }, { "epoch": 2.546987951807229, "grad_norm": 5.9366679191589355, "learning_rate": 4.872330097087379e-05, "loss": 0.2111, "step": 263 }, { "epoch": 2.5566265060240965, "grad_norm": 4.232071876525879, "learning_rate": 4.8718446601941745e-05, "loss": 0.2117, "step": 264 }, { "epoch": 2.566265060240964, "grad_norm": 3.160550117492676, "learning_rate": 4.871359223300971e-05, "loss": 0.4508, "step": 265 }, { "epoch": 2.5759036144578316, "grad_norm": 19.8659610748291, "learning_rate": 4.8708737864077674e-05, "loss": 0.3601, "step": 266 }, { "epoch": 2.585542168674699, "grad_norm": 8.850093841552734, "learning_rate": 4.870388349514564e-05, "loss": 0.3376, "step": 267 }, { "epoch": 2.595180722891566, "grad_norm": 6.833710670471191, "learning_rate": 4.8699029126213596e-05, "loss": 0.3744, "step": 268 }, { "epoch": 2.604819277108434, "grad_norm": 4.658482551574707, "learning_rate": 4.869417475728156e-05, "loss": 0.2394, "step": 269 }, { "epoch": 2.6144578313253013, "grad_norm": 32.11513137817383, "learning_rate": 4.868932038834952e-05, "loss": 0.3085, "step": 270 }, { "epoch": 2.6240963855421686, "grad_norm": 32.16909408569336, "learning_rate": 4.8684466019417476e-05, "loss": 0.5384, "step": 271 }, { "epoch": 2.6337349397590364, "grad_norm": 6.196805477142334, "learning_rate": 4.867961165048544e-05, "loss": 0.3027, "step": 272 }, { "epoch": 2.6433734939759037, "grad_norm": 40.545101165771484, "learning_rate": 4.86747572815534e-05, "loss": 0.2939, "step": 273 }, { "epoch": 2.653012048192771, "grad_norm": 1.9337053298950195, "learning_rate": 4.866990291262136e-05, "loss": 0.0899, "step": 274 }, { "epoch": 2.662650602409639, "grad_norm": 2.399242639541626, "learning_rate": 4.866504854368932e-05, "loss": 0.3963, "step": 275 }, { "epoch": 2.672289156626506, "grad_norm": 5.162437915802002, "learning_rate": 4.8660194174757285e-05, "loss": 0.3165, "step": 276 }, { "epoch": 2.6819277108433734, "grad_norm": 7.203155517578125, "learning_rate": 4.865533980582524e-05, "loss": 0.3541, "step": 277 }, { "epoch": 2.691566265060241, "grad_norm": 8.846324920654297, "learning_rate": 4.865048543689321e-05, "loss": 0.5019, "step": 278 }, { "epoch": 2.7012048192771085, "grad_norm": 9.042549133300781, "learning_rate": 4.8645631067961165e-05, "loss": 0.1958, "step": 279 }, { "epoch": 2.710843373493976, "grad_norm": 2.4408771991729736, "learning_rate": 4.864077669902913e-05, "loss": 0.4348, "step": 280 }, { "epoch": 2.7204819277108436, "grad_norm": 6.609375, "learning_rate": 4.863592233009709e-05, "loss": 0.2421, "step": 281 }, { "epoch": 2.730120481927711, "grad_norm": 3.9408557415008545, "learning_rate": 4.863106796116505e-05, "loss": 0.1714, "step": 282 }, { "epoch": 2.7397590361445783, "grad_norm": 4.102409362792969, "learning_rate": 4.862621359223301e-05, "loss": 0.3124, "step": 283 }, { "epoch": 2.749397590361446, "grad_norm": 6.1018147468566895, "learning_rate": 4.8621359223300975e-05, "loss": 0.3523, "step": 284 }, { "epoch": 2.7590361445783134, "grad_norm": 10.550959587097168, "learning_rate": 4.861650485436893e-05, "loss": 0.4182, "step": 285 }, { "epoch": 2.7686746987951807, "grad_norm": 10.64708137512207, "learning_rate": 4.86116504854369e-05, "loss": 0.2721, "step": 286 }, { "epoch": 2.7783132530120485, "grad_norm": 3.7174439430236816, "learning_rate": 4.8606796116504855e-05, "loss": 0.335, "step": 287 }, { "epoch": 2.787951807228916, "grad_norm": 6.692753314971924, "learning_rate": 4.860194174757281e-05, "loss": 0.5631, "step": 288 }, { "epoch": 2.797590361445783, "grad_norm": 6.677447319030762, "learning_rate": 4.859708737864078e-05, "loss": 0.3048, "step": 289 }, { "epoch": 2.807228915662651, "grad_norm": 13.431446075439453, "learning_rate": 4.8592233009708735e-05, "loss": 0.1784, "step": 290 }, { "epoch": 2.816867469879518, "grad_norm": 13.643593788146973, "learning_rate": 4.8587378640776706e-05, "loss": 0.3815, "step": 291 }, { "epoch": 2.8265060240963855, "grad_norm": 18.291589736938477, "learning_rate": 4.8582524271844664e-05, "loss": 0.4145, "step": 292 }, { "epoch": 2.8361445783132533, "grad_norm": 4.816029071807861, "learning_rate": 4.857766990291263e-05, "loss": 0.4424, "step": 293 }, { "epoch": 2.8457831325301206, "grad_norm": 11.071351051330566, "learning_rate": 4.8572815533980586e-05, "loss": 0.3801, "step": 294 }, { "epoch": 2.855421686746988, "grad_norm": 2.3176536560058594, "learning_rate": 4.856796116504855e-05, "loss": 0.2564, "step": 295 }, { "epoch": 2.8650602409638557, "grad_norm": 5.677560329437256, "learning_rate": 4.856310679611651e-05, "loss": 0.5631, "step": 296 }, { "epoch": 2.874698795180723, "grad_norm": 12.62639045715332, "learning_rate": 4.855825242718447e-05, "loss": 0.4948, "step": 297 }, { "epoch": 2.8843373493975903, "grad_norm": 28.455333709716797, "learning_rate": 4.855339805825243e-05, "loss": 0.2099, "step": 298 }, { "epoch": 2.8939759036144577, "grad_norm": 3.928844690322876, "learning_rate": 4.854854368932039e-05, "loss": 0.2672, "step": 299 }, { "epoch": 2.9036144578313254, "grad_norm": 1.73330819606781, "learning_rate": 4.854368932038835e-05, "loss": 0.2292, "step": 300 }, { "epoch": 2.9132530120481928, "grad_norm": 3.049363136291504, "learning_rate": 4.853883495145631e-05, "loss": 0.2993, "step": 301 }, { "epoch": 2.92289156626506, "grad_norm": 3.599433422088623, "learning_rate": 4.8533980582524275e-05, "loss": 0.415, "step": 302 }, { "epoch": 2.932530120481928, "grad_norm": 16.67830467224121, "learning_rate": 4.852912621359223e-05, "loss": 0.2948, "step": 303 }, { "epoch": 2.942168674698795, "grad_norm": 12.277578353881836, "learning_rate": 4.85242718446602e-05, "loss": 0.3889, "step": 304 }, { "epoch": 2.9518072289156625, "grad_norm": 4.137121677398682, "learning_rate": 4.8519417475728155e-05, "loss": 0.2765, "step": 305 }, { "epoch": 2.9614457831325303, "grad_norm": 9.521222114562988, "learning_rate": 4.851456310679612e-05, "loss": 0.3332, "step": 306 }, { "epoch": 2.9710843373493976, "grad_norm": 10.027732849121094, "learning_rate": 4.850970873786408e-05, "loss": 0.3006, "step": 307 }, { "epoch": 2.980722891566265, "grad_norm": 6.071107864379883, "learning_rate": 4.850485436893204e-05, "loss": 0.3326, "step": 308 }, { "epoch": 2.9903614457831327, "grad_norm": 15.463041305541992, "learning_rate": 4.85e-05, "loss": 0.3237, "step": 309 }, { "epoch": 3.0060240963855422, "grad_norm": 10.86634349822998, "learning_rate": 4.8495145631067964e-05, "loss": 0.2649, "step": 310 }, { "epoch": 3.0156626506024096, "grad_norm": 3.6566250324249268, "learning_rate": 4.849029126213592e-05, "loss": 0.3468, "step": 311 }, { "epoch": 3.025301204819277, "grad_norm": 2.280978202819824, "learning_rate": 4.848543689320389e-05, "loss": 0.2189, "step": 312 }, { "epoch": 3.0349397590361447, "grad_norm": 3.0840845108032227, "learning_rate": 4.8480582524271844e-05, "loss": 0.2321, "step": 313 }, { "epoch": 3.044578313253012, "grad_norm": 11.658539772033691, "learning_rate": 4.847572815533981e-05, "loss": 0.1893, "step": 314 }, { "epoch": 3.0542168674698793, "grad_norm": 3.972259998321533, "learning_rate": 4.847087378640777e-05, "loss": 0.2175, "step": 315 }, { "epoch": 3.063855421686747, "grad_norm": 11.73523998260498, "learning_rate": 4.846601941747573e-05, "loss": 0.1722, "step": 316 }, { "epoch": 3.0734939759036144, "grad_norm": 4.1933674812316895, "learning_rate": 4.8461165048543696e-05, "loss": 0.3671, "step": 317 }, { "epoch": 3.0831325301204817, "grad_norm": 6.807486057281494, "learning_rate": 4.8456310679611654e-05, "loss": 0.2394, "step": 318 }, { "epoch": 3.0927710843373495, "grad_norm": 4.856175422668457, "learning_rate": 4.845145631067962e-05, "loss": 0.2479, "step": 319 }, { "epoch": 3.102409638554217, "grad_norm": 3.0709140300750732, "learning_rate": 4.8446601941747576e-05, "loss": 0.2395, "step": 320 }, { "epoch": 3.112048192771084, "grad_norm": 1.958458423614502, "learning_rate": 4.844174757281554e-05, "loss": 0.1465, "step": 321 }, { "epoch": 3.121686746987952, "grad_norm": 2.7163004875183105, "learning_rate": 4.84368932038835e-05, "loss": 0.2635, "step": 322 }, { "epoch": 3.1313253012048192, "grad_norm": 2.8570730686187744, "learning_rate": 4.843203883495146e-05, "loss": 0.301, "step": 323 }, { "epoch": 3.1409638554216865, "grad_norm": 7.458629131317139, "learning_rate": 4.842718446601942e-05, "loss": 0.2936, "step": 324 }, { "epoch": 3.1506024096385543, "grad_norm": 1.9340667724609375, "learning_rate": 4.8422330097087385e-05, "loss": 0.4741, "step": 325 }, { "epoch": 3.1602409638554216, "grad_norm": 4.611169338226318, "learning_rate": 4.841747572815534e-05, "loss": 0.1891, "step": 326 }, { "epoch": 3.169879518072289, "grad_norm": 2.3895087242126465, "learning_rate": 4.84126213592233e-05, "loss": 0.2595, "step": 327 }, { "epoch": 3.1795180722891567, "grad_norm": 2.668553590774536, "learning_rate": 4.8407766990291265e-05, "loss": 0.1941, "step": 328 }, { "epoch": 3.189156626506024, "grad_norm": 13.16700553894043, "learning_rate": 4.840291262135922e-05, "loss": 0.2608, "step": 329 }, { "epoch": 3.1987951807228914, "grad_norm": 0.7606527209281921, "learning_rate": 4.839805825242719e-05, "loss": 0.183, "step": 330 }, { "epoch": 3.208433734939759, "grad_norm": 3.6898252964019775, "learning_rate": 4.8393203883495145e-05, "loss": 0.4922, "step": 331 }, { "epoch": 3.2180722891566265, "grad_norm": 4.055345058441162, "learning_rate": 4.838834951456311e-05, "loss": 0.3571, "step": 332 }, { "epoch": 3.227710843373494, "grad_norm": 3.7873995304107666, "learning_rate": 4.838349514563107e-05, "loss": 0.1632, "step": 333 }, { "epoch": 3.2373493975903616, "grad_norm": 4.20884370803833, "learning_rate": 4.837864077669903e-05, "loss": 0.1767, "step": 334 }, { "epoch": 3.246987951807229, "grad_norm": 2.8951821327209473, "learning_rate": 4.837378640776699e-05, "loss": 0.2529, "step": 335 }, { "epoch": 3.256626506024096, "grad_norm": 1.7725900411605835, "learning_rate": 4.8368932038834954e-05, "loss": 0.4313, "step": 336 }, { "epoch": 3.266265060240964, "grad_norm": 7.639788627624512, "learning_rate": 4.836407766990291e-05, "loss": 0.333, "step": 337 }, { "epoch": 3.2759036144578313, "grad_norm": 8.003412246704102, "learning_rate": 4.8359223300970876e-05, "loss": 0.2858, "step": 338 }, { "epoch": 3.2855421686746986, "grad_norm": 17.66754150390625, "learning_rate": 4.8354368932038834e-05, "loss": 0.299, "step": 339 }, { "epoch": 3.2951807228915664, "grad_norm": 24.617053985595703, "learning_rate": 4.83495145631068e-05, "loss": 0.5169, "step": 340 }, { "epoch": 3.3048192771084337, "grad_norm": 10.229394912719727, "learning_rate": 4.834466019417476e-05, "loss": 0.4821, "step": 341 }, { "epoch": 3.314457831325301, "grad_norm": 16.286630630493164, "learning_rate": 4.833980582524272e-05, "loss": 0.3209, "step": 342 }, { "epoch": 3.324096385542169, "grad_norm": 7.675579071044922, "learning_rate": 4.8334951456310686e-05, "loss": 0.2657, "step": 343 }, { "epoch": 3.333734939759036, "grad_norm": 2.992511510848999, "learning_rate": 4.833009708737864e-05, "loss": 0.2002, "step": 344 }, { "epoch": 3.3433734939759034, "grad_norm": 5.159013271331787, "learning_rate": 4.832524271844661e-05, "loss": 0.3003, "step": 345 }, { "epoch": 3.353012048192771, "grad_norm": 5.324949741363525, "learning_rate": 4.8320388349514566e-05, "loss": 0.3093, "step": 346 }, { "epoch": 3.3626506024096385, "grad_norm": 6.342013359069824, "learning_rate": 4.831553398058253e-05, "loss": 0.2327, "step": 347 }, { "epoch": 3.372289156626506, "grad_norm": 5.937595844268799, "learning_rate": 4.831067961165049e-05, "loss": 0.1349, "step": 348 }, { "epoch": 3.3819277108433736, "grad_norm": 1.739205241203308, "learning_rate": 4.830582524271845e-05, "loss": 0.2309, "step": 349 }, { "epoch": 3.391566265060241, "grad_norm": 11.153475761413574, "learning_rate": 4.830097087378641e-05, "loss": 0.4251, "step": 350 }, { "epoch": 3.4012048192771083, "grad_norm": 20.642248153686523, "learning_rate": 4.8296116504854375e-05, "loss": 0.3862, "step": 351 }, { "epoch": 3.410843373493976, "grad_norm": 2.7987499237060547, "learning_rate": 4.829126213592233e-05, "loss": 0.1818, "step": 352 }, { "epoch": 3.4204819277108434, "grad_norm": 46.03645706176758, "learning_rate": 4.82864077669903e-05, "loss": 0.2855, "step": 353 }, { "epoch": 3.4301204819277107, "grad_norm": 13.885860443115234, "learning_rate": 4.8281553398058255e-05, "loss": 0.3165, "step": 354 }, { "epoch": 3.4397590361445785, "grad_norm": 4.371873378753662, "learning_rate": 4.827669902912621e-05, "loss": 0.3451, "step": 355 }, { "epoch": 3.4493975903614458, "grad_norm": 15.056500434875488, "learning_rate": 4.827184466019418e-05, "loss": 0.5055, "step": 356 }, { "epoch": 3.459036144578313, "grad_norm": 4.623950958251953, "learning_rate": 4.8266990291262135e-05, "loss": 0.2494, "step": 357 }, { "epoch": 3.468674698795181, "grad_norm": 3.747121810913086, "learning_rate": 4.82621359223301e-05, "loss": 0.3079, "step": 358 }, { "epoch": 3.478313253012048, "grad_norm": 6.18499231338501, "learning_rate": 4.825728155339806e-05, "loss": 0.3545, "step": 359 }, { "epoch": 3.4879518072289155, "grad_norm": 7.779442310333252, "learning_rate": 4.825242718446602e-05, "loss": 0.3738, "step": 360 }, { "epoch": 3.4975903614457833, "grad_norm": 8.615042686462402, "learning_rate": 4.824757281553398e-05, "loss": 0.4425, "step": 361 }, { "epoch": 3.5072289156626506, "grad_norm": 1.3331774473190308, "learning_rate": 4.8242718446601944e-05, "loss": 0.1979, "step": 362 }, { "epoch": 3.516867469879518, "grad_norm": 9.577934265136719, "learning_rate": 4.82378640776699e-05, "loss": 0.3031, "step": 363 }, { "epoch": 3.5265060240963857, "grad_norm": 2.169813871383667, "learning_rate": 4.8233009708737866e-05, "loss": 0.2494, "step": 364 }, { "epoch": 3.536144578313253, "grad_norm": 8.667654991149902, "learning_rate": 4.8228155339805824e-05, "loss": 0.3884, "step": 365 }, { "epoch": 3.5457831325301203, "grad_norm": 21.827733993530273, "learning_rate": 4.822330097087379e-05, "loss": 0.2987, "step": 366 }, { "epoch": 3.555421686746988, "grad_norm": 6.126503944396973, "learning_rate": 4.821844660194175e-05, "loss": 0.1916, "step": 367 }, { "epoch": 3.5650602409638554, "grad_norm": 3.958144187927246, "learning_rate": 4.821359223300971e-05, "loss": 0.1395, "step": 368 }, { "epoch": 3.5746987951807228, "grad_norm": 6.880648612976074, "learning_rate": 4.8208737864077675e-05, "loss": 0.4516, "step": 369 }, { "epoch": 3.5843373493975905, "grad_norm": 3.2553937435150146, "learning_rate": 4.820388349514563e-05, "loss": 0.2637, "step": 370 }, { "epoch": 3.593975903614458, "grad_norm": 2.7937304973602295, "learning_rate": 4.81990291262136e-05, "loss": 0.3, "step": 371 }, { "epoch": 3.603614457831325, "grad_norm": 3.20497727394104, "learning_rate": 4.8194174757281555e-05, "loss": 0.327, "step": 372 }, { "epoch": 3.613253012048193, "grad_norm": 16.870901107788086, "learning_rate": 4.818932038834952e-05, "loss": 0.3487, "step": 373 }, { "epoch": 3.6228915662650603, "grad_norm": 8.549176216125488, "learning_rate": 4.818446601941748e-05, "loss": 0.3717, "step": 374 }, { "epoch": 3.6325301204819276, "grad_norm": 3.760505437850952, "learning_rate": 4.817961165048544e-05, "loss": 0.2628, "step": 375 }, { "epoch": 3.6421686746987953, "grad_norm": 7.075438976287842, "learning_rate": 4.81747572815534e-05, "loss": 0.1991, "step": 376 }, { "epoch": 3.6518072289156627, "grad_norm": 2.0375442504882812, "learning_rate": 4.8169902912621365e-05, "loss": 0.1959, "step": 377 }, { "epoch": 3.66144578313253, "grad_norm": 12.940825462341309, "learning_rate": 4.816504854368932e-05, "loss": 0.3213, "step": 378 }, { "epoch": 3.6710843373493978, "grad_norm": 2.442385196685791, "learning_rate": 4.816019417475729e-05, "loss": 0.4027, "step": 379 }, { "epoch": 3.680722891566265, "grad_norm": 25.66146469116211, "learning_rate": 4.8155339805825245e-05, "loss": 0.3434, "step": 380 }, { "epoch": 3.6903614457831324, "grad_norm": 7.936379432678223, "learning_rate": 4.81504854368932e-05, "loss": 0.3155, "step": 381 }, { "epoch": 3.7, "grad_norm": 3.494856357574463, "learning_rate": 4.814563106796117e-05, "loss": 0.1789, "step": 382 }, { "epoch": 3.7096385542168675, "grad_norm": 4.1026740074157715, "learning_rate": 4.8140776699029125e-05, "loss": 0.3026, "step": 383 }, { "epoch": 3.719277108433735, "grad_norm": 9.93897819519043, "learning_rate": 4.813592233009709e-05, "loss": 0.2226, "step": 384 }, { "epoch": 3.7289156626506026, "grad_norm": 2.944387912750244, "learning_rate": 4.813106796116505e-05, "loss": 0.357, "step": 385 }, { "epoch": 3.73855421686747, "grad_norm": 9.707123756408691, "learning_rate": 4.812621359223301e-05, "loss": 0.3568, "step": 386 }, { "epoch": 3.7481927710843372, "grad_norm": 14.196006774902344, "learning_rate": 4.812135922330097e-05, "loss": 0.532, "step": 387 }, { "epoch": 3.757831325301205, "grad_norm": 4.824563980102539, "learning_rate": 4.8116504854368934e-05, "loss": 0.3535, "step": 388 }, { "epoch": 3.7674698795180723, "grad_norm": 2.9592583179473877, "learning_rate": 4.811165048543689e-05, "loss": 0.3913, "step": 389 }, { "epoch": 3.7771084337349397, "grad_norm": 5.993344783782959, "learning_rate": 4.8106796116504856e-05, "loss": 0.3728, "step": 390 }, { "epoch": 3.7867469879518074, "grad_norm": 9.887032508850098, "learning_rate": 4.8101941747572814e-05, "loss": 0.421, "step": 391 }, { "epoch": 3.7963855421686747, "grad_norm": 6.334994792938232, "learning_rate": 4.809708737864078e-05, "loss": 0.3319, "step": 392 }, { "epoch": 3.806024096385542, "grad_norm": 16.990022659301758, "learning_rate": 4.809223300970874e-05, "loss": 0.2699, "step": 393 }, { "epoch": 3.81566265060241, "grad_norm": 1.5799612998962402, "learning_rate": 4.80873786407767e-05, "loss": 0.3397, "step": 394 }, { "epoch": 3.825301204819277, "grad_norm": 3.9984216690063477, "learning_rate": 4.8082524271844665e-05, "loss": 0.2438, "step": 395 }, { "epoch": 3.8349397590361445, "grad_norm": 4.066417694091797, "learning_rate": 4.807766990291262e-05, "loss": 0.274, "step": 396 }, { "epoch": 3.8445783132530122, "grad_norm": 15.173487663269043, "learning_rate": 4.807281553398059e-05, "loss": 0.2982, "step": 397 }, { "epoch": 3.8542168674698796, "grad_norm": 4.687649726867676, "learning_rate": 4.8067961165048545e-05, "loss": 0.16, "step": 398 }, { "epoch": 3.863855421686747, "grad_norm": 9.955038070678711, "learning_rate": 4.806310679611651e-05, "loss": 0.2337, "step": 399 }, { "epoch": 3.8734939759036147, "grad_norm": 2.9626681804656982, "learning_rate": 4.805825242718447e-05, "loss": 0.2593, "step": 400 }, { "epoch": 3.883132530120482, "grad_norm": 3.2831172943115234, "learning_rate": 4.805339805825243e-05, "loss": 0.4422, "step": 401 }, { "epoch": 3.8927710843373493, "grad_norm": 4.8138885498046875, "learning_rate": 4.804854368932039e-05, "loss": 0.2712, "step": 402 }, { "epoch": 3.902409638554217, "grad_norm": 7.099545001983643, "learning_rate": 4.8043689320388354e-05, "loss": 0.3047, "step": 403 }, { "epoch": 3.9120481927710844, "grad_norm": 7.569052219390869, "learning_rate": 4.803883495145631e-05, "loss": 0.386, "step": 404 }, { "epoch": 3.9216867469879517, "grad_norm": 5.4520158767700195, "learning_rate": 4.8033980582524277e-05, "loss": 0.424, "step": 405 }, { "epoch": 3.9313253012048195, "grad_norm": 4.981518268585205, "learning_rate": 4.8029126213592234e-05, "loss": 0.4284, "step": 406 }, { "epoch": 3.940963855421687, "grad_norm": 1.929745078086853, "learning_rate": 4.80242718446602e-05, "loss": 0.2715, "step": 407 }, { "epoch": 3.950602409638554, "grad_norm": 9.611261367797852, "learning_rate": 4.801941747572816e-05, "loss": 0.292, "step": 408 }, { "epoch": 3.960240963855422, "grad_norm": 3.6579716205596924, "learning_rate": 4.8014563106796114e-05, "loss": 0.3296, "step": 409 }, { "epoch": 3.9698795180722892, "grad_norm": 15.938058853149414, "learning_rate": 4.800970873786408e-05, "loss": 0.4003, "step": 410 }, { "epoch": 3.9795180722891565, "grad_norm": 11.312716484069824, "learning_rate": 4.800485436893204e-05, "loss": 0.1457, "step": 411 }, { "epoch": 3.9891566265060243, "grad_norm": 5.677548408508301, "learning_rate": 4.8e-05, "loss": 0.211, "step": 412 }, { "epoch": 4.004819277108433, "grad_norm": 9.243349075317383, "learning_rate": 4.799514563106796e-05, "loss": 0.3758, "step": 413 }, { "epoch": 4.014457831325301, "grad_norm": 10.21805477142334, "learning_rate": 4.7990291262135924e-05, "loss": 0.3508, "step": 414 }, { "epoch": 4.024096385542169, "grad_norm": 1.940496563911438, "learning_rate": 4.798543689320388e-05, "loss": 0.1487, "step": 415 }, { "epoch": 4.033734939759036, "grad_norm": 3.357924222946167, "learning_rate": 4.7980582524271846e-05, "loss": 0.1871, "step": 416 }, { "epoch": 4.043373493975904, "grad_norm": 2.1289312839508057, "learning_rate": 4.797572815533981e-05, "loss": 0.3024, "step": 417 }, { "epoch": 4.053012048192771, "grad_norm": 5.220306873321533, "learning_rate": 4.7970873786407775e-05, "loss": 0.5222, "step": 418 }, { "epoch": 4.062650602409638, "grad_norm": 4.573537826538086, "learning_rate": 4.796601941747573e-05, "loss": 0.2912, "step": 419 }, { "epoch": 4.072289156626506, "grad_norm": 3.657864809036255, "learning_rate": 4.796116504854369e-05, "loss": 0.238, "step": 420 }, { "epoch": 4.081927710843374, "grad_norm": 19.217174530029297, "learning_rate": 4.7956310679611655e-05, "loss": 0.383, "step": 421 }, { "epoch": 4.091566265060241, "grad_norm": 12.424019813537598, "learning_rate": 4.795145631067961e-05, "loss": 0.2686, "step": 422 }, { "epoch": 4.1012048192771084, "grad_norm": 3.739607810974121, "learning_rate": 4.794660194174758e-05, "loss": 0.1798, "step": 423 }, { "epoch": 4.110843373493976, "grad_norm": 5.497581958770752, "learning_rate": 4.7941747572815535e-05, "loss": 0.3515, "step": 424 }, { "epoch": 4.120481927710843, "grad_norm": 12.470587730407715, "learning_rate": 4.79368932038835e-05, "loss": 0.5776, "step": 425 }, { "epoch": 4.130120481927711, "grad_norm": 14.701051712036133, "learning_rate": 4.793203883495146e-05, "loss": 0.3842, "step": 426 }, { "epoch": 4.139759036144579, "grad_norm": 8.966710090637207, "learning_rate": 4.792718446601942e-05, "loss": 0.3236, "step": 427 }, { "epoch": 4.1493975903614455, "grad_norm": 11.464835166931152, "learning_rate": 4.792233009708738e-05, "loss": 0.2527, "step": 428 }, { "epoch": 4.159036144578313, "grad_norm": 2.708972930908203, "learning_rate": 4.7917475728155344e-05, "loss": 0.2542, "step": 429 }, { "epoch": 4.168674698795181, "grad_norm": 16.673585891723633, "learning_rate": 4.79126213592233e-05, "loss": 0.3369, "step": 430 }, { "epoch": 4.178313253012048, "grad_norm": 14.991854667663574, "learning_rate": 4.7907766990291266e-05, "loss": 0.3458, "step": 431 }, { "epoch": 4.187951807228916, "grad_norm": 3.2478296756744385, "learning_rate": 4.7902912621359224e-05, "loss": 0.372, "step": 432 }, { "epoch": 4.1975903614457835, "grad_norm": 5.034290313720703, "learning_rate": 4.789805825242719e-05, "loss": 0.4128, "step": 433 }, { "epoch": 4.20722891566265, "grad_norm": 2.4183523654937744, "learning_rate": 4.7893203883495146e-05, "loss": 0.2943, "step": 434 }, { "epoch": 4.216867469879518, "grad_norm": 11.12487506866455, "learning_rate": 4.788834951456311e-05, "loss": 0.2589, "step": 435 }, { "epoch": 4.226506024096386, "grad_norm": 3.7382962703704834, "learning_rate": 4.788349514563107e-05, "loss": 0.4678, "step": 436 }, { "epoch": 4.236144578313253, "grad_norm": 1.6145668029785156, "learning_rate": 4.7878640776699026e-05, "loss": 0.2726, "step": 437 }, { "epoch": 4.2457831325301205, "grad_norm": 6.578299045562744, "learning_rate": 4.787378640776699e-05, "loss": 0.2097, "step": 438 }, { "epoch": 4.255421686746988, "grad_norm": 5.125805377960205, "learning_rate": 4.786893203883495e-05, "loss": 0.3861, "step": 439 }, { "epoch": 4.265060240963855, "grad_norm": 1.9273425340652466, "learning_rate": 4.786407766990291e-05, "loss": 0.1264, "step": 440 }, { "epoch": 4.274698795180723, "grad_norm": 8.942710876464844, "learning_rate": 4.785922330097087e-05, "loss": 0.2847, "step": 441 }, { "epoch": 4.284337349397591, "grad_norm": 6.792550563812256, "learning_rate": 4.785436893203884e-05, "loss": 0.2858, "step": 442 }, { "epoch": 4.293975903614458, "grad_norm": 4.3086371421813965, "learning_rate": 4.78495145631068e-05, "loss": 0.4037, "step": 443 }, { "epoch": 4.303614457831325, "grad_norm": 1.2378965616226196, "learning_rate": 4.7844660194174765e-05, "loss": 0.2221, "step": 444 }, { "epoch": 4.313253012048193, "grad_norm": 6.863492012023926, "learning_rate": 4.783980582524272e-05, "loss": 0.2925, "step": 445 }, { "epoch": 4.32289156626506, "grad_norm": 6.852990627288818, "learning_rate": 4.783495145631069e-05, "loss": 0.2797, "step": 446 }, { "epoch": 4.332530120481928, "grad_norm": 2.6686103343963623, "learning_rate": 4.7830097087378645e-05, "loss": 0.4903, "step": 447 }, { "epoch": 4.3421686746987955, "grad_norm": 2.3293888568878174, "learning_rate": 4.78252427184466e-05, "loss": 0.2546, "step": 448 }, { "epoch": 4.351807228915662, "grad_norm": 12.948970794677734, "learning_rate": 4.782038834951457e-05, "loss": 0.3897, "step": 449 }, { "epoch": 4.36144578313253, "grad_norm": 4.577274799346924, "learning_rate": 4.7815533980582525e-05, "loss": 0.2902, "step": 450 }, { "epoch": 4.371084337349398, "grad_norm": 6.2672905921936035, "learning_rate": 4.781067961165049e-05, "loss": 0.3002, "step": 451 }, { "epoch": 4.380722891566265, "grad_norm": 2.155344247817993, "learning_rate": 4.780582524271845e-05, "loss": 0.1777, "step": 452 }, { "epoch": 4.390361445783133, "grad_norm": 3.858290195465088, "learning_rate": 4.780097087378641e-05, "loss": 0.2839, "step": 453 }, { "epoch": 4.4, "grad_norm": 9.242982864379883, "learning_rate": 4.779611650485437e-05, "loss": 0.1817, "step": 454 }, { "epoch": 4.409638554216867, "grad_norm": 7.638232231140137, "learning_rate": 4.7791262135922334e-05, "loss": 0.3877, "step": 455 }, { "epoch": 4.419277108433735, "grad_norm": 4.355132579803467, "learning_rate": 4.778640776699029e-05, "loss": 0.2376, "step": 456 }, { "epoch": 4.428915662650603, "grad_norm": 3.3889713287353516, "learning_rate": 4.7781553398058256e-05, "loss": 0.1894, "step": 457 }, { "epoch": 4.43855421686747, "grad_norm": 5.962655067443848, "learning_rate": 4.7776699029126214e-05, "loss": 0.3937, "step": 458 }, { "epoch": 4.448192771084337, "grad_norm": 4.1917290687561035, "learning_rate": 4.777184466019418e-05, "loss": 0.2621, "step": 459 }, { "epoch": 4.457831325301205, "grad_norm": 7.988455772399902, "learning_rate": 4.7766990291262136e-05, "loss": 0.2441, "step": 460 }, { "epoch": 4.467469879518072, "grad_norm": 3.436630964279175, "learning_rate": 4.77621359223301e-05, "loss": 0.1915, "step": 461 }, { "epoch": 4.47710843373494, "grad_norm": 25.35869598388672, "learning_rate": 4.775728155339806e-05, "loss": 0.4975, "step": 462 }, { "epoch": 4.486746987951808, "grad_norm": 4.029381275177002, "learning_rate": 4.775242718446602e-05, "loss": 0.3215, "step": 463 }, { "epoch": 4.4963855421686745, "grad_norm": 3.130215644836426, "learning_rate": 4.774757281553398e-05, "loss": 0.3379, "step": 464 }, { "epoch": 4.506024096385542, "grad_norm": 5.755617141723633, "learning_rate": 4.774271844660194e-05, "loss": 0.4959, "step": 465 }, { "epoch": 4.51566265060241, "grad_norm": 6.544938564300537, "learning_rate": 4.77378640776699e-05, "loss": 0.4053, "step": 466 }, { "epoch": 4.525301204819277, "grad_norm": 9.988370895385742, "learning_rate": 4.773300970873786e-05, "loss": 0.4035, "step": 467 }, { "epoch": 4.534939759036145, "grad_norm": 2.721207857131958, "learning_rate": 4.772815533980583e-05, "loss": 0.221, "step": 468 }, { "epoch": 4.544578313253012, "grad_norm": 14.226771354675293, "learning_rate": 4.772330097087379e-05, "loss": 0.4788, "step": 469 }, { "epoch": 4.554216867469879, "grad_norm": 3.464500904083252, "learning_rate": 4.7718446601941754e-05, "loss": 0.1431, "step": 470 }, { "epoch": 4.563855421686747, "grad_norm": 4.279683589935303, "learning_rate": 4.771359223300971e-05, "loss": 0.3959, "step": 471 }, { "epoch": 4.573493975903615, "grad_norm": 3.056666612625122, "learning_rate": 4.770873786407768e-05, "loss": 0.3033, "step": 472 }, { "epoch": 4.583132530120482, "grad_norm": 5.068829536437988, "learning_rate": 4.7703883495145634e-05, "loss": 0.3634, "step": 473 }, { "epoch": 4.5927710843373495, "grad_norm": 3.566631317138672, "learning_rate": 4.76990291262136e-05, "loss": 0.1721, "step": 474 }, { "epoch": 4.602409638554217, "grad_norm": 15.675792694091797, "learning_rate": 4.769417475728156e-05, "loss": 0.4079, "step": 475 }, { "epoch": 4.612048192771084, "grad_norm": 5.214557647705078, "learning_rate": 4.7689320388349515e-05, "loss": 0.4397, "step": 476 }, { "epoch": 4.621686746987952, "grad_norm": 1.7465325593948364, "learning_rate": 4.768446601941748e-05, "loss": 0.3491, "step": 477 }, { "epoch": 4.63132530120482, "grad_norm": 7.4678955078125, "learning_rate": 4.767961165048544e-05, "loss": 0.5309, "step": 478 }, { "epoch": 4.6409638554216865, "grad_norm": 3.2674384117126465, "learning_rate": 4.76747572815534e-05, "loss": 0.3654, "step": 479 }, { "epoch": 4.650602409638554, "grad_norm": 7.523477554321289, "learning_rate": 4.766990291262136e-05, "loss": 0.3803, "step": 480 }, { "epoch": 4.660240963855422, "grad_norm": 2.020399808883667, "learning_rate": 4.7665048543689324e-05, "loss": 0.2135, "step": 481 }, { "epoch": 4.669879518072289, "grad_norm": 20.322874069213867, "learning_rate": 4.766019417475728e-05, "loss": 0.3117, "step": 482 }, { "epoch": 4.679518072289157, "grad_norm": 3.9505465030670166, "learning_rate": 4.7655339805825246e-05, "loss": 0.4017, "step": 483 }, { "epoch": 4.6891566265060245, "grad_norm": 6.553677082061768, "learning_rate": 4.7650485436893204e-05, "loss": 0.3075, "step": 484 }, { "epoch": 4.698795180722891, "grad_norm": 10.442938804626465, "learning_rate": 4.764563106796117e-05, "loss": 0.397, "step": 485 }, { "epoch": 4.708433734939759, "grad_norm": 4.371866703033447, "learning_rate": 4.7640776699029126e-05, "loss": 0.1721, "step": 486 }, { "epoch": 4.718072289156627, "grad_norm": 2.6197402477264404, "learning_rate": 4.763592233009709e-05, "loss": 0.3512, "step": 487 }, { "epoch": 4.727710843373494, "grad_norm": 2.8696651458740234, "learning_rate": 4.763106796116505e-05, "loss": 0.2086, "step": 488 }, { "epoch": 4.7373493975903616, "grad_norm": 1.0096957683563232, "learning_rate": 4.762621359223301e-05, "loss": 0.0703, "step": 489 }, { "epoch": 4.746987951807229, "grad_norm": 53.72925567626953, "learning_rate": 4.762135922330097e-05, "loss": 0.2063, "step": 490 }, { "epoch": 4.756626506024096, "grad_norm": 6.8379034996032715, "learning_rate": 4.7616504854368935e-05, "loss": 0.296, "step": 491 }, { "epoch": 4.766265060240964, "grad_norm": 6.302413463592529, "learning_rate": 4.761165048543689e-05, "loss": 0.5587, "step": 492 }, { "epoch": 4.775903614457832, "grad_norm": 3.1472277641296387, "learning_rate": 4.760679611650486e-05, "loss": 0.2238, "step": 493 }, { "epoch": 4.785542168674699, "grad_norm": 4.399823188781738, "learning_rate": 4.760194174757282e-05, "loss": 0.1301, "step": 494 }, { "epoch": 4.795180722891566, "grad_norm": 4.216440200805664, "learning_rate": 4.759708737864078e-05, "loss": 0.3646, "step": 495 }, { "epoch": 4.804819277108434, "grad_norm": 8.250653266906738, "learning_rate": 4.7592233009708744e-05, "loss": 0.3273, "step": 496 }, { "epoch": 4.814457831325301, "grad_norm": 10.920014381408691, "learning_rate": 4.75873786407767e-05, "loss": 0.2714, "step": 497 }, { "epoch": 4.824096385542169, "grad_norm": 1.625805139541626, "learning_rate": 4.7582524271844667e-05, "loss": 0.3085, "step": 498 }, { "epoch": 4.833734939759037, "grad_norm": 1.4361283779144287, "learning_rate": 4.7577669902912624e-05, "loss": 0.164, "step": 499 }, { "epoch": 4.843373493975903, "grad_norm": 7.2461628913879395, "learning_rate": 4.757281553398059e-05, "loss": 0.528, "step": 500 }, { "epoch": 4.853012048192771, "grad_norm": 4.7429633140563965, "learning_rate": 4.7567961165048547e-05, "loss": 0.3941, "step": 501 }, { "epoch": 4.862650602409639, "grad_norm": 7.92336893081665, "learning_rate": 4.756310679611651e-05, "loss": 0.2788, "step": 502 }, { "epoch": 4.872289156626506, "grad_norm": 2.3442676067352295, "learning_rate": 4.755825242718447e-05, "loss": 0.1698, "step": 503 }, { "epoch": 4.881927710843374, "grad_norm": 7.419341564178467, "learning_rate": 4.755339805825243e-05, "loss": 0.2554, "step": 504 }, { "epoch": 4.891566265060241, "grad_norm": 1.3446584939956665, "learning_rate": 4.754854368932039e-05, "loss": 0.1721, "step": 505 }, { "epoch": 4.901204819277108, "grad_norm": 4.389260292053223, "learning_rate": 4.754368932038835e-05, "loss": 0.2741, "step": 506 }, { "epoch": 4.910843373493976, "grad_norm": 2.501661777496338, "learning_rate": 4.7538834951456313e-05, "loss": 0.2268, "step": 507 }, { "epoch": 4.920481927710844, "grad_norm": 4.686625003814697, "learning_rate": 4.753398058252427e-05, "loss": 0.1691, "step": 508 }, { "epoch": 4.930120481927711, "grad_norm": 1.0261642932891846, "learning_rate": 4.7529126213592236e-05, "loss": 0.1787, "step": 509 }, { "epoch": 4.9397590361445785, "grad_norm": 8.389043807983398, "learning_rate": 4.7524271844660193e-05, "loss": 0.4076, "step": 510 }, { "epoch": 4.949397590361446, "grad_norm": 14.262243270874023, "learning_rate": 4.751941747572816e-05, "loss": 0.1135, "step": 511 }, { "epoch": 4.959036144578313, "grad_norm": 2.1697404384613037, "learning_rate": 4.7514563106796116e-05, "loss": 0.174, "step": 512 }, { "epoch": 4.968674698795181, "grad_norm": 4.01217794418335, "learning_rate": 4.750970873786408e-05, "loss": 0.1956, "step": 513 }, { "epoch": 4.978313253012049, "grad_norm": 21.94748306274414, "learning_rate": 4.750485436893204e-05, "loss": 0.4294, "step": 514 }, { "epoch": 4.9879518072289155, "grad_norm": 3.770840644836426, "learning_rate": 4.75e-05, "loss": 0.2801, "step": 515 }, { "epoch": 5.0036144578313255, "grad_norm": 2.282386541366577, "learning_rate": 4.749514563106796e-05, "loss": 0.1927, "step": 516 }, { "epoch": 5.013253012048192, "grad_norm": 3.9907848834991455, "learning_rate": 4.7490291262135925e-05, "loss": 0.2588, "step": 517 }, { "epoch": 5.02289156626506, "grad_norm": 4.138058662414551, "learning_rate": 4.748543689320389e-05, "loss": 0.3325, "step": 518 }, { "epoch": 5.032530120481928, "grad_norm": 3.050960063934326, "learning_rate": 4.748058252427185e-05, "loss": 0.2275, "step": 519 }, { "epoch": 5.042168674698795, "grad_norm": 8.700907707214355, "learning_rate": 4.747572815533981e-05, "loss": 0.3219, "step": 520 }, { "epoch": 5.051807228915663, "grad_norm": 3.229574680328369, "learning_rate": 4.747087378640777e-05, "loss": 0.3417, "step": 521 }, { "epoch": 5.06144578313253, "grad_norm": 3.1844496726989746, "learning_rate": 4.7466019417475734e-05, "loss": 0.3663, "step": 522 }, { "epoch": 5.071084337349397, "grad_norm": 3.8453056812286377, "learning_rate": 4.746116504854369e-05, "loss": 0.4332, "step": 523 }, { "epoch": 5.080722891566265, "grad_norm": 2.213808059692383, "learning_rate": 4.7456310679611656e-05, "loss": 0.3359, "step": 524 }, { "epoch": 5.090361445783133, "grad_norm": 2.095156192779541, "learning_rate": 4.7451456310679614e-05, "loss": 0.1737, "step": 525 }, { "epoch": 5.1, "grad_norm": 11.841104507446289, "learning_rate": 4.744660194174758e-05, "loss": 0.3201, "step": 526 }, { "epoch": 5.109638554216867, "grad_norm": 6.668193817138672, "learning_rate": 4.7441747572815536e-05, "loss": 0.2961, "step": 527 }, { "epoch": 5.119277108433735, "grad_norm": 2.418901205062866, "learning_rate": 4.74368932038835e-05, "loss": 0.2983, "step": 528 }, { "epoch": 5.128915662650602, "grad_norm": 3.297058343887329, "learning_rate": 4.743203883495146e-05, "loss": 0.197, "step": 529 }, { "epoch": 5.13855421686747, "grad_norm": 22.413698196411133, "learning_rate": 4.7427184466019416e-05, "loss": 0.5006, "step": 530 }, { "epoch": 5.148192771084338, "grad_norm": 10.084492683410645, "learning_rate": 4.742233009708738e-05, "loss": 0.2916, "step": 531 }, { "epoch": 5.1578313253012045, "grad_norm": 3.491637945175171, "learning_rate": 4.741747572815534e-05, "loss": 0.3224, "step": 532 }, { "epoch": 5.167469879518072, "grad_norm": 5.171361446380615, "learning_rate": 4.74126213592233e-05, "loss": 0.2718, "step": 533 }, { "epoch": 5.17710843373494, "grad_norm": 6.2447829246521, "learning_rate": 4.740776699029126e-05, "loss": 0.3576, "step": 534 }, { "epoch": 5.186746987951807, "grad_norm": 2.1757843494415283, "learning_rate": 4.7402912621359226e-05, "loss": 0.2126, "step": 535 }, { "epoch": 5.196385542168675, "grad_norm": 6.777596950531006, "learning_rate": 4.739805825242718e-05, "loss": 0.3627, "step": 536 }, { "epoch": 5.206024096385542, "grad_norm": 3.3118045330047607, "learning_rate": 4.739320388349515e-05, "loss": 0.1828, "step": 537 }, { "epoch": 5.215662650602409, "grad_norm": 6.0190253257751465, "learning_rate": 4.7388349514563106e-05, "loss": 0.4115, "step": 538 }, { "epoch": 5.225301204819277, "grad_norm": 2.909242630004883, "learning_rate": 4.738349514563107e-05, "loss": 0.1799, "step": 539 }, { "epoch": 5.234939759036145, "grad_norm": 2.3271608352661133, "learning_rate": 4.737864077669903e-05, "loss": 0.2969, "step": 540 }, { "epoch": 5.244578313253012, "grad_norm": 2.5888850688934326, "learning_rate": 4.737378640776699e-05, "loss": 0.1739, "step": 541 }, { "epoch": 5.2542168674698795, "grad_norm": 6.855090141296387, "learning_rate": 4.736893203883495e-05, "loss": 0.1887, "step": 542 }, { "epoch": 5.263855421686747, "grad_norm": 6.452843189239502, "learning_rate": 4.7364077669902915e-05, "loss": 0.1944, "step": 543 }, { "epoch": 5.273493975903614, "grad_norm": 2.189025402069092, "learning_rate": 4.735922330097088e-05, "loss": 0.2579, "step": 544 }, { "epoch": 5.283132530120482, "grad_norm": 6.604395866394043, "learning_rate": 4.735436893203884e-05, "loss": 0.3347, "step": 545 }, { "epoch": 5.29277108433735, "grad_norm": 4.606276988983154, "learning_rate": 4.73495145631068e-05, "loss": 0.3765, "step": 546 }, { "epoch": 5.3024096385542165, "grad_norm": 21.141279220581055, "learning_rate": 4.734466019417476e-05, "loss": 0.272, "step": 547 }, { "epoch": 5.312048192771084, "grad_norm": 12.497819900512695, "learning_rate": 4.7339805825242724e-05, "loss": 0.3898, "step": 548 }, { "epoch": 5.321686746987952, "grad_norm": 25.39984703063965, "learning_rate": 4.733495145631068e-05, "loss": 0.3824, "step": 549 }, { "epoch": 5.331325301204819, "grad_norm": 3.193082094192505, "learning_rate": 4.7330097087378646e-05, "loss": 0.2577, "step": 550 }, { "epoch": 5.340963855421687, "grad_norm": 7.330043315887451, "learning_rate": 4.7325242718446604e-05, "loss": 0.2387, "step": 551 }, { "epoch": 5.3506024096385545, "grad_norm": 4.145134449005127, "learning_rate": 4.732038834951457e-05, "loss": 0.2278, "step": 552 }, { "epoch": 5.360240963855421, "grad_norm": 4.272213459014893, "learning_rate": 4.7315533980582526e-05, "loss": 0.1947, "step": 553 }, { "epoch": 5.369879518072289, "grad_norm": 5.619327068328857, "learning_rate": 4.731067961165049e-05, "loss": 0.2286, "step": 554 }, { "epoch": 5.379518072289157, "grad_norm": 4.237926006317139, "learning_rate": 4.730582524271845e-05, "loss": 0.2686, "step": 555 }, { "epoch": 5.389156626506024, "grad_norm": 12.60412883758545, "learning_rate": 4.730097087378641e-05, "loss": 0.289, "step": 556 }, { "epoch": 5.3987951807228916, "grad_norm": 28.29212760925293, "learning_rate": 4.729611650485437e-05, "loss": 0.1585, "step": 557 }, { "epoch": 5.408433734939759, "grad_norm": 5.42540168762207, "learning_rate": 4.729126213592233e-05, "loss": 0.1695, "step": 558 }, { "epoch": 5.418072289156626, "grad_norm": 4.239876747131348, "learning_rate": 4.728640776699029e-05, "loss": 0.2568, "step": 559 }, { "epoch": 5.427710843373494, "grad_norm": 10.219941139221191, "learning_rate": 4.728155339805825e-05, "loss": 0.4778, "step": 560 }, { "epoch": 5.437349397590362, "grad_norm": 12.00943660736084, "learning_rate": 4.7276699029126215e-05, "loss": 0.2765, "step": 561 }, { "epoch": 5.446987951807229, "grad_norm": 7.733757495880127, "learning_rate": 4.727184466019417e-05, "loss": 0.4012, "step": 562 }, { "epoch": 5.456626506024096, "grad_norm": 0.9454633593559265, "learning_rate": 4.726699029126214e-05, "loss": 0.1198, "step": 563 }, { "epoch": 5.466265060240964, "grad_norm": 4.734395980834961, "learning_rate": 4.7262135922330095e-05, "loss": 0.3342, "step": 564 }, { "epoch": 5.475903614457831, "grad_norm": 1.615465760231018, "learning_rate": 4.725728155339806e-05, "loss": 0.1746, "step": 565 }, { "epoch": 5.485542168674699, "grad_norm": 3.369479179382324, "learning_rate": 4.725242718446602e-05, "loss": 0.2601, "step": 566 }, { "epoch": 5.495180722891567, "grad_norm": 5.686043739318848, "learning_rate": 4.724757281553398e-05, "loss": 0.2704, "step": 567 }, { "epoch": 5.504819277108433, "grad_norm": 2.9258170127868652, "learning_rate": 4.724271844660194e-05, "loss": 0.124, "step": 568 }, { "epoch": 5.514457831325301, "grad_norm": 2.0521063804626465, "learning_rate": 4.7237864077669904e-05, "loss": 0.2826, "step": 569 }, { "epoch": 5.524096385542169, "grad_norm": 1.8690638542175293, "learning_rate": 4.723300970873787e-05, "loss": 0.3174, "step": 570 }, { "epoch": 5.533734939759036, "grad_norm": 4.3231377601623535, "learning_rate": 4.722815533980583e-05, "loss": 0.2084, "step": 571 }, { "epoch": 5.543373493975904, "grad_norm": 4.7133588790893555, "learning_rate": 4.722330097087379e-05, "loss": 0.3874, "step": 572 }, { "epoch": 5.553012048192771, "grad_norm": 10.317421913146973, "learning_rate": 4.721844660194175e-05, "loss": 0.2639, "step": 573 }, { "epoch": 5.562650602409638, "grad_norm": 10.738027572631836, "learning_rate": 4.7213592233009714e-05, "loss": 0.3945, "step": 574 }, { "epoch": 5.572289156626506, "grad_norm": 10.268561363220215, "learning_rate": 4.720873786407767e-05, "loss": 0.4122, "step": 575 }, { "epoch": 5.581927710843374, "grad_norm": 2.3756887912750244, "learning_rate": 4.7203883495145636e-05, "loss": 0.1086, "step": 576 }, { "epoch": 5.591566265060241, "grad_norm": 3.3451125621795654, "learning_rate": 4.7199029126213594e-05, "loss": 0.3667, "step": 577 }, { "epoch": 5.6012048192771084, "grad_norm": 11.700827598571777, "learning_rate": 4.719417475728156e-05, "loss": 0.2824, "step": 578 }, { "epoch": 5.610843373493976, "grad_norm": 3.4391212463378906, "learning_rate": 4.7189320388349516e-05, "loss": 0.2147, "step": 579 }, { "epoch": 5.620481927710843, "grad_norm": 2.1455631256103516, "learning_rate": 4.718446601941748e-05, "loss": 0.2716, "step": 580 }, { "epoch": 5.630120481927711, "grad_norm": 3.901353120803833, "learning_rate": 4.717961165048544e-05, "loss": 0.2808, "step": 581 }, { "epoch": 5.639759036144579, "grad_norm": 2.0172948837280273, "learning_rate": 4.71747572815534e-05, "loss": 0.2519, "step": 582 }, { "epoch": 5.6493975903614455, "grad_norm": 3.33083438873291, "learning_rate": 4.716990291262136e-05, "loss": 0.4287, "step": 583 }, { "epoch": 5.659036144578313, "grad_norm": 3.2284653186798096, "learning_rate": 4.7165048543689325e-05, "loss": 0.5311, "step": 584 }, { "epoch": 5.668674698795181, "grad_norm": 3.083561897277832, "learning_rate": 4.716019417475728e-05, "loss": 0.2991, "step": 585 }, { "epoch": 5.678313253012048, "grad_norm": 8.177228927612305, "learning_rate": 4.715533980582524e-05, "loss": 0.3136, "step": 586 }, { "epoch": 5.687951807228916, "grad_norm": 3.8282432556152344, "learning_rate": 4.7150485436893205e-05, "loss": 0.4172, "step": 587 }, { "epoch": 5.6975903614457835, "grad_norm": 3.6085753440856934, "learning_rate": 4.714563106796116e-05, "loss": 0.4284, "step": 588 }, { "epoch": 5.70722891566265, "grad_norm": 2.3309924602508545, "learning_rate": 4.714077669902913e-05, "loss": 0.2105, "step": 589 }, { "epoch": 5.716867469879518, "grad_norm": 4.158708095550537, "learning_rate": 4.7135922330097085e-05, "loss": 0.4371, "step": 590 }, { "epoch": 5.726506024096386, "grad_norm": 3.1363871097564697, "learning_rate": 4.713106796116505e-05, "loss": 0.2276, "step": 591 }, { "epoch": 5.736144578313253, "grad_norm": 4.141551494598389, "learning_rate": 4.712621359223301e-05, "loss": 0.3555, "step": 592 }, { "epoch": 5.7457831325301205, "grad_norm": 4.082943916320801, "learning_rate": 4.712135922330097e-05, "loss": 0.2892, "step": 593 }, { "epoch": 5.755421686746988, "grad_norm": 4.130939960479736, "learning_rate": 4.7116504854368937e-05, "loss": 0.1921, "step": 594 }, { "epoch": 5.765060240963855, "grad_norm": 7.356468200683594, "learning_rate": 4.71116504854369e-05, "loss": 0.2988, "step": 595 }, { "epoch": 5.774698795180723, "grad_norm": 1.8935840129852295, "learning_rate": 4.710679611650486e-05, "loss": 0.2453, "step": 596 }, { "epoch": 5.784337349397591, "grad_norm": 2.4765913486480713, "learning_rate": 4.7101941747572817e-05, "loss": 0.3244, "step": 597 }, { "epoch": 5.793975903614458, "grad_norm": 9.680598258972168, "learning_rate": 4.709708737864078e-05, "loss": 0.4157, "step": 598 }, { "epoch": 5.803614457831325, "grad_norm": 2.855682849884033, "learning_rate": 4.709223300970874e-05, "loss": 0.2576, "step": 599 }, { "epoch": 5.813253012048193, "grad_norm": 3.978502035140991, "learning_rate": 4.7087378640776703e-05, "loss": 0.3854, "step": 600 }, { "epoch": 5.82289156626506, "grad_norm": 5.253439903259277, "learning_rate": 4.708252427184466e-05, "loss": 0.4906, "step": 601 }, { "epoch": 5.832530120481928, "grad_norm": 9.00416088104248, "learning_rate": 4.7077669902912626e-05, "loss": 0.2825, "step": 602 }, { "epoch": 5.8421686746987955, "grad_norm": 4.725515842437744, "learning_rate": 4.7072815533980583e-05, "loss": 0.267, "step": 603 }, { "epoch": 5.851807228915662, "grad_norm": 2.0062952041625977, "learning_rate": 4.706796116504855e-05, "loss": 0.2626, "step": 604 }, { "epoch": 5.86144578313253, "grad_norm": 4.290450572967529, "learning_rate": 4.7063106796116506e-05, "loss": 0.4559, "step": 605 }, { "epoch": 5.871084337349398, "grad_norm": 4.311503887176514, "learning_rate": 4.705825242718447e-05, "loss": 0.3295, "step": 606 }, { "epoch": 5.880722891566265, "grad_norm": 4.33099889755249, "learning_rate": 4.705339805825243e-05, "loss": 0.3473, "step": 607 }, { "epoch": 5.890361445783133, "grad_norm": 3.356210231781006, "learning_rate": 4.704854368932039e-05, "loss": 0.3806, "step": 608 }, { "epoch": 5.9, "grad_norm": 2.5847747325897217, "learning_rate": 4.704368932038835e-05, "loss": 0.2698, "step": 609 }, { "epoch": 5.909638554216867, "grad_norm": 4.08158016204834, "learning_rate": 4.7038834951456315e-05, "loss": 0.3208, "step": 610 }, { "epoch": 5.919277108433735, "grad_norm": 4.038919448852539, "learning_rate": 4.703398058252427e-05, "loss": 0.3264, "step": 611 }, { "epoch": 5.928915662650603, "grad_norm": 1.9158716201782227, "learning_rate": 4.702912621359224e-05, "loss": 0.3531, "step": 612 }, { "epoch": 5.93855421686747, "grad_norm": 3.83559513092041, "learning_rate": 4.7024271844660195e-05, "loss": 0.196, "step": 613 }, { "epoch": 5.948192771084337, "grad_norm": 15.866037368774414, "learning_rate": 4.701941747572815e-05, "loss": 0.363, "step": 614 }, { "epoch": 5.957831325301205, "grad_norm": 2.2197341918945312, "learning_rate": 4.701456310679612e-05, "loss": 0.2192, "step": 615 }, { "epoch": 5.967469879518072, "grad_norm": 2.248267889022827, "learning_rate": 4.7009708737864075e-05, "loss": 0.3204, "step": 616 }, { "epoch": 5.97710843373494, "grad_norm": 3.654074192047119, "learning_rate": 4.700485436893204e-05, "loss": 0.3416, "step": 617 }, { "epoch": 5.986746987951808, "grad_norm": 8.048283576965332, "learning_rate": 4.7e-05, "loss": 0.3682, "step": 618 }, { "epoch": 6.002409638554217, "grad_norm": 4.990579128265381, "learning_rate": 4.699514563106797e-05, "loss": 0.485, "step": 619 }, { "epoch": 6.0120481927710845, "grad_norm": 4.4218549728393555, "learning_rate": 4.6990291262135926e-05, "loss": 0.606, "step": 620 }, { "epoch": 6.021686746987951, "grad_norm": 2.5484702587127686, "learning_rate": 4.698543689320389e-05, "loss": 0.28, "step": 621 }, { "epoch": 6.031325301204819, "grad_norm": 3.606996536254883, "learning_rate": 4.698058252427185e-05, "loss": 0.2614, "step": 622 }, { "epoch": 6.040963855421687, "grad_norm": 5.550053596496582, "learning_rate": 4.697572815533981e-05, "loss": 0.2881, "step": 623 }, { "epoch": 6.050602409638554, "grad_norm": 9.581787109375, "learning_rate": 4.697087378640777e-05, "loss": 0.5484, "step": 624 }, { "epoch": 6.0602409638554215, "grad_norm": 3.318267583847046, "learning_rate": 4.696601941747573e-05, "loss": 0.3053, "step": 625 }, { "epoch": 6.069879518072289, "grad_norm": 10.991321563720703, "learning_rate": 4.696116504854369e-05, "loss": 0.6063, "step": 626 }, { "epoch": 6.079518072289156, "grad_norm": 4.313769817352295, "learning_rate": 4.695631067961165e-05, "loss": 0.4093, "step": 627 }, { "epoch": 6.089156626506024, "grad_norm": 4.167516231536865, "learning_rate": 4.6951456310679615e-05, "loss": 0.3735, "step": 628 }, { "epoch": 6.098795180722892, "grad_norm": 6.8690409660339355, "learning_rate": 4.694660194174757e-05, "loss": 0.4625, "step": 629 }, { "epoch": 6.108433734939759, "grad_norm": 7.424356460571289, "learning_rate": 4.694174757281554e-05, "loss": 0.3973, "step": 630 }, { "epoch": 6.118072289156626, "grad_norm": 2.868095874786377, "learning_rate": 4.6936893203883496e-05, "loss": 0.289, "step": 631 }, { "epoch": 6.127710843373494, "grad_norm": 3.601726531982422, "learning_rate": 4.693203883495146e-05, "loss": 0.2394, "step": 632 }, { "epoch": 6.137349397590361, "grad_norm": 3.0409858226776123, "learning_rate": 4.692718446601942e-05, "loss": 0.234, "step": 633 }, { "epoch": 6.146987951807229, "grad_norm": 15.212102890014648, "learning_rate": 4.692233009708738e-05, "loss": 0.4083, "step": 634 }, { "epoch": 6.156626506024097, "grad_norm": 10.962355613708496, "learning_rate": 4.691747572815534e-05, "loss": 0.2696, "step": 635 }, { "epoch": 6.166265060240963, "grad_norm": 6.064901351928711, "learning_rate": 4.6912621359223305e-05, "loss": 0.2914, "step": 636 }, { "epoch": 6.175903614457831, "grad_norm": 5.803147315979004, "learning_rate": 4.690776699029126e-05, "loss": 0.3014, "step": 637 }, { "epoch": 6.185542168674699, "grad_norm": 3.0587425231933594, "learning_rate": 4.690291262135923e-05, "loss": 0.1854, "step": 638 }, { "epoch": 6.195180722891566, "grad_norm": 4.632628917694092, "learning_rate": 4.6898058252427185e-05, "loss": 0.1271, "step": 639 }, { "epoch": 6.204819277108434, "grad_norm": 9.980729103088379, "learning_rate": 4.689320388349515e-05, "loss": 0.5117, "step": 640 }, { "epoch": 6.214457831325301, "grad_norm": 6.37877893447876, "learning_rate": 4.688834951456311e-05, "loss": 0.279, "step": 641 }, { "epoch": 6.224096385542168, "grad_norm": 2.7885565757751465, "learning_rate": 4.6883495145631065e-05, "loss": 0.2851, "step": 642 }, { "epoch": 6.233734939759036, "grad_norm": 3.205705404281616, "learning_rate": 4.687864077669903e-05, "loss": 0.3701, "step": 643 }, { "epoch": 6.243373493975904, "grad_norm": 2.465770721435547, "learning_rate": 4.6873786407766994e-05, "loss": 0.3111, "step": 644 }, { "epoch": 6.253012048192771, "grad_norm": 2.839329719543457, "learning_rate": 4.686893203883496e-05, "loss": 0.1702, "step": 645 }, { "epoch": 6.2626506024096384, "grad_norm": 3.9319772720336914, "learning_rate": 4.6864077669902916e-05, "loss": 0.2106, "step": 646 }, { "epoch": 6.272289156626506, "grad_norm": 5.414967060089111, "learning_rate": 4.685922330097088e-05, "loss": 0.2433, "step": 647 }, { "epoch": 6.281927710843373, "grad_norm": 2.001967668533325, "learning_rate": 4.685436893203884e-05, "loss": 0.2257, "step": 648 }, { "epoch": 6.291566265060241, "grad_norm": 4.128058433532715, "learning_rate": 4.68495145631068e-05, "loss": 0.3189, "step": 649 }, { "epoch": 6.301204819277109, "grad_norm": 1.5946038961410522, "learning_rate": 4.684466019417476e-05, "loss": 0.1898, "step": 650 }, { "epoch": 6.3108433734939755, "grad_norm": 2.195739269256592, "learning_rate": 4.6839805825242725e-05, "loss": 0.0986, "step": 651 }, { "epoch": 6.320481927710843, "grad_norm": 5.679908752441406, "learning_rate": 4.683495145631068e-05, "loss": 0.4198, "step": 652 }, { "epoch": 6.330120481927711, "grad_norm": 5.280510902404785, "learning_rate": 4.683009708737864e-05, "loss": 0.3206, "step": 653 }, { "epoch": 6.339759036144578, "grad_norm": 4.77690315246582, "learning_rate": 4.6825242718446605e-05, "loss": 0.2998, "step": 654 }, { "epoch": 6.349397590361446, "grad_norm": 4.280205249786377, "learning_rate": 4.682038834951456e-05, "loss": 0.4007, "step": 655 }, { "epoch": 6.3590361445783135, "grad_norm": 2.8505682945251465, "learning_rate": 4.681553398058253e-05, "loss": 0.2765, "step": 656 }, { "epoch": 6.36867469879518, "grad_norm": 6.6777167320251465, "learning_rate": 4.6810679611650485e-05, "loss": 0.3413, "step": 657 }, { "epoch": 6.378313253012048, "grad_norm": 9.24048137664795, "learning_rate": 4.680582524271845e-05, "loss": 0.2228, "step": 658 }, { "epoch": 6.387951807228916, "grad_norm": 3.5978591442108154, "learning_rate": 4.680097087378641e-05, "loss": 0.3754, "step": 659 }, { "epoch": 6.397590361445783, "grad_norm": 2.2877187728881836, "learning_rate": 4.679611650485437e-05, "loss": 0.2241, "step": 660 }, { "epoch": 6.4072289156626505, "grad_norm": 2.453153371810913, "learning_rate": 4.679126213592233e-05, "loss": 0.2425, "step": 661 }, { "epoch": 6.416867469879518, "grad_norm": 4.080256462097168, "learning_rate": 4.6786407766990294e-05, "loss": 0.4131, "step": 662 }, { "epoch": 6.426506024096385, "grad_norm": 2.092622995376587, "learning_rate": 4.678155339805825e-05, "loss": 0.0977, "step": 663 }, { "epoch": 6.436144578313253, "grad_norm": 1.5799105167388916, "learning_rate": 4.677669902912622e-05, "loss": 0.1376, "step": 664 }, { "epoch": 6.445783132530121, "grad_norm": 5.631826877593994, "learning_rate": 4.6771844660194174e-05, "loss": 0.3503, "step": 665 }, { "epoch": 6.455421686746988, "grad_norm": 3.6474742889404297, "learning_rate": 4.676699029126214e-05, "loss": 0.3144, "step": 666 }, { "epoch": 6.465060240963855, "grad_norm": 26.68826675415039, "learning_rate": 4.67621359223301e-05, "loss": 0.4842, "step": 667 }, { "epoch": 6.474698795180723, "grad_norm": 26.203472137451172, "learning_rate": 4.675728155339806e-05, "loss": 0.392, "step": 668 }, { "epoch": 6.48433734939759, "grad_norm": 5.671624183654785, "learning_rate": 4.6752427184466026e-05, "loss": 0.4869, "step": 669 }, { "epoch": 6.493975903614458, "grad_norm": 9.70796012878418, "learning_rate": 4.6747572815533984e-05, "loss": 0.344, "step": 670 }, { "epoch": 6.5036144578313255, "grad_norm": 15.602771759033203, "learning_rate": 4.674271844660195e-05, "loss": 0.3321, "step": 671 }, { "epoch": 6.513253012048192, "grad_norm": 3.386977434158325, "learning_rate": 4.6737864077669906e-05, "loss": 0.4252, "step": 672 }, { "epoch": 6.52289156626506, "grad_norm": 4.324173450469971, "learning_rate": 4.673300970873787e-05, "loss": 0.3967, "step": 673 }, { "epoch": 6.532530120481928, "grad_norm": 3.09676194190979, "learning_rate": 4.672815533980583e-05, "loss": 0.3259, "step": 674 }, { "epoch": 6.542168674698795, "grad_norm": 6.3836588859558105, "learning_rate": 4.672330097087379e-05, "loss": 0.4866, "step": 675 }, { "epoch": 6.551807228915663, "grad_norm": 3.378007173538208, "learning_rate": 4.671844660194175e-05, "loss": 0.2246, "step": 676 }, { "epoch": 6.56144578313253, "grad_norm": 5.501622676849365, "learning_rate": 4.6713592233009715e-05, "loss": 0.4313, "step": 677 }, { "epoch": 6.571084337349397, "grad_norm": 35.951271057128906, "learning_rate": 4.670873786407767e-05, "loss": 0.1944, "step": 678 }, { "epoch": 6.580722891566265, "grad_norm": 4.173672676086426, "learning_rate": 4.670388349514563e-05, "loss": 0.3749, "step": 679 }, { "epoch": 6.590361445783133, "grad_norm": 4.119150638580322, "learning_rate": 4.6699029126213595e-05, "loss": 0.2168, "step": 680 }, { "epoch": 6.6, "grad_norm": 4.698493957519531, "learning_rate": 4.669417475728155e-05, "loss": 0.3677, "step": 681 }, { "epoch": 6.609638554216867, "grad_norm": 2.561603307723999, "learning_rate": 4.668932038834952e-05, "loss": 0.2885, "step": 682 }, { "epoch": 6.619277108433735, "grad_norm": 2.1276426315307617, "learning_rate": 4.6684466019417475e-05, "loss": 0.1859, "step": 683 }, { "epoch": 6.628915662650602, "grad_norm": 3.9785878658294678, "learning_rate": 4.667961165048544e-05, "loss": 0.4042, "step": 684 }, { "epoch": 6.63855421686747, "grad_norm": 14.360816955566406, "learning_rate": 4.66747572815534e-05, "loss": 0.2694, "step": 685 }, { "epoch": 6.648192771084338, "grad_norm": 2.279285430908203, "learning_rate": 4.666990291262136e-05, "loss": 0.2075, "step": 686 }, { "epoch": 6.6578313253012045, "grad_norm": 4.3952860832214355, "learning_rate": 4.666504854368932e-05, "loss": 0.4612, "step": 687 }, { "epoch": 6.667469879518072, "grad_norm": 13.44919490814209, "learning_rate": 4.6660194174757284e-05, "loss": 0.3158, "step": 688 }, { "epoch": 6.67710843373494, "grad_norm": 4.8638014793396, "learning_rate": 4.665533980582524e-05, "loss": 0.3647, "step": 689 }, { "epoch": 6.686746987951807, "grad_norm": 7.963072776794434, "learning_rate": 4.6650485436893207e-05, "loss": 0.4365, "step": 690 }, { "epoch": 6.696385542168675, "grad_norm": 12.648860931396484, "learning_rate": 4.6645631067961164e-05, "loss": 0.4702, "step": 691 }, { "epoch": 6.706024096385542, "grad_norm": 8.27430248260498, "learning_rate": 4.664077669902913e-05, "loss": 0.384, "step": 692 }, { "epoch": 6.715662650602409, "grad_norm": 1.61405611038208, "learning_rate": 4.6635922330097087e-05, "loss": 0.1263, "step": 693 }, { "epoch": 6.725301204819277, "grad_norm": 5.315623760223389, "learning_rate": 4.663106796116505e-05, "loss": 0.3992, "step": 694 }, { "epoch": 6.734939759036145, "grad_norm": 10.996282577514648, "learning_rate": 4.6626213592233016e-05, "loss": 0.3503, "step": 695 }, { "epoch": 6.744578313253012, "grad_norm": 4.747982978820801, "learning_rate": 4.662135922330097e-05, "loss": 0.2359, "step": 696 }, { "epoch": 6.7542168674698795, "grad_norm": 9.64735221862793, "learning_rate": 4.661650485436894e-05, "loss": 0.4141, "step": 697 }, { "epoch": 6.763855421686747, "grad_norm": 7.7083282470703125, "learning_rate": 4.6611650485436896e-05, "loss": 0.3986, "step": 698 }, { "epoch": 6.773493975903614, "grad_norm": 6.988386631011963, "learning_rate": 4.660679611650486e-05, "loss": 0.3085, "step": 699 }, { "epoch": 6.783132530120482, "grad_norm": 14.194134712219238, "learning_rate": 4.660194174757282e-05, "loss": 0.2718, "step": 700 }, { "epoch": 6.79277108433735, "grad_norm": 30.64933967590332, "learning_rate": 4.659708737864078e-05, "loss": 0.3093, "step": 701 }, { "epoch": 6.8024096385542165, "grad_norm": 4.154168128967285, "learning_rate": 4.659223300970874e-05, "loss": 0.2468, "step": 702 }, { "epoch": 6.812048192771084, "grad_norm": 3.0394790172576904, "learning_rate": 4.6587378640776705e-05, "loss": 0.3191, "step": 703 }, { "epoch": 6.821686746987952, "grad_norm": 4.36230993270874, "learning_rate": 4.658252427184466e-05, "loss": 0.3043, "step": 704 }, { "epoch": 6.831325301204819, "grad_norm": 5.122514247894287, "learning_rate": 4.657766990291263e-05, "loss": 0.241, "step": 705 }, { "epoch": 6.840963855421687, "grad_norm": 3.6510119438171387, "learning_rate": 4.6572815533980585e-05, "loss": 0.1767, "step": 706 }, { "epoch": 6.8506024096385545, "grad_norm": 5.779049873352051, "learning_rate": 4.656796116504854e-05, "loss": 0.2994, "step": 707 }, { "epoch": 6.860240963855421, "grad_norm": 5.692295551300049, "learning_rate": 4.656310679611651e-05, "loss": 0.2856, "step": 708 }, { "epoch": 6.869879518072289, "grad_norm": 6.614366054534912, "learning_rate": 4.6558252427184465e-05, "loss": 0.4651, "step": 709 }, { "epoch": 6.879518072289157, "grad_norm": 5.07175874710083, "learning_rate": 4.655339805825243e-05, "loss": 0.1921, "step": 710 }, { "epoch": 6.889156626506024, "grad_norm": 3.417451858520508, "learning_rate": 4.654854368932039e-05, "loss": 0.3676, "step": 711 }, { "epoch": 6.8987951807228916, "grad_norm": 1.7543699741363525, "learning_rate": 4.654368932038835e-05, "loss": 0.14, "step": 712 }, { "epoch": 6.908433734939759, "grad_norm": 4.9626946449279785, "learning_rate": 4.653883495145631e-05, "loss": 0.3837, "step": 713 }, { "epoch": 6.918072289156626, "grad_norm": 3.641284465789795, "learning_rate": 4.6533980582524274e-05, "loss": 0.2326, "step": 714 }, { "epoch": 6.927710843373494, "grad_norm": 3.941368579864502, "learning_rate": 4.652912621359223e-05, "loss": 0.3322, "step": 715 }, { "epoch": 6.937349397590362, "grad_norm": 1.76639986038208, "learning_rate": 4.6524271844660196e-05, "loss": 0.2301, "step": 716 }, { "epoch": 6.946987951807229, "grad_norm": 5.117557525634766, "learning_rate": 4.6519417475728154e-05, "loss": 0.3497, "step": 717 }, { "epoch": 6.956626506024096, "grad_norm": 6.635610103607178, "learning_rate": 4.651456310679612e-05, "loss": 0.4856, "step": 718 }, { "epoch": 6.966265060240964, "grad_norm": 4.491821765899658, "learning_rate": 4.6509708737864076e-05, "loss": 0.3854, "step": 719 }, { "epoch": 6.975903614457831, "grad_norm": 4.756279945373535, "learning_rate": 4.650485436893204e-05, "loss": 0.2308, "step": 720 }, { "epoch": 6.985542168674699, "grad_norm": 5.728425025939941, "learning_rate": 4.6500000000000005e-05, "loss": 0.1469, "step": 721 }, { "epoch": 7.001204819277109, "grad_norm": 3.682901620864868, "learning_rate": 4.649514563106796e-05, "loss": 0.1323, "step": 722 }, { "epoch": 7.010843373493976, "grad_norm": 5.5580902099609375, "learning_rate": 4.649029126213593e-05, "loss": 0.2772, "step": 723 }, { "epoch": 7.0204819277108435, "grad_norm": 7.942682266235352, "learning_rate": 4.6485436893203885e-05, "loss": 0.5424, "step": 724 }, { "epoch": 7.030120481927711, "grad_norm": 9.540014266967773, "learning_rate": 4.648058252427185e-05, "loss": 0.4479, "step": 725 }, { "epoch": 7.039759036144578, "grad_norm": 3.4958643913269043, "learning_rate": 4.647572815533981e-05, "loss": 0.2949, "step": 726 }, { "epoch": 7.049397590361446, "grad_norm": 2.914518117904663, "learning_rate": 4.647087378640777e-05, "loss": 0.3006, "step": 727 }, { "epoch": 7.059036144578314, "grad_norm": 2.548492908477783, "learning_rate": 4.646601941747573e-05, "loss": 0.3638, "step": 728 }, { "epoch": 7.0686746987951805, "grad_norm": 5.2692365646362305, "learning_rate": 4.6461165048543695e-05, "loss": 0.4021, "step": 729 }, { "epoch": 7.078313253012048, "grad_norm": 5.644729137420654, "learning_rate": 4.645631067961165e-05, "loss": 0.262, "step": 730 }, { "epoch": 7.087951807228916, "grad_norm": 5.060497283935547, "learning_rate": 4.645145631067962e-05, "loss": 0.3989, "step": 731 }, { "epoch": 7.097590361445783, "grad_norm": 9.342915534973145, "learning_rate": 4.6446601941747575e-05, "loss": 0.4425, "step": 732 }, { "epoch": 7.107228915662651, "grad_norm": 3.952117443084717, "learning_rate": 4.644174757281554e-05, "loss": 0.3747, "step": 733 }, { "epoch": 7.1168674698795185, "grad_norm": 2.8605847358703613, "learning_rate": 4.64368932038835e-05, "loss": 0.2498, "step": 734 }, { "epoch": 7.126506024096385, "grad_norm": 4.5569024085998535, "learning_rate": 4.6432038834951455e-05, "loss": 0.305, "step": 735 }, { "epoch": 7.136144578313253, "grad_norm": 8.111128807067871, "learning_rate": 4.642718446601942e-05, "loss": 0.4753, "step": 736 }, { "epoch": 7.145783132530121, "grad_norm": 10.900412559509277, "learning_rate": 4.642233009708738e-05, "loss": 0.2118, "step": 737 }, { "epoch": 7.155421686746988, "grad_norm": 3.529585361480713, "learning_rate": 4.641747572815534e-05, "loss": 0.4003, "step": 738 }, { "epoch": 7.1650602409638555, "grad_norm": 18.547353744506836, "learning_rate": 4.64126213592233e-05, "loss": 0.5027, "step": 739 }, { "epoch": 7.174698795180723, "grad_norm": 2.203418254852295, "learning_rate": 4.6407766990291264e-05, "loss": 0.2437, "step": 740 }, { "epoch": 7.18433734939759, "grad_norm": 6.167112827301025, "learning_rate": 4.640291262135922e-05, "loss": 0.3551, "step": 741 }, { "epoch": 7.193975903614458, "grad_norm": 4.296331882476807, "learning_rate": 4.6398058252427186e-05, "loss": 0.2621, "step": 742 }, { "epoch": 7.203614457831326, "grad_norm": 6.301990032196045, "learning_rate": 4.6393203883495144e-05, "loss": 0.2826, "step": 743 }, { "epoch": 7.213253012048193, "grad_norm": 3.3916378021240234, "learning_rate": 4.638834951456311e-05, "loss": 0.5092, "step": 744 }, { "epoch": 7.22289156626506, "grad_norm": 5.5566864013671875, "learning_rate": 4.638349514563107e-05, "loss": 0.1938, "step": 745 }, { "epoch": 7.232530120481928, "grad_norm": 3.9917285442352295, "learning_rate": 4.637864077669903e-05, "loss": 0.2836, "step": 746 }, { "epoch": 7.242168674698795, "grad_norm": 3.9249439239501953, "learning_rate": 4.6373786407766995e-05, "loss": 0.2076, "step": 747 }, { "epoch": 7.251807228915663, "grad_norm": 2.575552225112915, "learning_rate": 4.636893203883495e-05, "loss": 0.3169, "step": 748 }, { "epoch": 7.2614457831325305, "grad_norm": 2.9979746341705322, "learning_rate": 4.636407766990292e-05, "loss": 0.3399, "step": 749 }, { "epoch": 7.271084337349397, "grad_norm": 2.8914151191711426, "learning_rate": 4.6359223300970875e-05, "loss": 0.3223, "step": 750 }, { "epoch": 7.280722891566265, "grad_norm": 4.3989129066467285, "learning_rate": 4.635436893203884e-05, "loss": 0.2907, "step": 751 }, { "epoch": 7.290361445783133, "grad_norm": 4.5147833824157715, "learning_rate": 4.63495145631068e-05, "loss": 0.4329, "step": 752 }, { "epoch": 7.3, "grad_norm": 3.9140822887420654, "learning_rate": 4.634466019417476e-05, "loss": 0.4346, "step": 753 }, { "epoch": 7.309638554216868, "grad_norm": 3.6095454692840576, "learning_rate": 4.633980582524272e-05, "loss": 0.238, "step": 754 }, { "epoch": 7.3192771084337345, "grad_norm": 4.601648330688477, "learning_rate": 4.6334951456310684e-05, "loss": 0.3526, "step": 755 }, { "epoch": 7.328915662650602, "grad_norm": 3.868370532989502, "learning_rate": 4.633009708737864e-05, "loss": 0.2481, "step": 756 }, { "epoch": 7.33855421686747, "grad_norm": 11.141709327697754, "learning_rate": 4.632524271844661e-05, "loss": 0.38, "step": 757 }, { "epoch": 7.348192771084337, "grad_norm": 4.886325836181641, "learning_rate": 4.6320388349514564e-05, "loss": 0.3871, "step": 758 }, { "epoch": 7.357831325301205, "grad_norm": 3.922675848007202, "learning_rate": 4.631553398058253e-05, "loss": 0.3839, "step": 759 }, { "epoch": 7.367469879518072, "grad_norm": 5.227161407470703, "learning_rate": 4.631067961165049e-05, "loss": 0.2868, "step": 760 }, { "epoch": 7.377108433734939, "grad_norm": 3.4774065017700195, "learning_rate": 4.630582524271845e-05, "loss": 0.5116, "step": 761 }, { "epoch": 7.386746987951807, "grad_norm": 2.682976245880127, "learning_rate": 4.630097087378641e-05, "loss": 0.3034, "step": 762 }, { "epoch": 7.396385542168675, "grad_norm": 2.5004987716674805, "learning_rate": 4.629611650485437e-05, "loss": 0.2832, "step": 763 }, { "epoch": 7.406024096385542, "grad_norm": 6.430929183959961, "learning_rate": 4.629126213592233e-05, "loss": 0.5887, "step": 764 }, { "epoch": 7.4156626506024095, "grad_norm": 2.0148351192474365, "learning_rate": 4.628640776699029e-05, "loss": 0.2177, "step": 765 }, { "epoch": 7.425301204819277, "grad_norm": 3.708448648452759, "learning_rate": 4.6281553398058254e-05, "loss": 0.2909, "step": 766 }, { "epoch": 7.434939759036144, "grad_norm": 4.323432445526123, "learning_rate": 4.627669902912621e-05, "loss": 0.4141, "step": 767 }, { "epoch": 7.444578313253012, "grad_norm": 5.83163595199585, "learning_rate": 4.6271844660194176e-05, "loss": 0.2547, "step": 768 }, { "epoch": 7.45421686746988, "grad_norm": 5.339633464813232, "learning_rate": 4.6266990291262134e-05, "loss": 0.3021, "step": 769 }, { "epoch": 7.4638554216867465, "grad_norm": 10.2047758102417, "learning_rate": 4.6262135922330105e-05, "loss": 0.3152, "step": 770 }, { "epoch": 7.473493975903614, "grad_norm": 2.533175468444824, "learning_rate": 4.625728155339806e-05, "loss": 0.1848, "step": 771 }, { "epoch": 7.483132530120482, "grad_norm": 4.144209384918213, "learning_rate": 4.625242718446603e-05, "loss": 0.211, "step": 772 }, { "epoch": 7.492771084337349, "grad_norm": 10.097362518310547, "learning_rate": 4.6247572815533985e-05, "loss": 0.3255, "step": 773 }, { "epoch": 7.502409638554217, "grad_norm": 2.844818353652954, "learning_rate": 4.624271844660194e-05, "loss": 0.1929, "step": 774 }, { "epoch": 7.5120481927710845, "grad_norm": 3.879234552383423, "learning_rate": 4.623786407766991e-05, "loss": 0.6291, "step": 775 }, { "epoch": 7.521686746987951, "grad_norm": 2.335479259490967, "learning_rate": 4.6233009708737865e-05, "loss": 0.1603, "step": 776 }, { "epoch": 7.531325301204819, "grad_norm": 3.739426851272583, "learning_rate": 4.622815533980583e-05, "loss": 0.2823, "step": 777 }, { "epoch": 7.540963855421687, "grad_norm": 3.5187039375305176, "learning_rate": 4.622330097087379e-05, "loss": 0.3442, "step": 778 }, { "epoch": 7.550602409638554, "grad_norm": 6.026670932769775, "learning_rate": 4.621844660194175e-05, "loss": 0.2355, "step": 779 }, { "epoch": 7.5602409638554215, "grad_norm": 4.383297920227051, "learning_rate": 4.621359223300971e-05, "loss": 0.4151, "step": 780 }, { "epoch": 7.569879518072289, "grad_norm": 3.436230421066284, "learning_rate": 4.6208737864077674e-05, "loss": 0.342, "step": 781 }, { "epoch": 7.579518072289156, "grad_norm": 5.340717792510986, "learning_rate": 4.620388349514563e-05, "loss": 0.2707, "step": 782 }, { "epoch": 7.589156626506024, "grad_norm": 3.618803024291992, "learning_rate": 4.6199029126213596e-05, "loss": 0.388, "step": 783 }, { "epoch": 7.598795180722892, "grad_norm": 5.465739727020264, "learning_rate": 4.6194174757281554e-05, "loss": 0.3884, "step": 784 }, { "epoch": 7.608433734939759, "grad_norm": 5.878564834594727, "learning_rate": 4.618932038834952e-05, "loss": 0.5073, "step": 785 }, { "epoch": 7.618072289156626, "grad_norm": 6.272350788116455, "learning_rate": 4.6184466019417476e-05, "loss": 0.3164, "step": 786 }, { "epoch": 7.627710843373494, "grad_norm": 6.417891025543213, "learning_rate": 4.617961165048544e-05, "loss": 0.2786, "step": 787 }, { "epoch": 7.637349397590361, "grad_norm": 3.7346999645233154, "learning_rate": 4.61747572815534e-05, "loss": 0.5479, "step": 788 }, { "epoch": 7.646987951807229, "grad_norm": 3.2343287467956543, "learning_rate": 4.616990291262136e-05, "loss": 0.2715, "step": 789 }, { "epoch": 7.656626506024097, "grad_norm": 6.873208045959473, "learning_rate": 4.616504854368932e-05, "loss": 0.1554, "step": 790 }, { "epoch": 7.666265060240963, "grad_norm": 12.006695747375488, "learning_rate": 4.616019417475728e-05, "loss": 0.3908, "step": 791 }, { "epoch": 7.675903614457831, "grad_norm": 10.370986938476562, "learning_rate": 4.615533980582524e-05, "loss": 0.5045, "step": 792 }, { "epoch": 7.685542168674699, "grad_norm": 2.2931742668151855, "learning_rate": 4.61504854368932e-05, "loss": 0.2077, "step": 793 }, { "epoch": 7.695180722891566, "grad_norm": 3.7515854835510254, "learning_rate": 4.6145631067961166e-05, "loss": 0.3628, "step": 794 }, { "epoch": 7.704819277108434, "grad_norm": 4.140289306640625, "learning_rate": 4.6140776699029123e-05, "loss": 0.2153, "step": 795 }, { "epoch": 7.714457831325301, "grad_norm": 3.429774761199951, "learning_rate": 4.6135922330097095e-05, "loss": 0.3966, "step": 796 }, { "epoch": 7.724096385542168, "grad_norm": 4.3490142822265625, "learning_rate": 4.613106796116505e-05, "loss": 0.4188, "step": 797 }, { "epoch": 7.733734939759036, "grad_norm": 5.8544511795043945, "learning_rate": 4.612621359223302e-05, "loss": 0.2948, "step": 798 }, { "epoch": 7.743373493975904, "grad_norm": 3.136464834213257, "learning_rate": 4.6121359223300975e-05, "loss": 0.2968, "step": 799 }, { "epoch": 7.753012048192771, "grad_norm": 9.258957862854004, "learning_rate": 4.611650485436894e-05, "loss": 0.2036, "step": 800 }, { "epoch": 7.7626506024096384, "grad_norm": 6.527810096740723, "learning_rate": 4.61116504854369e-05, "loss": 0.502, "step": 801 }, { "epoch": 7.772289156626506, "grad_norm": 7.69514274597168, "learning_rate": 4.6106796116504855e-05, "loss": 0.4276, "step": 802 }, { "epoch": 7.781927710843373, "grad_norm": 7.241950511932373, "learning_rate": 4.610194174757282e-05, "loss": 0.2454, "step": 803 }, { "epoch": 7.791566265060241, "grad_norm": 3.204332113265991, "learning_rate": 4.609708737864078e-05, "loss": 0.133, "step": 804 }, { "epoch": 7.801204819277109, "grad_norm": 4.370757102966309, "learning_rate": 4.609223300970874e-05, "loss": 0.247, "step": 805 }, { "epoch": 7.8108433734939755, "grad_norm": 4.772947788238525, "learning_rate": 4.60873786407767e-05, "loss": 0.2219, "step": 806 }, { "epoch": 7.820481927710843, "grad_norm": 3.0462570190429688, "learning_rate": 4.6082524271844664e-05, "loss": 0.19, "step": 807 }, { "epoch": 7.830120481927711, "grad_norm": 1.5521717071533203, "learning_rate": 4.607766990291262e-05, "loss": 0.2034, "step": 808 }, { "epoch": 7.839759036144578, "grad_norm": 2.4125823974609375, "learning_rate": 4.6072815533980586e-05, "loss": 0.2164, "step": 809 }, { "epoch": 7.849397590361446, "grad_norm": 4.258775234222412, "learning_rate": 4.6067961165048544e-05, "loss": 0.1191, "step": 810 }, { "epoch": 7.8590361445783135, "grad_norm": 1.284454584121704, "learning_rate": 4.606310679611651e-05, "loss": 0.095, "step": 811 }, { "epoch": 7.86867469879518, "grad_norm": 12.893623352050781, "learning_rate": 4.6058252427184466e-05, "loss": 0.4549, "step": 812 }, { "epoch": 7.878313253012048, "grad_norm": 3.2103543281555176, "learning_rate": 4.605339805825243e-05, "loss": 0.1714, "step": 813 }, { "epoch": 7.887951807228916, "grad_norm": 2.739760398864746, "learning_rate": 4.604854368932039e-05, "loss": 0.2714, "step": 814 }, { "epoch": 7.897590361445783, "grad_norm": 7.373762607574463, "learning_rate": 4.604368932038835e-05, "loss": 0.1614, "step": 815 }, { "epoch": 7.9072289156626505, "grad_norm": 4.188544750213623, "learning_rate": 4.603883495145631e-05, "loss": 0.4432, "step": 816 }, { "epoch": 7.916867469879518, "grad_norm": 2.6760594844818115, "learning_rate": 4.6033980582524275e-05, "loss": 0.2162, "step": 817 }, { "epoch": 7.926506024096385, "grad_norm": 4.131224632263184, "learning_rate": 4.602912621359223e-05, "loss": 0.2839, "step": 818 }, { "epoch": 7.936144578313253, "grad_norm": 6.559559345245361, "learning_rate": 4.602427184466019e-05, "loss": 0.3225, "step": 819 }, { "epoch": 7.945783132530121, "grad_norm": 6.961268901824951, "learning_rate": 4.6019417475728155e-05, "loss": 0.4208, "step": 820 }, { "epoch": 7.955421686746988, "grad_norm": 2.391653299331665, "learning_rate": 4.601456310679612e-05, "loss": 0.2152, "step": 821 }, { "epoch": 7.965060240963855, "grad_norm": 5.110595703125, "learning_rate": 4.6009708737864085e-05, "loss": 0.5141, "step": 822 }, { "epoch": 7.974698795180723, "grad_norm": 12.036786079406738, "learning_rate": 4.600485436893204e-05, "loss": 0.5765, "step": 823 }, { "epoch": 7.98433734939759, "grad_norm": 3.9520227909088135, "learning_rate": 4.600000000000001e-05, "loss": 0.4785, "step": 824 }, { "epoch": 7.993975903614458, "grad_norm": 5.187422752380371, "learning_rate": 4.5995145631067965e-05, "loss": 0.2407, "step": 825 }, { "epoch": 8.009638554216867, "grad_norm": 5.340340614318848, "learning_rate": 4.599029126213593e-05, "loss": 0.3476, "step": 826 }, { "epoch": 8.019277108433736, "grad_norm": 4.035325050354004, "learning_rate": 4.598543689320389e-05, "loss": 0.4554, "step": 827 }, { "epoch": 8.028915662650602, "grad_norm": 4.300768852233887, "learning_rate": 4.5980582524271845e-05, "loss": 0.3415, "step": 828 }, { "epoch": 8.03855421686747, "grad_norm": 4.386412620544434, "learning_rate": 4.597572815533981e-05, "loss": 0.1901, "step": 829 }, { "epoch": 8.048192771084338, "grad_norm": 2.8211607933044434, "learning_rate": 4.597087378640777e-05, "loss": 0.2364, "step": 830 }, { "epoch": 8.057831325301205, "grad_norm": 3.913257122039795, "learning_rate": 4.596601941747573e-05, "loss": 0.3012, "step": 831 }, { "epoch": 8.067469879518072, "grad_norm": 4.024266719818115, "learning_rate": 4.596116504854369e-05, "loss": 0.1659, "step": 832 }, { "epoch": 8.07710843373494, "grad_norm": 7.771791934967041, "learning_rate": 4.5956310679611654e-05, "loss": 0.3615, "step": 833 }, { "epoch": 8.086746987951807, "grad_norm": 3.1993796825408936, "learning_rate": 4.595145631067961e-05, "loss": 0.1424, "step": 834 }, { "epoch": 8.096385542168674, "grad_norm": 2.486741065979004, "learning_rate": 4.5946601941747576e-05, "loss": 0.1477, "step": 835 }, { "epoch": 8.106024096385543, "grad_norm": 4.33134126663208, "learning_rate": 4.5941747572815534e-05, "loss": 0.4182, "step": 836 }, { "epoch": 8.11566265060241, "grad_norm": 5.949396133422852, "learning_rate": 4.59368932038835e-05, "loss": 0.333, "step": 837 }, { "epoch": 8.125301204819277, "grad_norm": 7.506922245025635, "learning_rate": 4.5932038834951456e-05, "loss": 0.3398, "step": 838 }, { "epoch": 8.134939759036145, "grad_norm": 13.914887428283691, "learning_rate": 4.592718446601942e-05, "loss": 0.3214, "step": 839 }, { "epoch": 8.144578313253012, "grad_norm": 6.7149128913879395, "learning_rate": 4.592233009708738e-05, "loss": 0.4983, "step": 840 }, { "epoch": 8.154216867469879, "grad_norm": 4.720554351806641, "learning_rate": 4.591747572815534e-05, "loss": 0.3474, "step": 841 }, { "epoch": 8.163855421686748, "grad_norm": 4.93611478805542, "learning_rate": 4.59126213592233e-05, "loss": 0.3748, "step": 842 }, { "epoch": 8.173493975903614, "grad_norm": 3.5199012756347656, "learning_rate": 4.5907766990291265e-05, "loss": 0.2183, "step": 843 }, { "epoch": 8.183132530120481, "grad_norm": 3.0759942531585693, "learning_rate": 4.590291262135922e-05, "loss": 0.2195, "step": 844 }, { "epoch": 8.19277108433735, "grad_norm": 1.6256307363510132, "learning_rate": 4.589805825242718e-05, "loss": 0.1053, "step": 845 }, { "epoch": 8.202409638554217, "grad_norm": 6.173670291900635, "learning_rate": 4.589320388349515e-05, "loss": 0.6452, "step": 846 }, { "epoch": 8.212048192771084, "grad_norm": 3.667332410812378, "learning_rate": 4.588834951456311e-05, "loss": 0.1359, "step": 847 }, { "epoch": 8.221686746987952, "grad_norm": 7.426032066345215, "learning_rate": 4.5883495145631074e-05, "loss": 0.216, "step": 848 }, { "epoch": 8.23132530120482, "grad_norm": 4.68475866317749, "learning_rate": 4.587864077669903e-05, "loss": 0.2913, "step": 849 }, { "epoch": 8.240963855421686, "grad_norm": 6.696953296661377, "learning_rate": 4.5873786407766997e-05, "loss": 0.4812, "step": 850 }, { "epoch": 8.250602409638555, "grad_norm": 2.8303661346435547, "learning_rate": 4.5868932038834954e-05, "loss": 0.2899, "step": 851 }, { "epoch": 8.260240963855422, "grad_norm": 3.307779550552368, "learning_rate": 4.586407766990292e-05, "loss": 0.176, "step": 852 }, { "epoch": 8.269879518072289, "grad_norm": 4.573883533477783, "learning_rate": 4.585922330097088e-05, "loss": 0.1867, "step": 853 }, { "epoch": 8.279518072289157, "grad_norm": 2.9917657375335693, "learning_rate": 4.585436893203884e-05, "loss": 0.1517, "step": 854 }, { "epoch": 8.289156626506024, "grad_norm": 5.089465618133545, "learning_rate": 4.58495145631068e-05, "loss": 0.3217, "step": 855 }, { "epoch": 8.298795180722891, "grad_norm": 2.444235324859619, "learning_rate": 4.584466019417476e-05, "loss": 0.2195, "step": 856 }, { "epoch": 8.30843373493976, "grad_norm": 4.909610271453857, "learning_rate": 4.583980582524272e-05, "loss": 0.4832, "step": 857 }, { "epoch": 8.318072289156627, "grad_norm": 4.756667137145996, "learning_rate": 4.583495145631068e-05, "loss": 0.2962, "step": 858 }, { "epoch": 8.327710843373493, "grad_norm": 3.1851611137390137, "learning_rate": 4.5830097087378644e-05, "loss": 0.1939, "step": 859 }, { "epoch": 8.337349397590362, "grad_norm": 1.6897101402282715, "learning_rate": 4.58252427184466e-05, "loss": 0.1726, "step": 860 }, { "epoch": 8.346987951807229, "grad_norm": 6.736721038818359, "learning_rate": 4.5820388349514566e-05, "loss": 0.2095, "step": 861 }, { "epoch": 8.356626506024096, "grad_norm": 5.9112348556518555, "learning_rate": 4.5815533980582524e-05, "loss": 0.5614, "step": 862 }, { "epoch": 8.366265060240965, "grad_norm": 4.913121700286865, "learning_rate": 4.581067961165049e-05, "loss": 0.4182, "step": 863 }, { "epoch": 8.375903614457831, "grad_norm": 6.908644676208496, "learning_rate": 4.5805825242718446e-05, "loss": 0.2367, "step": 864 }, { "epoch": 8.385542168674698, "grad_norm": 2.7665560245513916, "learning_rate": 4.580097087378641e-05, "loss": 0.3511, "step": 865 }, { "epoch": 8.395180722891567, "grad_norm": 5.6984758377075195, "learning_rate": 4.579611650485437e-05, "loss": 0.4377, "step": 866 }, { "epoch": 8.404819277108434, "grad_norm": 3.768256425857544, "learning_rate": 4.579126213592233e-05, "loss": 0.2394, "step": 867 }, { "epoch": 8.4144578313253, "grad_norm": 13.095478057861328, "learning_rate": 4.578640776699029e-05, "loss": 0.2778, "step": 868 }, { "epoch": 8.42409638554217, "grad_norm": 2.8854165077209473, "learning_rate": 4.5781553398058255e-05, "loss": 0.1805, "step": 869 }, { "epoch": 8.433734939759036, "grad_norm": 3.565661668777466, "learning_rate": 4.577669902912621e-05, "loss": 0.2296, "step": 870 }, { "epoch": 8.443373493975903, "grad_norm": 3.6195144653320312, "learning_rate": 4.577184466019418e-05, "loss": 0.2216, "step": 871 }, { "epoch": 8.453012048192772, "grad_norm": 5.54270076751709, "learning_rate": 4.576699029126214e-05, "loss": 0.344, "step": 872 }, { "epoch": 8.462650602409639, "grad_norm": 2.511662483215332, "learning_rate": 4.57621359223301e-05, "loss": 0.249, "step": 873 }, { "epoch": 8.472289156626506, "grad_norm": 2.980402946472168, "learning_rate": 4.5757281553398064e-05, "loss": 0.2715, "step": 874 }, { "epoch": 8.481927710843374, "grad_norm": 2.437671184539795, "learning_rate": 4.575242718446602e-05, "loss": 0.2522, "step": 875 }, { "epoch": 8.491566265060241, "grad_norm": 2.889025926589966, "learning_rate": 4.5747572815533986e-05, "loss": 0.3157, "step": 876 }, { "epoch": 8.501204819277108, "grad_norm": 4.912395477294922, "learning_rate": 4.5742718446601944e-05, "loss": 0.5287, "step": 877 }, { "epoch": 8.510843373493977, "grad_norm": 3.6133780479431152, "learning_rate": 4.573786407766991e-05, "loss": 0.2572, "step": 878 }, { "epoch": 8.520481927710843, "grad_norm": 3.633577346801758, "learning_rate": 4.5733009708737866e-05, "loss": 0.3249, "step": 879 }, { "epoch": 8.53012048192771, "grad_norm": 2.8833229541778564, "learning_rate": 4.572815533980583e-05, "loss": 0.2519, "step": 880 }, { "epoch": 8.539759036144579, "grad_norm": 3.157663106918335, "learning_rate": 4.572330097087379e-05, "loss": 0.1911, "step": 881 }, { "epoch": 8.549397590361446, "grad_norm": 3.29360032081604, "learning_rate": 4.571844660194175e-05, "loss": 0.3256, "step": 882 }, { "epoch": 8.559036144578313, "grad_norm": 3.333625078201294, "learning_rate": 4.571359223300971e-05, "loss": 0.2448, "step": 883 }, { "epoch": 8.568674698795181, "grad_norm": 2.097036600112915, "learning_rate": 4.570873786407767e-05, "loss": 0.1562, "step": 884 }, { "epoch": 8.578313253012048, "grad_norm": 4.719982624053955, "learning_rate": 4.570388349514563e-05, "loss": 0.2413, "step": 885 }, { "epoch": 8.587951807228915, "grad_norm": 2.3329596519470215, "learning_rate": 4.569902912621359e-05, "loss": 0.2306, "step": 886 }, { "epoch": 8.597590361445784, "grad_norm": 2.6987600326538086, "learning_rate": 4.5694174757281556e-05, "loss": 0.1778, "step": 887 }, { "epoch": 8.60722891566265, "grad_norm": 3.8306503295898438, "learning_rate": 4.568932038834951e-05, "loss": 0.2447, "step": 888 }, { "epoch": 8.616867469879518, "grad_norm": 4.26489782333374, "learning_rate": 4.568446601941748e-05, "loss": 0.2794, "step": 889 }, { "epoch": 8.626506024096386, "grad_norm": 4.27765417098999, "learning_rate": 4.5679611650485436e-05, "loss": 0.2414, "step": 890 }, { "epoch": 8.636144578313253, "grad_norm": 5.345401287078857, "learning_rate": 4.56747572815534e-05, "loss": 0.2353, "step": 891 }, { "epoch": 8.64578313253012, "grad_norm": 5.6829400062561035, "learning_rate": 4.566990291262136e-05, "loss": 0.3669, "step": 892 }, { "epoch": 8.655421686746989, "grad_norm": 1.4667030572891235, "learning_rate": 4.566504854368932e-05, "loss": 0.0664, "step": 893 }, { "epoch": 8.665060240963856, "grad_norm": 6.954171657562256, "learning_rate": 4.566019417475728e-05, "loss": 0.2683, "step": 894 }, { "epoch": 8.674698795180722, "grad_norm": 3.6963560581207275, "learning_rate": 4.5655339805825245e-05, "loss": 0.4343, "step": 895 }, { "epoch": 8.684337349397591, "grad_norm": 6.155021667480469, "learning_rate": 4.56504854368932e-05, "loss": 0.2287, "step": 896 }, { "epoch": 8.693975903614458, "grad_norm": 4.947582721710205, "learning_rate": 4.564563106796117e-05, "loss": 0.4337, "step": 897 }, { "epoch": 8.703614457831325, "grad_norm": 6.046788215637207, "learning_rate": 4.564077669902913e-05, "loss": 0.2254, "step": 898 }, { "epoch": 8.713253012048193, "grad_norm": 5.278687000274658, "learning_rate": 4.563592233009709e-05, "loss": 0.2534, "step": 899 }, { "epoch": 8.72289156626506, "grad_norm": 4.357367038726807, "learning_rate": 4.5631067961165054e-05, "loss": 0.1749, "step": 900 }, { "epoch": 8.732530120481927, "grad_norm": 6.613254070281982, "learning_rate": 4.562621359223301e-05, "loss": 0.3581, "step": 901 }, { "epoch": 8.742168674698796, "grad_norm": 3.0899055004119873, "learning_rate": 4.5621359223300976e-05, "loss": 0.294, "step": 902 }, { "epoch": 8.751807228915663, "grad_norm": 2.0291178226470947, "learning_rate": 4.5616504854368934e-05, "loss": 0.1168, "step": 903 }, { "epoch": 8.76144578313253, "grad_norm": 2.973705768585205, "learning_rate": 4.56116504854369e-05, "loss": 0.2041, "step": 904 }, { "epoch": 8.771084337349398, "grad_norm": 5.812280654907227, "learning_rate": 4.5606796116504856e-05, "loss": 0.4039, "step": 905 }, { "epoch": 8.780722891566265, "grad_norm": 5.274932384490967, "learning_rate": 4.560194174757282e-05, "loss": 0.3359, "step": 906 }, { "epoch": 8.790361445783132, "grad_norm": 5.227943420410156, "learning_rate": 4.559708737864078e-05, "loss": 0.2785, "step": 907 }, { "epoch": 8.8, "grad_norm": 4.88953161239624, "learning_rate": 4.559223300970874e-05, "loss": 0.1293, "step": 908 }, { "epoch": 8.809638554216868, "grad_norm": 2.42602276802063, "learning_rate": 4.55873786407767e-05, "loss": 0.2433, "step": 909 }, { "epoch": 8.819277108433734, "grad_norm": 4.379723072052002, "learning_rate": 4.5582524271844665e-05, "loss": 0.2916, "step": 910 }, { "epoch": 8.828915662650603, "grad_norm": 3.498296022415161, "learning_rate": 4.557766990291262e-05, "loss": 0.3385, "step": 911 }, { "epoch": 8.83855421686747, "grad_norm": 4.005378246307373, "learning_rate": 4.557281553398058e-05, "loss": 0.2917, "step": 912 }, { "epoch": 8.848192771084337, "grad_norm": 4.532007217407227, "learning_rate": 4.5567961165048545e-05, "loss": 0.2062, "step": 913 }, { "epoch": 8.857831325301206, "grad_norm": 1.9549345970153809, "learning_rate": 4.55631067961165e-05, "loss": 0.1766, "step": 914 }, { "epoch": 8.867469879518072, "grad_norm": 3.694251537322998, "learning_rate": 4.555825242718447e-05, "loss": 0.335, "step": 915 }, { "epoch": 8.87710843373494, "grad_norm": 4.695824146270752, "learning_rate": 4.5553398058252425e-05, "loss": 0.3521, "step": 916 }, { "epoch": 8.886746987951808, "grad_norm": 5.511168003082275, "learning_rate": 4.554854368932039e-05, "loss": 0.2647, "step": 917 }, { "epoch": 8.896385542168675, "grad_norm": 1.723932147026062, "learning_rate": 4.554368932038835e-05, "loss": 0.1847, "step": 918 }, { "epoch": 8.906024096385542, "grad_norm": 3.082836627960205, "learning_rate": 4.553883495145631e-05, "loss": 0.297, "step": 919 }, { "epoch": 8.91566265060241, "grad_norm": 7.445598125457764, "learning_rate": 4.553398058252427e-05, "loss": 0.3797, "step": 920 }, { "epoch": 8.925301204819277, "grad_norm": 2.9382004737854004, "learning_rate": 4.5529126213592235e-05, "loss": 0.261, "step": 921 }, { "epoch": 8.934939759036144, "grad_norm": 2.4587204456329346, "learning_rate": 4.55242718446602e-05, "loss": 0.1935, "step": 922 }, { "epoch": 8.944578313253013, "grad_norm": 2.4198994636535645, "learning_rate": 4.551941747572816e-05, "loss": 0.1386, "step": 923 }, { "epoch": 8.95421686746988, "grad_norm": 4.925835609436035, "learning_rate": 4.551456310679612e-05, "loss": 0.2113, "step": 924 }, { "epoch": 8.963855421686747, "grad_norm": 2.795072078704834, "learning_rate": 4.550970873786408e-05, "loss": 0.2347, "step": 925 }, { "epoch": 8.973493975903615, "grad_norm": 5.191370010375977, "learning_rate": 4.5504854368932044e-05, "loss": 0.481, "step": 926 }, { "epoch": 8.983132530120482, "grad_norm": 3.3789420127868652, "learning_rate": 4.55e-05, "loss": 0.3431, "step": 927 }, { "epoch": 8.992771084337349, "grad_norm": 5.641687393188477, "learning_rate": 4.5495145631067966e-05, "loss": 0.197, "step": 928 }, { "epoch": 9.008433734939759, "grad_norm": 2.38816499710083, "learning_rate": 4.5490291262135924e-05, "loss": 0.1484, "step": 929 }, { "epoch": 9.018072289156626, "grad_norm": 1.5532459020614624, "learning_rate": 4.548543689320389e-05, "loss": 0.3708, "step": 930 }, { "epoch": 9.027710843373494, "grad_norm": 33.39796829223633, "learning_rate": 4.5480582524271846e-05, "loss": 0.292, "step": 931 }, { "epoch": 9.037349397590361, "grad_norm": 1.5462989807128906, "learning_rate": 4.547572815533981e-05, "loss": 0.2194, "step": 932 }, { "epoch": 9.046987951807228, "grad_norm": 13.2515869140625, "learning_rate": 4.547087378640777e-05, "loss": 0.4005, "step": 933 }, { "epoch": 9.056626506024097, "grad_norm": 19.778968811035156, "learning_rate": 4.546601941747573e-05, "loss": 0.1914, "step": 934 }, { "epoch": 9.066265060240964, "grad_norm": 5.815398216247559, "learning_rate": 4.546116504854369e-05, "loss": 0.2304, "step": 935 }, { "epoch": 9.07590361445783, "grad_norm": 3.2935824394226074, "learning_rate": 4.5456310679611655e-05, "loss": 0.3012, "step": 936 }, { "epoch": 9.0855421686747, "grad_norm": 3.30989408493042, "learning_rate": 4.545145631067961e-05, "loss": 0.2716, "step": 937 }, { "epoch": 9.095180722891566, "grad_norm": 8.87865924835205, "learning_rate": 4.544660194174758e-05, "loss": 0.2166, "step": 938 }, { "epoch": 9.104819277108433, "grad_norm": 4.889070987701416, "learning_rate": 4.5441747572815535e-05, "loss": 0.4182, "step": 939 }, { "epoch": 9.114457831325302, "grad_norm": 5.413609027862549, "learning_rate": 4.543689320388349e-05, "loss": 0.1784, "step": 940 }, { "epoch": 9.124096385542169, "grad_norm": 8.960953712463379, "learning_rate": 4.543203883495146e-05, "loss": 0.4742, "step": 941 }, { "epoch": 9.133734939759035, "grad_norm": 9.00687026977539, "learning_rate": 4.5427184466019415e-05, "loss": 0.4205, "step": 942 }, { "epoch": 9.143373493975904, "grad_norm": 25.474802017211914, "learning_rate": 4.542233009708738e-05, "loss": 0.3744, "step": 943 }, { "epoch": 9.153012048192771, "grad_norm": 8.87845230102539, "learning_rate": 4.541747572815534e-05, "loss": 0.2783, "step": 944 }, { "epoch": 9.162650602409638, "grad_norm": 17.192981719970703, "learning_rate": 4.54126213592233e-05, "loss": 0.2437, "step": 945 }, { "epoch": 9.172289156626507, "grad_norm": 1.9075239896774292, "learning_rate": 4.540776699029126e-05, "loss": 0.1348, "step": 946 }, { "epoch": 9.181927710843373, "grad_norm": 15.389445304870605, "learning_rate": 4.540291262135923e-05, "loss": 0.2728, "step": 947 }, { "epoch": 9.19156626506024, "grad_norm": 3.012521266937256, "learning_rate": 4.539805825242719e-05, "loss": 0.172, "step": 948 }, { "epoch": 9.201204819277109, "grad_norm": 2.3295490741729736, "learning_rate": 4.5393203883495153e-05, "loss": 0.1544, "step": 949 }, { "epoch": 9.210843373493976, "grad_norm": 7.26877498626709, "learning_rate": 4.538834951456311e-05, "loss": 0.1537, "step": 950 }, { "epoch": 9.220481927710843, "grad_norm": 13.533720016479492, "learning_rate": 4.538349514563107e-05, "loss": 0.214, "step": 951 }, { "epoch": 9.230120481927711, "grad_norm": 2.7402822971343994, "learning_rate": 4.5378640776699033e-05, "loss": 0.2625, "step": 952 }, { "epoch": 9.239759036144578, "grad_norm": 18.438405990600586, "learning_rate": 4.537378640776699e-05, "loss": 0.4714, "step": 953 }, { "epoch": 9.249397590361445, "grad_norm": 8.751330375671387, "learning_rate": 4.5368932038834956e-05, "loss": 0.4167, "step": 954 }, { "epoch": 9.259036144578314, "grad_norm": 1.3695905208587646, "learning_rate": 4.5364077669902914e-05, "loss": 0.0746, "step": 955 }, { "epoch": 9.26867469879518, "grad_norm": 8.070869445800781, "learning_rate": 4.535922330097088e-05, "loss": 0.1204, "step": 956 }, { "epoch": 9.278313253012048, "grad_norm": 1.9617106914520264, "learning_rate": 4.5354368932038836e-05, "loss": 0.1685, "step": 957 }, { "epoch": 9.287951807228916, "grad_norm": 11.649812698364258, "learning_rate": 4.53495145631068e-05, "loss": 0.3885, "step": 958 }, { "epoch": 9.297590361445783, "grad_norm": 16.192087173461914, "learning_rate": 4.534466019417476e-05, "loss": 0.286, "step": 959 }, { "epoch": 9.30722891566265, "grad_norm": 6.095143795013428, "learning_rate": 4.533980582524272e-05, "loss": 0.3627, "step": 960 }, { "epoch": 9.316867469879519, "grad_norm": 13.157550811767578, "learning_rate": 4.533495145631068e-05, "loss": 0.2768, "step": 961 }, { "epoch": 9.326506024096386, "grad_norm": 3.1107046604156494, "learning_rate": 4.5330097087378645e-05, "loss": 0.3983, "step": 962 }, { "epoch": 9.336144578313252, "grad_norm": 3.0711660385131836, "learning_rate": 4.53252427184466e-05, "loss": 0.3809, "step": 963 }, { "epoch": 9.345783132530121, "grad_norm": 2.313187837600708, "learning_rate": 4.532038834951457e-05, "loss": 0.3308, "step": 964 }, { "epoch": 9.355421686746988, "grad_norm": 3.4100499153137207, "learning_rate": 4.5315533980582525e-05, "loss": 0.1899, "step": 965 }, { "epoch": 9.365060240963855, "grad_norm": 8.46958065032959, "learning_rate": 4.531067961165049e-05, "loss": 0.2948, "step": 966 }, { "epoch": 9.374698795180723, "grad_norm": 11.496602058410645, "learning_rate": 4.530582524271845e-05, "loss": 0.3122, "step": 967 }, { "epoch": 9.38433734939759, "grad_norm": 12.925472259521484, "learning_rate": 4.5300970873786405e-05, "loss": 0.2971, "step": 968 }, { "epoch": 9.393975903614457, "grad_norm": 14.250012397766113, "learning_rate": 4.529611650485437e-05, "loss": 0.1972, "step": 969 }, { "epoch": 9.403614457831326, "grad_norm": 13.637984275817871, "learning_rate": 4.529126213592233e-05, "loss": 0.1933, "step": 970 }, { "epoch": 9.413253012048193, "grad_norm": 2.234577178955078, "learning_rate": 4.528640776699029e-05, "loss": 0.2176, "step": 971 }, { "epoch": 9.42289156626506, "grad_norm": 13.275362968444824, "learning_rate": 4.5281553398058256e-05, "loss": 0.4155, "step": 972 }, { "epoch": 9.432530120481928, "grad_norm": 4.144031524658203, "learning_rate": 4.527669902912622e-05, "loss": 0.3602, "step": 973 }, { "epoch": 9.442168674698795, "grad_norm": 2.0344741344451904, "learning_rate": 4.527184466019418e-05, "loss": 0.1455, "step": 974 }, { "epoch": 9.451807228915662, "grad_norm": 3.643882989883423, "learning_rate": 4.526699029126214e-05, "loss": 0.2609, "step": 975 }, { "epoch": 9.46144578313253, "grad_norm": 2.4791111946105957, "learning_rate": 4.52621359223301e-05, "loss": 0.2962, "step": 976 }, { "epoch": 9.471084337349398, "grad_norm": 2.4311776161193848, "learning_rate": 4.525728155339806e-05, "loss": 0.3785, "step": 977 }, { "epoch": 9.480722891566264, "grad_norm": 14.352810859680176, "learning_rate": 4.525242718446602e-05, "loss": 0.2611, "step": 978 }, { "epoch": 9.490361445783133, "grad_norm": 0.7387223839759827, "learning_rate": 4.524757281553398e-05, "loss": 0.3137, "step": 979 }, { "epoch": 9.5, "grad_norm": 4.173456192016602, "learning_rate": 4.5242718446601946e-05, "loss": 0.4376, "step": 980 }, { "epoch": 9.509638554216867, "grad_norm": 0.5654353499412537, "learning_rate": 4.52378640776699e-05, "loss": 0.2548, "step": 981 }, { "epoch": 9.519277108433736, "grad_norm": 1.538975477218628, "learning_rate": 4.523300970873787e-05, "loss": 0.2195, "step": 982 }, { "epoch": 9.528915662650602, "grad_norm": 6.1537652015686035, "learning_rate": 4.5228155339805826e-05, "loss": 0.3481, "step": 983 }, { "epoch": 9.53855421686747, "grad_norm": 23.604434967041016, "learning_rate": 4.522330097087379e-05, "loss": 0.3621, "step": 984 }, { "epoch": 9.548192771084338, "grad_norm": 9.096589088439941, "learning_rate": 4.521844660194175e-05, "loss": 0.1524, "step": 985 }, { "epoch": 9.557831325301205, "grad_norm": 4.12644100189209, "learning_rate": 4.521359223300971e-05, "loss": 0.2249, "step": 986 }, { "epoch": 9.567469879518072, "grad_norm": 14.110152244567871, "learning_rate": 4.520873786407767e-05, "loss": 0.4114, "step": 987 }, { "epoch": 9.57710843373494, "grad_norm": 2.6403963565826416, "learning_rate": 4.5203883495145635e-05, "loss": 0.1431, "step": 988 }, { "epoch": 9.586746987951807, "grad_norm": 20.716073989868164, "learning_rate": 4.519902912621359e-05, "loss": 0.2291, "step": 989 }, { "epoch": 9.596385542168674, "grad_norm": 10.437972068786621, "learning_rate": 4.519417475728156e-05, "loss": 0.3846, "step": 990 }, { "epoch": 9.606024096385543, "grad_norm": 4.399903297424316, "learning_rate": 4.5189320388349515e-05, "loss": 0.1701, "step": 991 }, { "epoch": 9.61566265060241, "grad_norm": 6.140345096588135, "learning_rate": 4.518446601941748e-05, "loss": 0.3598, "step": 992 }, { "epoch": 9.625301204819277, "grad_norm": 3.5959293842315674, "learning_rate": 4.517961165048544e-05, "loss": 0.1895, "step": 993 }, { "epoch": 9.634939759036145, "grad_norm": 16.239391326904297, "learning_rate": 4.5174757281553395e-05, "loss": 0.329, "step": 994 }, { "epoch": 9.644578313253012, "grad_norm": 2.5688083171844482, "learning_rate": 4.516990291262136e-05, "loss": 0.2595, "step": 995 }, { "epoch": 9.654216867469879, "grad_norm": 4.481781482696533, "learning_rate": 4.516504854368932e-05, "loss": 0.3319, "step": 996 }, { "epoch": 9.663855421686748, "grad_norm": 6.088756084442139, "learning_rate": 4.516019417475728e-05, "loss": 0.2751, "step": 997 }, { "epoch": 9.673493975903614, "grad_norm": 8.2064790725708, "learning_rate": 4.5155339805825246e-05, "loss": 0.5553, "step": 998 }, { "epoch": 9.683132530120481, "grad_norm": 10.910841941833496, "learning_rate": 4.515048543689321e-05, "loss": 0.4227, "step": 999 }, { "epoch": 9.69277108433735, "grad_norm": 6.8637542724609375, "learning_rate": 4.514563106796117e-05, "loss": 0.4086, "step": 1000 }, { "epoch": 9.702409638554217, "grad_norm": 12.703362464904785, "learning_rate": 4.514077669902913e-05, "loss": 0.1257, "step": 1001 }, { "epoch": 9.712048192771084, "grad_norm": 1.5821634531021118, "learning_rate": 4.513592233009709e-05, "loss": 0.4527, "step": 1002 }, { "epoch": 9.721686746987952, "grad_norm": 6.066076278686523, "learning_rate": 4.5131067961165055e-05, "loss": 0.1806, "step": 1003 }, { "epoch": 9.73132530120482, "grad_norm": 10.270964622497559, "learning_rate": 4.512621359223301e-05, "loss": 0.1915, "step": 1004 }, { "epoch": 9.740963855421686, "grad_norm": 3.1303179264068604, "learning_rate": 4.512135922330097e-05, "loss": 0.1659, "step": 1005 }, { "epoch": 9.750602409638555, "grad_norm": 5.741486549377441, "learning_rate": 4.5116504854368935e-05, "loss": 0.1662, "step": 1006 }, { "epoch": 9.760240963855422, "grad_norm": 13.826476097106934, "learning_rate": 4.511165048543689e-05, "loss": 0.2656, "step": 1007 }, { "epoch": 9.769879518072289, "grad_norm": 1.7395695447921753, "learning_rate": 4.510679611650486e-05, "loss": 0.1664, "step": 1008 }, { "epoch": 9.779518072289157, "grad_norm": 2.687234401702881, "learning_rate": 4.5101941747572815e-05, "loss": 0.2529, "step": 1009 }, { "epoch": 9.789156626506024, "grad_norm": 3.5580673217773438, "learning_rate": 4.509708737864078e-05, "loss": 0.1282, "step": 1010 }, { "epoch": 9.798795180722891, "grad_norm": 2.7859294414520264, "learning_rate": 4.509223300970874e-05, "loss": 0.1851, "step": 1011 }, { "epoch": 9.80843373493976, "grad_norm": 4.671055793762207, "learning_rate": 4.50873786407767e-05, "loss": 0.5283, "step": 1012 }, { "epoch": 9.818072289156627, "grad_norm": 7.03701114654541, "learning_rate": 4.508252427184466e-05, "loss": 0.2494, "step": 1013 }, { "epoch": 9.827710843373493, "grad_norm": 20.109397888183594, "learning_rate": 4.5077669902912625e-05, "loss": 0.2936, "step": 1014 }, { "epoch": 9.837349397590362, "grad_norm": 2.121729850769043, "learning_rate": 4.507281553398058e-05, "loss": 0.2797, "step": 1015 }, { "epoch": 9.846987951807229, "grad_norm": 5.018993377685547, "learning_rate": 4.506796116504855e-05, "loss": 0.4222, "step": 1016 }, { "epoch": 9.856626506024096, "grad_norm": 12.65056324005127, "learning_rate": 4.5063106796116505e-05, "loss": 0.4077, "step": 1017 }, { "epoch": 9.866265060240965, "grad_norm": 6.557310104370117, "learning_rate": 4.505825242718447e-05, "loss": 0.4856, "step": 1018 }, { "epoch": 9.875903614457831, "grad_norm": 1.6660066843032837, "learning_rate": 4.505339805825243e-05, "loss": 0.3092, "step": 1019 }, { "epoch": 9.885542168674698, "grad_norm": 6.542596340179443, "learning_rate": 4.504854368932039e-05, "loss": 0.3733, "step": 1020 }, { "epoch": 9.895180722891567, "grad_norm": 8.795742988586426, "learning_rate": 4.504368932038835e-05, "loss": 0.2079, "step": 1021 }, { "epoch": 9.904819277108434, "grad_norm": 6.676092147827148, "learning_rate": 4.503883495145631e-05, "loss": 0.4291, "step": 1022 }, { "epoch": 9.9144578313253, "grad_norm": 9.160191535949707, "learning_rate": 4.503398058252428e-05, "loss": 0.1934, "step": 1023 }, { "epoch": 9.92409638554217, "grad_norm": 3.0758252143859863, "learning_rate": 4.5029126213592236e-05, "loss": 0.3136, "step": 1024 }, { "epoch": 9.933734939759036, "grad_norm": 9.601761817932129, "learning_rate": 4.50242718446602e-05, "loss": 0.1714, "step": 1025 }, { "epoch": 9.943373493975903, "grad_norm": 2.4072418212890625, "learning_rate": 4.501941747572816e-05, "loss": 0.2561, "step": 1026 }, { "epoch": 9.953012048192772, "grad_norm": 3.0207908153533936, "learning_rate": 4.501456310679612e-05, "loss": 0.1963, "step": 1027 }, { "epoch": 9.962650602409639, "grad_norm": 9.485841751098633, "learning_rate": 4.500970873786408e-05, "loss": 0.4775, "step": 1028 }, { "epoch": 9.972289156626506, "grad_norm": 13.193740844726562, "learning_rate": 4.5004854368932045e-05, "loss": 0.5056, "step": 1029 }, { "epoch": 9.981927710843374, "grad_norm": 2.395433187484741, "learning_rate": 4.5e-05, "loss": 0.3884, "step": 1030 }, { "epoch": 9.991566265060241, "grad_norm": 8.496499061584473, "learning_rate": 4.499514563106797e-05, "loss": 0.3624, "step": 1031 }, { "epoch": 10.007228915662651, "grad_norm": 3.1416077613830566, "learning_rate": 4.4990291262135925e-05, "loss": 0.4397, "step": 1032 }, { "epoch": 10.016867469879518, "grad_norm": 1.943979024887085, "learning_rate": 4.498543689320388e-05, "loss": 0.4032, "step": 1033 }, { "epoch": 10.026506024096385, "grad_norm": 18.830293655395508, "learning_rate": 4.498058252427185e-05, "loss": 0.2818, "step": 1034 }, { "epoch": 10.036144578313253, "grad_norm": 3.0662693977355957, "learning_rate": 4.4975728155339805e-05, "loss": 0.2291, "step": 1035 }, { "epoch": 10.04578313253012, "grad_norm": 16.53871726989746, "learning_rate": 4.497087378640777e-05, "loss": 0.3761, "step": 1036 }, { "epoch": 10.055421686746987, "grad_norm": 18.26026153564453, "learning_rate": 4.496601941747573e-05, "loss": 0.4056, "step": 1037 }, { "epoch": 10.065060240963856, "grad_norm": 3.330749988555908, "learning_rate": 4.496116504854369e-05, "loss": 0.2369, "step": 1038 }, { "epoch": 10.074698795180723, "grad_norm": 3.402003765106201, "learning_rate": 4.495631067961165e-05, "loss": 0.211, "step": 1039 }, { "epoch": 10.08433734939759, "grad_norm": 3.1910033226013184, "learning_rate": 4.4951456310679614e-05, "loss": 0.3937, "step": 1040 }, { "epoch": 10.093975903614458, "grad_norm": 1.5469385385513306, "learning_rate": 4.494660194174757e-05, "loss": 0.1007, "step": 1041 }, { "epoch": 10.103614457831325, "grad_norm": 4.437119483947754, "learning_rate": 4.4941747572815537e-05, "loss": 0.3377, "step": 1042 }, { "epoch": 10.113253012048192, "grad_norm": 2.3382568359375, "learning_rate": 4.4936893203883494e-05, "loss": 0.3165, "step": 1043 }, { "epoch": 10.12289156626506, "grad_norm": 5.052382946014404, "learning_rate": 4.493203883495146e-05, "loss": 0.2518, "step": 1044 }, { "epoch": 10.132530120481928, "grad_norm": 7.033137321472168, "learning_rate": 4.492718446601942e-05, "loss": 0.1843, "step": 1045 }, { "epoch": 10.142168674698794, "grad_norm": 10.213798522949219, "learning_rate": 4.492233009708738e-05, "loss": 0.3212, "step": 1046 }, { "epoch": 10.151807228915663, "grad_norm": 11.091693878173828, "learning_rate": 4.491747572815534e-05, "loss": 0.3076, "step": 1047 }, { "epoch": 10.16144578313253, "grad_norm": 15.784089088439941, "learning_rate": 4.4912621359223303e-05, "loss": 0.1925, "step": 1048 }, { "epoch": 10.171084337349397, "grad_norm": 9.084163665771484, "learning_rate": 4.490776699029127e-05, "loss": 0.3184, "step": 1049 }, { "epoch": 10.180722891566266, "grad_norm": 2.259723424911499, "learning_rate": 4.4902912621359226e-05, "loss": 0.3823, "step": 1050 }, { "epoch": 10.190361445783132, "grad_norm": 9.281780242919922, "learning_rate": 4.489805825242719e-05, "loss": 0.2631, "step": 1051 }, { "epoch": 10.2, "grad_norm": 4.540655136108398, "learning_rate": 4.489320388349515e-05, "loss": 0.3631, "step": 1052 }, { "epoch": 10.209638554216868, "grad_norm": 8.873237609863281, "learning_rate": 4.488834951456311e-05, "loss": 0.2365, "step": 1053 }, { "epoch": 10.219277108433735, "grad_norm": 2.0578415393829346, "learning_rate": 4.488349514563107e-05, "loss": 0.2045, "step": 1054 }, { "epoch": 10.228915662650602, "grad_norm": 6.597756862640381, "learning_rate": 4.4878640776699035e-05, "loss": 0.2879, "step": 1055 }, { "epoch": 10.23855421686747, "grad_norm": 3.8462586402893066, "learning_rate": 4.487378640776699e-05, "loss": 0.3181, "step": 1056 }, { "epoch": 10.248192771084337, "grad_norm": 23.908533096313477, "learning_rate": 4.486893203883496e-05, "loss": 0.2613, "step": 1057 }, { "epoch": 10.257831325301204, "grad_norm": 2.6604559421539307, "learning_rate": 4.4864077669902915e-05, "loss": 0.2341, "step": 1058 }, { "epoch": 10.267469879518073, "grad_norm": 3.4015274047851562, "learning_rate": 4.485922330097088e-05, "loss": 0.3763, "step": 1059 }, { "epoch": 10.27710843373494, "grad_norm": 12.619584083557129, "learning_rate": 4.485436893203884e-05, "loss": 0.3442, "step": 1060 }, { "epoch": 10.286746987951807, "grad_norm": 1.7620635032653809, "learning_rate": 4.4849514563106795e-05, "loss": 0.226, "step": 1061 }, { "epoch": 10.296385542168675, "grad_norm": 12.501205444335938, "learning_rate": 4.484466019417476e-05, "loss": 0.3437, "step": 1062 }, { "epoch": 10.306024096385542, "grad_norm": 5.76066780090332, "learning_rate": 4.483980582524272e-05, "loss": 0.2026, "step": 1063 }, { "epoch": 10.315662650602409, "grad_norm": 6.401462078094482, "learning_rate": 4.483495145631068e-05, "loss": 0.1649, "step": 1064 }, { "epoch": 10.325301204819278, "grad_norm": 2.6603262424468994, "learning_rate": 4.483009708737864e-05, "loss": 0.2217, "step": 1065 }, { "epoch": 10.334939759036144, "grad_norm": 7.071871280670166, "learning_rate": 4.4825242718446604e-05, "loss": 0.2966, "step": 1066 }, { "epoch": 10.344578313253011, "grad_norm": 1.701955795288086, "learning_rate": 4.482038834951456e-05, "loss": 0.2526, "step": 1067 }, { "epoch": 10.35421686746988, "grad_norm": 12.306977272033691, "learning_rate": 4.4815533980582526e-05, "loss": 0.2999, "step": 1068 }, { "epoch": 10.363855421686747, "grad_norm": 2.1271047592163086, "learning_rate": 4.4810679611650484e-05, "loss": 0.272, "step": 1069 }, { "epoch": 10.373493975903614, "grad_norm": 10.045913696289062, "learning_rate": 4.480582524271845e-05, "loss": 0.3771, "step": 1070 }, { "epoch": 10.383132530120482, "grad_norm": 8.322867393493652, "learning_rate": 4.4800970873786406e-05, "loss": 0.1238, "step": 1071 }, { "epoch": 10.39277108433735, "grad_norm": 6.456303596496582, "learning_rate": 4.479611650485437e-05, "loss": 0.1787, "step": 1072 }, { "epoch": 10.402409638554216, "grad_norm": 9.818809509277344, "learning_rate": 4.4791262135922335e-05, "loss": 0.3577, "step": 1073 }, { "epoch": 10.412048192771085, "grad_norm": 3.0295491218566895, "learning_rate": 4.478640776699029e-05, "loss": 0.1871, "step": 1074 }, { "epoch": 10.421686746987952, "grad_norm": 2.4659526348114014, "learning_rate": 4.478155339805826e-05, "loss": 0.4324, "step": 1075 }, { "epoch": 10.431325301204819, "grad_norm": 10.308305740356445, "learning_rate": 4.4776699029126216e-05, "loss": 0.4955, "step": 1076 }, { "epoch": 10.440963855421687, "grad_norm": 18.042322158813477, "learning_rate": 4.477184466019418e-05, "loss": 0.2709, "step": 1077 }, { "epoch": 10.450602409638554, "grad_norm": 10.598848342895508, "learning_rate": 4.476699029126214e-05, "loss": 0.4082, "step": 1078 }, { "epoch": 10.460240963855421, "grad_norm": 3.468268871307373, "learning_rate": 4.47621359223301e-05, "loss": 0.2024, "step": 1079 }, { "epoch": 10.46987951807229, "grad_norm": 4.095434665679932, "learning_rate": 4.475728155339806e-05, "loss": 0.4364, "step": 1080 }, { "epoch": 10.479518072289157, "grad_norm": 5.274271011352539, "learning_rate": 4.4752427184466025e-05, "loss": 0.3066, "step": 1081 }, { "epoch": 10.489156626506023, "grad_norm": 17.760093688964844, "learning_rate": 4.474757281553398e-05, "loss": 0.2989, "step": 1082 }, { "epoch": 10.498795180722892, "grad_norm": 13.076791763305664, "learning_rate": 4.474271844660195e-05, "loss": 0.3352, "step": 1083 }, { "epoch": 10.508433734939759, "grad_norm": 4.198191165924072, "learning_rate": 4.4737864077669905e-05, "loss": 0.2764, "step": 1084 }, { "epoch": 10.518072289156626, "grad_norm": 6.5984320640563965, "learning_rate": 4.473300970873787e-05, "loss": 0.2897, "step": 1085 }, { "epoch": 10.527710843373494, "grad_norm": 6.44931697845459, "learning_rate": 4.472815533980583e-05, "loss": 0.3524, "step": 1086 }, { "epoch": 10.537349397590361, "grad_norm": 13.41800594329834, "learning_rate": 4.472330097087379e-05, "loss": 0.3473, "step": 1087 }, { "epoch": 10.546987951807228, "grad_norm": 2.746154308319092, "learning_rate": 4.471844660194175e-05, "loss": 0.2466, "step": 1088 }, { "epoch": 10.556626506024097, "grad_norm": 15.1647310256958, "learning_rate": 4.471359223300971e-05, "loss": 0.3498, "step": 1089 }, { "epoch": 10.566265060240964, "grad_norm": 2.469665050506592, "learning_rate": 4.470873786407767e-05, "loss": 0.3611, "step": 1090 }, { "epoch": 10.57590361445783, "grad_norm": 8.269415855407715, "learning_rate": 4.470388349514563e-05, "loss": 0.3226, "step": 1091 }, { "epoch": 10.5855421686747, "grad_norm": 2.366837739944458, "learning_rate": 4.4699029126213594e-05, "loss": 0.2204, "step": 1092 }, { "epoch": 10.595180722891566, "grad_norm": 3.6362106800079346, "learning_rate": 4.469417475728155e-05, "loss": 0.2859, "step": 1093 }, { "epoch": 10.604819277108433, "grad_norm": 3.2962615489959717, "learning_rate": 4.4689320388349516e-05, "loss": 0.1865, "step": 1094 }, { "epoch": 10.614457831325302, "grad_norm": 2.1902334690093994, "learning_rate": 4.4684466019417474e-05, "loss": 0.1788, "step": 1095 }, { "epoch": 10.624096385542169, "grad_norm": 3.8165555000305176, "learning_rate": 4.467961165048544e-05, "loss": 0.3852, "step": 1096 }, { "epoch": 10.633734939759035, "grad_norm": 13.269725799560547, "learning_rate": 4.4674757281553396e-05, "loss": 0.481, "step": 1097 }, { "epoch": 10.643373493975904, "grad_norm": 3.618467330932617, "learning_rate": 4.466990291262136e-05, "loss": 0.2854, "step": 1098 }, { "epoch": 10.653012048192771, "grad_norm": 1.3072086572647095, "learning_rate": 4.4665048543689325e-05, "loss": 0.2626, "step": 1099 }, { "epoch": 10.662650602409638, "grad_norm": 14.123997688293457, "learning_rate": 4.466019417475728e-05, "loss": 0.2132, "step": 1100 }, { "epoch": 10.672289156626507, "grad_norm": 8.630019187927246, "learning_rate": 4.465533980582525e-05, "loss": 0.2845, "step": 1101 }, { "epoch": 10.681927710843373, "grad_norm": 3.5724825859069824, "learning_rate": 4.4650485436893205e-05, "loss": 0.4349, "step": 1102 }, { "epoch": 10.69156626506024, "grad_norm": 23.682559967041016, "learning_rate": 4.464563106796117e-05, "loss": 0.4695, "step": 1103 }, { "epoch": 10.701204819277109, "grad_norm": 6.225741386413574, "learning_rate": 4.464077669902913e-05, "loss": 0.3626, "step": 1104 }, { "epoch": 10.710843373493976, "grad_norm": 15.386185646057129, "learning_rate": 4.463592233009709e-05, "loss": 0.1774, "step": 1105 }, { "epoch": 10.720481927710843, "grad_norm": 3.3343923091888428, "learning_rate": 4.463106796116505e-05, "loss": 0.1475, "step": 1106 }, { "epoch": 10.730120481927711, "grad_norm": 13.182147979736328, "learning_rate": 4.4626213592233014e-05, "loss": 0.2793, "step": 1107 }, { "epoch": 10.739759036144578, "grad_norm": 5.711537837982178, "learning_rate": 4.462135922330097e-05, "loss": 0.2136, "step": 1108 }, { "epoch": 10.749397590361445, "grad_norm": 16.617185592651367, "learning_rate": 4.461650485436894e-05, "loss": 0.3886, "step": 1109 }, { "epoch": 10.759036144578314, "grad_norm": 4.628533363342285, "learning_rate": 4.4611650485436894e-05, "loss": 0.3284, "step": 1110 }, { "epoch": 10.76867469879518, "grad_norm": 2.3436472415924072, "learning_rate": 4.460679611650486e-05, "loss": 0.2031, "step": 1111 }, { "epoch": 10.778313253012048, "grad_norm": 5.942230224609375, "learning_rate": 4.460194174757282e-05, "loss": 0.1932, "step": 1112 }, { "epoch": 10.787951807228916, "grad_norm": 6.059369087219238, "learning_rate": 4.459708737864078e-05, "loss": 0.2417, "step": 1113 }, { "epoch": 10.797590361445783, "grad_norm": 13.678839683532715, "learning_rate": 4.459223300970874e-05, "loss": 0.3641, "step": 1114 }, { "epoch": 10.80722891566265, "grad_norm": 2.239344596862793, "learning_rate": 4.4587378640776704e-05, "loss": 0.1037, "step": 1115 }, { "epoch": 10.816867469879519, "grad_norm": 11.460618019104004, "learning_rate": 4.458252427184466e-05, "loss": 0.2918, "step": 1116 }, { "epoch": 10.826506024096386, "grad_norm": 2.613842010498047, "learning_rate": 4.457766990291262e-05, "loss": 0.2011, "step": 1117 }, { "epoch": 10.836144578313252, "grad_norm": 9.033576011657715, "learning_rate": 4.4572815533980584e-05, "loss": 0.3341, "step": 1118 }, { "epoch": 10.845783132530121, "grad_norm": 11.734128952026367, "learning_rate": 4.456796116504854e-05, "loss": 0.1848, "step": 1119 }, { "epoch": 10.855421686746988, "grad_norm": 3.5450868606567383, "learning_rate": 4.4563106796116506e-05, "loss": 0.1807, "step": 1120 }, { "epoch": 10.865060240963855, "grad_norm": 4.560002326965332, "learning_rate": 4.4558252427184464e-05, "loss": 0.2691, "step": 1121 }, { "epoch": 10.874698795180723, "grad_norm": 1.6592360734939575, "learning_rate": 4.455339805825243e-05, "loss": 0.249, "step": 1122 }, { "epoch": 10.88433734939759, "grad_norm": 10.365317344665527, "learning_rate": 4.4548543689320386e-05, "loss": 0.2923, "step": 1123 }, { "epoch": 10.893975903614457, "grad_norm": 6.439486026763916, "learning_rate": 4.454368932038836e-05, "loss": 0.3201, "step": 1124 }, { "epoch": 10.903614457831326, "grad_norm": 9.264551162719727, "learning_rate": 4.4538834951456315e-05, "loss": 0.4325, "step": 1125 }, { "epoch": 10.913253012048193, "grad_norm": 7.6617207527160645, "learning_rate": 4.453398058252427e-05, "loss": 0.4522, "step": 1126 }, { "epoch": 10.92289156626506, "grad_norm": 6.615184307098389, "learning_rate": 4.452912621359224e-05, "loss": 0.1277, "step": 1127 }, { "epoch": 10.932530120481928, "grad_norm": 8.227173805236816, "learning_rate": 4.4524271844660195e-05, "loss": 0.2368, "step": 1128 }, { "epoch": 10.942168674698795, "grad_norm": 4.8020830154418945, "learning_rate": 4.451941747572816e-05, "loss": 0.2742, "step": 1129 }, { "epoch": 10.951807228915662, "grad_norm": 3.1088485717773438, "learning_rate": 4.451456310679612e-05, "loss": 0.3828, "step": 1130 }, { "epoch": 10.96144578313253, "grad_norm": 2.895491123199463, "learning_rate": 4.450970873786408e-05, "loss": 0.2632, "step": 1131 }, { "epoch": 10.971084337349398, "grad_norm": 6.487618923187256, "learning_rate": 4.450485436893204e-05, "loss": 0.1967, "step": 1132 }, { "epoch": 10.980722891566264, "grad_norm": 2.532501697540283, "learning_rate": 4.4500000000000004e-05, "loss": 0.4408, "step": 1133 }, { "epoch": 10.990361445783133, "grad_norm": 39.36408615112305, "learning_rate": 4.449514563106796e-05, "loss": 0.2819, "step": 1134 }, { "epoch": 11.006024096385541, "grad_norm": 8.858583450317383, "learning_rate": 4.4490291262135927e-05, "loss": 0.3854, "step": 1135 }, { "epoch": 11.01566265060241, "grad_norm": 2.262226104736328, "learning_rate": 4.4485436893203884e-05, "loss": 0.1536, "step": 1136 }, { "epoch": 11.025301204819277, "grad_norm": 6.562002182006836, "learning_rate": 4.448058252427185e-05, "loss": 0.5824, "step": 1137 }, { "epoch": 11.034939759036144, "grad_norm": 2.1388282775878906, "learning_rate": 4.4475728155339807e-05, "loss": 0.3027, "step": 1138 }, { "epoch": 11.044578313253012, "grad_norm": 17.162649154663086, "learning_rate": 4.447087378640777e-05, "loss": 0.2529, "step": 1139 }, { "epoch": 11.05421686746988, "grad_norm": 12.308150291442871, "learning_rate": 4.446601941747573e-05, "loss": 0.2775, "step": 1140 }, { "epoch": 11.063855421686746, "grad_norm": 2.169294834136963, "learning_rate": 4.4461165048543693e-05, "loss": 0.4018, "step": 1141 }, { "epoch": 11.073493975903615, "grad_norm": 9.738301277160645, "learning_rate": 4.445631067961165e-05, "loss": 0.3673, "step": 1142 }, { "epoch": 11.083132530120482, "grad_norm": 6.552781105041504, "learning_rate": 4.445145631067961e-05, "loss": 0.1312, "step": 1143 }, { "epoch": 11.092771084337349, "grad_norm": 14.446232795715332, "learning_rate": 4.4446601941747573e-05, "loss": 0.2257, "step": 1144 }, { "epoch": 11.102409638554217, "grad_norm": 6.055996417999268, "learning_rate": 4.444174757281553e-05, "loss": 0.2598, "step": 1145 }, { "epoch": 11.112048192771084, "grad_norm": 4.559475898742676, "learning_rate": 4.4436893203883496e-05, "loss": 0.1032, "step": 1146 }, { "epoch": 11.121686746987951, "grad_norm": 7.875696659088135, "learning_rate": 4.4432038834951453e-05, "loss": 0.1873, "step": 1147 }, { "epoch": 11.13132530120482, "grad_norm": 4.632721424102783, "learning_rate": 4.442718446601942e-05, "loss": 0.3204, "step": 1148 }, { "epoch": 11.140963855421687, "grad_norm": 4.59813928604126, "learning_rate": 4.442233009708738e-05, "loss": 0.3746, "step": 1149 }, { "epoch": 11.150602409638553, "grad_norm": 6.4712324142456055, "learning_rate": 4.441747572815535e-05, "loss": 0.3381, "step": 1150 }, { "epoch": 11.160240963855422, "grad_norm": 10.597518920898438, "learning_rate": 4.4412621359223305e-05, "loss": 0.2629, "step": 1151 }, { "epoch": 11.169879518072289, "grad_norm": 8.922810554504395, "learning_rate": 4.440776699029127e-05, "loss": 0.24, "step": 1152 }, { "epoch": 11.179518072289156, "grad_norm": 3.771597146987915, "learning_rate": 4.440291262135923e-05, "loss": 0.1127, "step": 1153 }, { "epoch": 11.189156626506024, "grad_norm": 0.8511626124382019, "learning_rate": 4.4398058252427185e-05, "loss": 0.178, "step": 1154 }, { "epoch": 11.198795180722891, "grad_norm": 21.037473678588867, "learning_rate": 4.439320388349515e-05, "loss": 0.391, "step": 1155 }, { "epoch": 11.208433734939758, "grad_norm": 4.582850933074951, "learning_rate": 4.438834951456311e-05, "loss": 0.2414, "step": 1156 }, { "epoch": 11.218072289156627, "grad_norm": 2.287994384765625, "learning_rate": 4.438349514563107e-05, "loss": 0.3228, "step": 1157 }, { "epoch": 11.227710843373494, "grad_norm": 5.268229961395264, "learning_rate": 4.437864077669903e-05, "loss": 0.4567, "step": 1158 }, { "epoch": 11.23734939759036, "grad_norm": 17.525129318237305, "learning_rate": 4.4373786407766994e-05, "loss": 0.5887, "step": 1159 }, { "epoch": 11.24698795180723, "grad_norm": 9.824392318725586, "learning_rate": 4.436893203883495e-05, "loss": 0.4364, "step": 1160 }, { "epoch": 11.256626506024096, "grad_norm": 3.238231658935547, "learning_rate": 4.4364077669902916e-05, "loss": 0.3857, "step": 1161 }, { "epoch": 11.266265060240963, "grad_norm": 9.741973876953125, "learning_rate": 4.4359223300970874e-05, "loss": 0.313, "step": 1162 }, { "epoch": 11.275903614457832, "grad_norm": 2.61279034614563, "learning_rate": 4.435436893203884e-05, "loss": 0.1919, "step": 1163 }, { "epoch": 11.285542168674699, "grad_norm": 18.138320922851562, "learning_rate": 4.4349514563106796e-05, "loss": 0.3535, "step": 1164 }, { "epoch": 11.295180722891565, "grad_norm": 2.925952434539795, "learning_rate": 4.434466019417476e-05, "loss": 0.254, "step": 1165 }, { "epoch": 11.304819277108434, "grad_norm": 14.660362243652344, "learning_rate": 4.433980582524272e-05, "loss": 0.3233, "step": 1166 }, { "epoch": 11.314457831325301, "grad_norm": 10.474801063537598, "learning_rate": 4.433495145631068e-05, "loss": 0.3325, "step": 1167 }, { "epoch": 11.324096385542168, "grad_norm": 2.8798234462738037, "learning_rate": 4.433009708737864e-05, "loss": 0.1685, "step": 1168 }, { "epoch": 11.333734939759037, "grad_norm": 8.876236915588379, "learning_rate": 4.4325242718446605e-05, "loss": 0.3544, "step": 1169 }, { "epoch": 11.343373493975903, "grad_norm": 8.352685928344727, "learning_rate": 4.432038834951456e-05, "loss": 0.3564, "step": 1170 }, { "epoch": 11.35301204819277, "grad_norm": 3.722421407699585, "learning_rate": 4.431553398058252e-05, "loss": 0.219, "step": 1171 }, { "epoch": 11.362650602409639, "grad_norm": 2.2120871543884277, "learning_rate": 4.4310679611650486e-05, "loss": 0.1285, "step": 1172 }, { "epoch": 11.372289156626506, "grad_norm": 9.644623756408691, "learning_rate": 4.430582524271844e-05, "loss": 0.3142, "step": 1173 }, { "epoch": 11.381927710843373, "grad_norm": 6.0193657875061035, "learning_rate": 4.4300970873786415e-05, "loss": 0.26, "step": 1174 }, { "epoch": 11.391566265060241, "grad_norm": 2.4347646236419678, "learning_rate": 4.429611650485437e-05, "loss": 0.1707, "step": 1175 }, { "epoch": 11.401204819277108, "grad_norm": 5.61464786529541, "learning_rate": 4.429126213592234e-05, "loss": 0.3639, "step": 1176 }, { "epoch": 11.410843373493975, "grad_norm": 13.207756996154785, "learning_rate": 4.4286407766990295e-05, "loss": 0.3126, "step": 1177 }, { "epoch": 11.420481927710844, "grad_norm": 2.1387319564819336, "learning_rate": 4.428155339805826e-05, "loss": 0.1983, "step": 1178 }, { "epoch": 11.43012048192771, "grad_norm": 11.132611274719238, "learning_rate": 4.427669902912622e-05, "loss": 0.3827, "step": 1179 }, { "epoch": 11.439759036144578, "grad_norm": 9.771248817443848, "learning_rate": 4.427184466019418e-05, "loss": 0.2935, "step": 1180 }, { "epoch": 11.449397590361446, "grad_norm": 2.931187629699707, "learning_rate": 4.426699029126214e-05, "loss": 0.1263, "step": 1181 }, { "epoch": 11.459036144578313, "grad_norm": 3.9331414699554443, "learning_rate": 4.42621359223301e-05, "loss": 0.3784, "step": 1182 }, { "epoch": 11.46867469879518, "grad_norm": 2.8191325664520264, "learning_rate": 4.425728155339806e-05, "loss": 0.11, "step": 1183 }, { "epoch": 11.478313253012049, "grad_norm": 2.0256125926971436, "learning_rate": 4.425242718446602e-05, "loss": 0.1463, "step": 1184 }, { "epoch": 11.487951807228916, "grad_norm": 3.6101465225219727, "learning_rate": 4.4247572815533984e-05, "loss": 0.3793, "step": 1185 }, { "epoch": 11.497590361445782, "grad_norm": 2.5774152278900146, "learning_rate": 4.424271844660194e-05, "loss": 0.1617, "step": 1186 }, { "epoch": 11.507228915662651, "grad_norm": 26.7233943939209, "learning_rate": 4.4237864077669906e-05, "loss": 0.2693, "step": 1187 }, { "epoch": 11.516867469879518, "grad_norm": 15.650619506835938, "learning_rate": 4.4233009708737864e-05, "loss": 0.3842, "step": 1188 }, { "epoch": 11.526506024096385, "grad_norm": 3.93143630027771, "learning_rate": 4.422815533980583e-05, "loss": 0.4423, "step": 1189 }, { "epoch": 11.536144578313253, "grad_norm": 43.137725830078125, "learning_rate": 4.4223300970873786e-05, "loss": 0.2272, "step": 1190 }, { "epoch": 11.54578313253012, "grad_norm": 3.0414721965789795, "learning_rate": 4.421844660194175e-05, "loss": 0.3348, "step": 1191 }, { "epoch": 11.555421686746987, "grad_norm": 18.749202728271484, "learning_rate": 4.421359223300971e-05, "loss": 0.4165, "step": 1192 }, { "epoch": 11.565060240963856, "grad_norm": 9.934557914733887, "learning_rate": 4.420873786407767e-05, "loss": 0.3071, "step": 1193 }, { "epoch": 11.574698795180723, "grad_norm": 11.307592391967773, "learning_rate": 4.420388349514563e-05, "loss": 0.1903, "step": 1194 }, { "epoch": 11.58433734939759, "grad_norm": 7.435123443603516, "learning_rate": 4.4199029126213595e-05, "loss": 0.2645, "step": 1195 }, { "epoch": 11.593975903614458, "grad_norm": 35.1541633605957, "learning_rate": 4.419417475728155e-05, "loss": 0.2665, "step": 1196 }, { "epoch": 11.603614457831325, "grad_norm": 2.3021347522735596, "learning_rate": 4.418932038834952e-05, "loss": 0.3148, "step": 1197 }, { "epoch": 11.613253012048192, "grad_norm": 10.533269882202148, "learning_rate": 4.4184466019417475e-05, "loss": 0.5076, "step": 1198 }, { "epoch": 11.62289156626506, "grad_norm": 36.48335266113281, "learning_rate": 4.417961165048543e-05, "loss": 0.1498, "step": 1199 }, { "epoch": 11.632530120481928, "grad_norm": 9.24891185760498, "learning_rate": 4.4174757281553404e-05, "loss": 0.4279, "step": 1200 }, { "epoch": 11.642168674698794, "grad_norm": 8.30649185180664, "learning_rate": 4.416990291262136e-05, "loss": 0.2624, "step": 1201 }, { "epoch": 11.651807228915663, "grad_norm": 4.288065433502197, "learning_rate": 4.416504854368933e-05, "loss": 0.2488, "step": 1202 }, { "epoch": 11.66144578313253, "grad_norm": 5.478787422180176, "learning_rate": 4.4160194174757284e-05, "loss": 0.2591, "step": 1203 }, { "epoch": 11.671084337349397, "grad_norm": 1.7044034004211426, "learning_rate": 4.415533980582525e-05, "loss": 0.1934, "step": 1204 }, { "epoch": 11.680722891566266, "grad_norm": 4.986504077911377, "learning_rate": 4.415048543689321e-05, "loss": 0.3316, "step": 1205 }, { "epoch": 11.690361445783132, "grad_norm": 6.729953289031982, "learning_rate": 4.414563106796117e-05, "loss": 0.6176, "step": 1206 }, { "epoch": 11.7, "grad_norm": 5.7840728759765625, "learning_rate": 4.414077669902913e-05, "loss": 0.3225, "step": 1207 }, { "epoch": 11.709638554216868, "grad_norm": 4.307219982147217, "learning_rate": 4.4135922330097094e-05, "loss": 0.2641, "step": 1208 }, { "epoch": 11.719277108433735, "grad_norm": 10.055490493774414, "learning_rate": 4.413106796116505e-05, "loss": 0.2627, "step": 1209 }, { "epoch": 11.728915662650602, "grad_norm": 8.973368644714355, "learning_rate": 4.412621359223301e-05, "loss": 0.2535, "step": 1210 }, { "epoch": 11.73855421686747, "grad_norm": 2.0141217708587646, "learning_rate": 4.4121359223300974e-05, "loss": 0.3192, "step": 1211 }, { "epoch": 11.748192771084337, "grad_norm": 8.289809226989746, "learning_rate": 4.411650485436893e-05, "loss": 0.3891, "step": 1212 }, { "epoch": 11.757831325301204, "grad_norm": 5.2513346672058105, "learning_rate": 4.4111650485436896e-05, "loss": 0.4066, "step": 1213 }, { "epoch": 11.767469879518073, "grad_norm": 9.7924222946167, "learning_rate": 4.4106796116504854e-05, "loss": 0.4168, "step": 1214 }, { "epoch": 11.77710843373494, "grad_norm": 1.6464431285858154, "learning_rate": 4.410194174757282e-05, "loss": 0.2304, "step": 1215 }, { "epoch": 11.786746987951807, "grad_norm": 2.466865301132202, "learning_rate": 4.4097087378640776e-05, "loss": 0.1309, "step": 1216 }, { "epoch": 11.796385542168675, "grad_norm": 3.7587175369262695, "learning_rate": 4.409223300970874e-05, "loss": 0.2768, "step": 1217 }, { "epoch": 11.806024096385542, "grad_norm": 3.5312278270721436, "learning_rate": 4.40873786407767e-05, "loss": 0.4256, "step": 1218 }, { "epoch": 11.815662650602409, "grad_norm": 8.443449974060059, "learning_rate": 4.408252427184466e-05, "loss": 0.263, "step": 1219 }, { "epoch": 11.825301204819278, "grad_norm": 2.5530197620391846, "learning_rate": 4.407766990291262e-05, "loss": 0.3007, "step": 1220 }, { "epoch": 11.834939759036144, "grad_norm": 2.366086959838867, "learning_rate": 4.4072815533980585e-05, "loss": 0.2364, "step": 1221 }, { "epoch": 11.844578313253011, "grad_norm": 2.344722032546997, "learning_rate": 4.406796116504854e-05, "loss": 0.5523, "step": 1222 }, { "epoch": 11.85421686746988, "grad_norm": 11.197891235351562, "learning_rate": 4.406310679611651e-05, "loss": 0.377, "step": 1223 }, { "epoch": 11.863855421686747, "grad_norm": 4.98854923248291, "learning_rate": 4.4058252427184465e-05, "loss": 0.2386, "step": 1224 }, { "epoch": 11.873493975903614, "grad_norm": 5.4037394523620605, "learning_rate": 4.405339805825243e-05, "loss": 0.3705, "step": 1225 }, { "epoch": 11.883132530120482, "grad_norm": 2.0433871746063232, "learning_rate": 4.4048543689320394e-05, "loss": 0.1823, "step": 1226 }, { "epoch": 11.89277108433735, "grad_norm": 8.152776718139648, "learning_rate": 4.404368932038835e-05, "loss": 0.3732, "step": 1227 }, { "epoch": 11.902409638554216, "grad_norm": 5.265354156494141, "learning_rate": 4.4038834951456316e-05, "loss": 0.2465, "step": 1228 }, { "epoch": 11.912048192771085, "grad_norm": 5.9933180809021, "learning_rate": 4.4033980582524274e-05, "loss": 0.4072, "step": 1229 }, { "epoch": 11.921686746987952, "grad_norm": 14.961624145507812, "learning_rate": 4.402912621359224e-05, "loss": 0.2355, "step": 1230 }, { "epoch": 11.931325301204819, "grad_norm": 9.502731323242188, "learning_rate": 4.4024271844660197e-05, "loss": 0.4248, "step": 1231 }, { "epoch": 11.940963855421687, "grad_norm": 1.3887333869934082, "learning_rate": 4.401941747572816e-05, "loss": 0.1882, "step": 1232 }, { "epoch": 11.950602409638554, "grad_norm": 3.976956367492676, "learning_rate": 4.401456310679612e-05, "loss": 0.4261, "step": 1233 }, { "epoch": 11.960240963855421, "grad_norm": 16.552339553833008, "learning_rate": 4.400970873786408e-05, "loss": 0.2604, "step": 1234 }, { "epoch": 11.96987951807229, "grad_norm": 5.28723669052124, "learning_rate": 4.400485436893204e-05, "loss": 0.5358, "step": 1235 }, { "epoch": 11.979518072289157, "grad_norm": 14.14853572845459, "learning_rate": 4.4000000000000006e-05, "loss": 0.7041, "step": 1236 }, { "epoch": 11.989156626506023, "grad_norm": 7.282172679901123, "learning_rate": 4.3995145631067963e-05, "loss": 0.2491, "step": 1237 }, { "epoch": 12.004819277108433, "grad_norm": 2.9922327995300293, "learning_rate": 4.399029126213592e-05, "loss": 0.2927, "step": 1238 }, { "epoch": 12.0144578313253, "grad_norm": 7.461159706115723, "learning_rate": 4.3985436893203886e-05, "loss": 0.1637, "step": 1239 }, { "epoch": 12.024096385542169, "grad_norm": 2.8126564025878906, "learning_rate": 4.3980582524271843e-05, "loss": 0.317, "step": 1240 }, { "epoch": 12.033734939759036, "grad_norm": 2.1303179264068604, "learning_rate": 4.397572815533981e-05, "loss": 0.137, "step": 1241 }, { "epoch": 12.043373493975903, "grad_norm": 2.7368807792663574, "learning_rate": 4.3970873786407766e-05, "loss": 0.1286, "step": 1242 }, { "epoch": 12.053012048192771, "grad_norm": 6.892124652862549, "learning_rate": 4.396601941747573e-05, "loss": 0.442, "step": 1243 }, { "epoch": 12.062650602409638, "grad_norm": 3.5895791053771973, "learning_rate": 4.396116504854369e-05, "loss": 0.3698, "step": 1244 }, { "epoch": 12.072289156626505, "grad_norm": 4.445648670196533, "learning_rate": 4.395631067961165e-05, "loss": 0.2156, "step": 1245 }, { "epoch": 12.081927710843374, "grad_norm": 5.0899810791015625, "learning_rate": 4.395145631067961e-05, "loss": 0.144, "step": 1246 }, { "epoch": 12.09156626506024, "grad_norm": 2.620093822479248, "learning_rate": 4.3946601941747575e-05, "loss": 0.2911, "step": 1247 }, { "epoch": 12.101204819277108, "grad_norm": 2.2984232902526855, "learning_rate": 4.394174757281553e-05, "loss": 0.1819, "step": 1248 }, { "epoch": 12.110843373493976, "grad_norm": 4.448767185211182, "learning_rate": 4.39368932038835e-05, "loss": 0.4195, "step": 1249 }, { "epoch": 12.120481927710843, "grad_norm": 8.705835342407227, "learning_rate": 4.393203883495146e-05, "loss": 0.1874, "step": 1250 }, { "epoch": 12.13012048192771, "grad_norm": 3.2974133491516113, "learning_rate": 4.392718446601942e-05, "loss": 0.3317, "step": 1251 }, { "epoch": 12.139759036144579, "grad_norm": 13.106084823608398, "learning_rate": 4.3922330097087384e-05, "loss": 0.2227, "step": 1252 }, { "epoch": 12.149397590361446, "grad_norm": 5.010156631469727, "learning_rate": 4.391747572815534e-05, "loss": 0.4202, "step": 1253 }, { "epoch": 12.159036144578312, "grad_norm": 9.34679889678955, "learning_rate": 4.3912621359223306e-05, "loss": 0.3019, "step": 1254 }, { "epoch": 12.168674698795181, "grad_norm": 2.929569721221924, "learning_rate": 4.3907766990291264e-05, "loss": 0.2655, "step": 1255 }, { "epoch": 12.178313253012048, "grad_norm": 3.096043825149536, "learning_rate": 4.390291262135923e-05, "loss": 0.3091, "step": 1256 }, { "epoch": 12.187951807228915, "grad_norm": 4.779329299926758, "learning_rate": 4.3898058252427186e-05, "loss": 0.3075, "step": 1257 }, { "epoch": 12.197590361445783, "grad_norm": 4.026431560516357, "learning_rate": 4.389320388349515e-05, "loss": 0.4783, "step": 1258 }, { "epoch": 12.20722891566265, "grad_norm": 3.839351177215576, "learning_rate": 4.388834951456311e-05, "loss": 0.2957, "step": 1259 }, { "epoch": 12.216867469879517, "grad_norm": 8.327289581298828, "learning_rate": 4.388349514563107e-05, "loss": 0.3265, "step": 1260 }, { "epoch": 12.226506024096386, "grad_norm": 9.247779846191406, "learning_rate": 4.387864077669903e-05, "loss": 0.3915, "step": 1261 }, { "epoch": 12.236144578313253, "grad_norm": 3.1057024002075195, "learning_rate": 4.3873786407766995e-05, "loss": 0.2265, "step": 1262 }, { "epoch": 12.24578313253012, "grad_norm": 7.338630199432373, "learning_rate": 4.386893203883495e-05, "loss": 0.3379, "step": 1263 }, { "epoch": 12.255421686746988, "grad_norm": 5.677476406097412, "learning_rate": 4.386407766990292e-05, "loss": 0.3126, "step": 1264 }, { "epoch": 12.265060240963855, "grad_norm": 3.9681637287139893, "learning_rate": 4.3859223300970875e-05, "loss": 0.3033, "step": 1265 }, { "epoch": 12.274698795180722, "grad_norm": 1.793045997619629, "learning_rate": 4.385436893203883e-05, "loss": 0.2131, "step": 1266 }, { "epoch": 12.28433734939759, "grad_norm": 5.233617782592773, "learning_rate": 4.38495145631068e-05, "loss": 0.2801, "step": 1267 }, { "epoch": 12.293975903614458, "grad_norm": 5.36616325378418, "learning_rate": 4.3844660194174756e-05, "loss": 0.3421, "step": 1268 }, { "epoch": 12.303614457831324, "grad_norm": 3.5607750415802, "learning_rate": 4.383980582524272e-05, "loss": 0.2521, "step": 1269 }, { "epoch": 12.313253012048193, "grad_norm": 2.9687955379486084, "learning_rate": 4.383495145631068e-05, "loss": 0.3923, "step": 1270 }, { "epoch": 12.32289156626506, "grad_norm": 2.0053601264953613, "learning_rate": 4.383009708737864e-05, "loss": 0.1879, "step": 1271 }, { "epoch": 12.332530120481927, "grad_norm": 5.211769104003906, "learning_rate": 4.38252427184466e-05, "loss": 0.5357, "step": 1272 }, { "epoch": 12.342168674698796, "grad_norm": 4.1479949951171875, "learning_rate": 4.3820388349514565e-05, "loss": 0.3942, "step": 1273 }, { "epoch": 12.351807228915662, "grad_norm": 4.849013328552246, "learning_rate": 4.381553398058252e-05, "loss": 0.1944, "step": 1274 }, { "epoch": 12.36144578313253, "grad_norm": 2.256784439086914, "learning_rate": 4.381067961165049e-05, "loss": 0.2248, "step": 1275 }, { "epoch": 12.371084337349398, "grad_norm": 4.831053256988525, "learning_rate": 4.380582524271845e-05, "loss": 0.4613, "step": 1276 }, { "epoch": 12.380722891566265, "grad_norm": 14.168327331542969, "learning_rate": 4.380097087378641e-05, "loss": 0.3375, "step": 1277 }, { "epoch": 12.390361445783132, "grad_norm": 2.8806073665618896, "learning_rate": 4.3796116504854374e-05, "loss": 0.1364, "step": 1278 }, { "epoch": 12.4, "grad_norm": 5.25223445892334, "learning_rate": 4.379126213592233e-05, "loss": 0.3001, "step": 1279 }, { "epoch": 12.409638554216867, "grad_norm": 13.92043685913086, "learning_rate": 4.3786407766990296e-05, "loss": 0.6213, "step": 1280 }, { "epoch": 12.419277108433734, "grad_norm": 2.894956350326538, "learning_rate": 4.3781553398058254e-05, "loss": 0.2365, "step": 1281 }, { "epoch": 12.428915662650603, "grad_norm": 8.316850662231445, "learning_rate": 4.377669902912622e-05, "loss": 0.2702, "step": 1282 }, { "epoch": 12.43855421686747, "grad_norm": 31.428171157836914, "learning_rate": 4.3771844660194176e-05, "loss": 0.4505, "step": 1283 }, { "epoch": 12.448192771084337, "grad_norm": 2.826986312866211, "learning_rate": 4.376699029126214e-05, "loss": 0.2627, "step": 1284 }, { "epoch": 12.457831325301205, "grad_norm": 5.865420818328857, "learning_rate": 4.37621359223301e-05, "loss": 0.1464, "step": 1285 }, { "epoch": 12.467469879518072, "grad_norm": 3.3665599822998047, "learning_rate": 4.375728155339806e-05, "loss": 0.194, "step": 1286 }, { "epoch": 12.477108433734939, "grad_norm": 3.540764570236206, "learning_rate": 4.375242718446602e-05, "loss": 0.237, "step": 1287 }, { "epoch": 12.486746987951808, "grad_norm": 2.1545093059539795, "learning_rate": 4.3747572815533985e-05, "loss": 0.3583, "step": 1288 }, { "epoch": 12.496385542168674, "grad_norm": 10.785856246948242, "learning_rate": 4.374271844660194e-05, "loss": 0.4941, "step": 1289 }, { "epoch": 12.506024096385541, "grad_norm": 8.146798133850098, "learning_rate": 4.373786407766991e-05, "loss": 0.2916, "step": 1290 }, { "epoch": 12.51566265060241, "grad_norm": 17.572216033935547, "learning_rate": 4.3733009708737865e-05, "loss": 0.1509, "step": 1291 }, { "epoch": 12.525301204819277, "grad_norm": 13.1482515335083, "learning_rate": 4.372815533980582e-05, "loss": 0.3893, "step": 1292 }, { "epoch": 12.534939759036144, "grad_norm": 2.4846973419189453, "learning_rate": 4.372330097087379e-05, "loss": 0.2791, "step": 1293 }, { "epoch": 12.544578313253012, "grad_norm": 7.301854610443115, "learning_rate": 4.3718446601941745e-05, "loss": 0.3064, "step": 1294 }, { "epoch": 12.55421686746988, "grad_norm": 3.9696271419525146, "learning_rate": 4.371359223300971e-05, "loss": 0.2364, "step": 1295 }, { "epoch": 12.563855421686746, "grad_norm": 2.773906946182251, "learning_rate": 4.370873786407767e-05, "loss": 0.2797, "step": 1296 }, { "epoch": 12.573493975903615, "grad_norm": 4.325764179229736, "learning_rate": 4.370388349514563e-05, "loss": 0.2241, "step": 1297 }, { "epoch": 12.583132530120482, "grad_norm": 9.744473457336426, "learning_rate": 4.369902912621359e-05, "loss": 0.3045, "step": 1298 }, { "epoch": 12.592771084337349, "grad_norm": 1.9170900583267212, "learning_rate": 4.3694174757281554e-05, "loss": 0.2368, "step": 1299 }, { "epoch": 12.602409638554217, "grad_norm": 3.0737972259521484, "learning_rate": 4.368932038834951e-05, "loss": 0.2901, "step": 1300 }, { "epoch": 12.612048192771084, "grad_norm": 4.998560428619385, "learning_rate": 4.3684466019417483e-05, "loss": 0.3346, "step": 1301 }, { "epoch": 12.621686746987951, "grad_norm": 4.375753879547119, "learning_rate": 4.367961165048544e-05, "loss": 0.3255, "step": 1302 }, { "epoch": 12.63132530120482, "grad_norm": 5.446784973144531, "learning_rate": 4.36747572815534e-05, "loss": 0.1542, "step": 1303 }, { "epoch": 12.640963855421687, "grad_norm": 6.52820348739624, "learning_rate": 4.3669902912621364e-05, "loss": 0.249, "step": 1304 }, { "epoch": 12.650602409638553, "grad_norm": 15.424198150634766, "learning_rate": 4.366504854368932e-05, "loss": 0.3791, "step": 1305 }, { "epoch": 12.660240963855422, "grad_norm": 5.102181434631348, "learning_rate": 4.3660194174757286e-05, "loss": 0.1358, "step": 1306 }, { "epoch": 12.669879518072289, "grad_norm": 3.321359872817993, "learning_rate": 4.3655339805825244e-05, "loss": 0.1225, "step": 1307 }, { "epoch": 12.679518072289156, "grad_norm": 3.508632183074951, "learning_rate": 4.365048543689321e-05, "loss": 0.3062, "step": 1308 }, { "epoch": 12.689156626506024, "grad_norm": 6.3403120040893555, "learning_rate": 4.3645631067961166e-05, "loss": 0.2064, "step": 1309 }, { "epoch": 12.698795180722891, "grad_norm": 2.9074223041534424, "learning_rate": 4.364077669902913e-05, "loss": 0.2903, "step": 1310 }, { "epoch": 12.708433734939758, "grad_norm": 3.0327796936035156, "learning_rate": 4.363592233009709e-05, "loss": 0.3685, "step": 1311 }, { "epoch": 12.718072289156627, "grad_norm": 4.006925582885742, "learning_rate": 4.363106796116505e-05, "loss": 0.4058, "step": 1312 }, { "epoch": 12.727710843373494, "grad_norm": 2.325636863708496, "learning_rate": 4.362621359223301e-05, "loss": 0.2097, "step": 1313 }, { "epoch": 12.73734939759036, "grad_norm": 2.259495973587036, "learning_rate": 4.3621359223300975e-05, "loss": 0.2027, "step": 1314 }, { "epoch": 12.74698795180723, "grad_norm": 6.807149410247803, "learning_rate": 4.361650485436893e-05, "loss": 0.215, "step": 1315 }, { "epoch": 12.756626506024096, "grad_norm": 1.8460570573806763, "learning_rate": 4.36116504854369e-05, "loss": 0.2029, "step": 1316 }, { "epoch": 12.766265060240963, "grad_norm": 3.609384536743164, "learning_rate": 4.3606796116504855e-05, "loss": 0.207, "step": 1317 }, { "epoch": 12.775903614457832, "grad_norm": 2.4740359783172607, "learning_rate": 4.360194174757282e-05, "loss": 0.1533, "step": 1318 }, { "epoch": 12.785542168674699, "grad_norm": 2.8870456218719482, "learning_rate": 4.359708737864078e-05, "loss": 0.2693, "step": 1319 }, { "epoch": 12.795180722891565, "grad_norm": 8.586925506591797, "learning_rate": 4.3592233009708735e-05, "loss": 0.2596, "step": 1320 }, { "epoch": 12.804819277108434, "grad_norm": 3.9612009525299072, "learning_rate": 4.35873786407767e-05, "loss": 0.3641, "step": 1321 }, { "epoch": 12.814457831325301, "grad_norm": 8.717135429382324, "learning_rate": 4.358252427184466e-05, "loss": 0.3367, "step": 1322 }, { "epoch": 12.824096385542168, "grad_norm": 5.400077819824219, "learning_rate": 4.357766990291262e-05, "loss": 0.1123, "step": 1323 }, { "epoch": 12.833734939759037, "grad_norm": 30.755510330200195, "learning_rate": 4.357281553398058e-05, "loss": 0.3005, "step": 1324 }, { "epoch": 12.843373493975903, "grad_norm": 9.9277982711792, "learning_rate": 4.3567961165048544e-05, "loss": 0.3448, "step": 1325 }, { "epoch": 12.85301204819277, "grad_norm": 2.25848388671875, "learning_rate": 4.356310679611651e-05, "loss": 0.2606, "step": 1326 }, { "epoch": 12.862650602409639, "grad_norm": 2.045881986618042, "learning_rate": 4.355825242718447e-05, "loss": 0.2482, "step": 1327 }, { "epoch": 12.872289156626506, "grad_norm": 3.4170026779174805, "learning_rate": 4.355339805825243e-05, "loss": 0.3212, "step": 1328 }, { "epoch": 12.881927710843373, "grad_norm": 5.730910778045654, "learning_rate": 4.3548543689320396e-05, "loss": 0.3698, "step": 1329 }, { "epoch": 12.891566265060241, "grad_norm": 8.608059883117676, "learning_rate": 4.354368932038835e-05, "loss": 0.4109, "step": 1330 }, { "epoch": 12.901204819277108, "grad_norm": 5.007160186767578, "learning_rate": 4.353883495145631e-05, "loss": 0.3317, "step": 1331 }, { "epoch": 12.910843373493975, "grad_norm": 4.3434529304504395, "learning_rate": 4.3533980582524276e-05, "loss": 0.2648, "step": 1332 }, { "epoch": 12.920481927710844, "grad_norm": 9.07950210571289, "learning_rate": 4.352912621359223e-05, "loss": 0.194, "step": 1333 }, { "epoch": 12.93012048192771, "grad_norm": 2.944021224975586, "learning_rate": 4.35242718446602e-05, "loss": 0.2791, "step": 1334 }, { "epoch": 12.939759036144578, "grad_norm": 1.9710150957107544, "learning_rate": 4.3519417475728156e-05, "loss": 0.2321, "step": 1335 }, { "epoch": 12.949397590361446, "grad_norm": 10.41323471069336, "learning_rate": 4.351456310679612e-05, "loss": 0.2692, "step": 1336 }, { "epoch": 12.959036144578313, "grad_norm": 9.59897518157959, "learning_rate": 4.350970873786408e-05, "loss": 0.3333, "step": 1337 }, { "epoch": 12.96867469879518, "grad_norm": 3.026297092437744, "learning_rate": 4.350485436893204e-05, "loss": 0.2474, "step": 1338 }, { "epoch": 12.978313253012049, "grad_norm": 2.554626941680908, "learning_rate": 4.35e-05, "loss": 0.1976, "step": 1339 }, { "epoch": 12.987951807228916, "grad_norm": 4.550163269042969, "learning_rate": 4.3495145631067965e-05, "loss": 0.2306, "step": 1340 }, { "epoch": 13.003614457831326, "grad_norm": 3.7305850982666016, "learning_rate": 4.349029126213592e-05, "loss": 0.3406, "step": 1341 }, { "epoch": 13.013253012048192, "grad_norm": 7.844744682312012, "learning_rate": 4.348543689320389e-05, "loss": 0.429, "step": 1342 }, { "epoch": 13.022891566265061, "grad_norm": 11.641255378723145, "learning_rate": 4.3480582524271845e-05, "loss": 0.5333, "step": 1343 }, { "epoch": 13.032530120481928, "grad_norm": 5.650651454925537, "learning_rate": 4.347572815533981e-05, "loss": 0.2823, "step": 1344 }, { "epoch": 13.042168674698795, "grad_norm": 13.35332202911377, "learning_rate": 4.347087378640777e-05, "loss": 0.224, "step": 1345 }, { "epoch": 13.051807228915663, "grad_norm": 6.7152557373046875, "learning_rate": 4.346601941747573e-05, "loss": 0.4139, "step": 1346 }, { "epoch": 13.06144578313253, "grad_norm": 2.4846367835998535, "learning_rate": 4.346116504854369e-05, "loss": 0.2042, "step": 1347 }, { "epoch": 13.071084337349397, "grad_norm": 9.070926666259766, "learning_rate": 4.345631067961165e-05, "loss": 0.2083, "step": 1348 }, { "epoch": 13.080722891566266, "grad_norm": 3.817695379257202, "learning_rate": 4.345145631067961e-05, "loss": 0.2139, "step": 1349 }, { "epoch": 13.090361445783133, "grad_norm": 4.302340030670166, "learning_rate": 4.344660194174757e-05, "loss": 0.3006, "step": 1350 }, { "epoch": 13.1, "grad_norm": 2.713517427444458, "learning_rate": 4.344174757281554e-05, "loss": 0.4301, "step": 1351 }, { "epoch": 13.109638554216868, "grad_norm": 8.4620361328125, "learning_rate": 4.34368932038835e-05, "loss": 0.2845, "step": 1352 }, { "epoch": 13.119277108433735, "grad_norm": 6.738032341003418, "learning_rate": 4.343203883495146e-05, "loss": 0.3187, "step": 1353 }, { "epoch": 13.128915662650602, "grad_norm": 5.060258865356445, "learning_rate": 4.342718446601942e-05, "loss": 0.4145, "step": 1354 }, { "epoch": 13.13855421686747, "grad_norm": 6.7430853843688965, "learning_rate": 4.3422330097087385e-05, "loss": 0.2609, "step": 1355 }, { "epoch": 13.148192771084338, "grad_norm": 17.81772804260254, "learning_rate": 4.341747572815534e-05, "loss": 0.3161, "step": 1356 }, { "epoch": 13.157831325301204, "grad_norm": 12.32593059539795, "learning_rate": 4.341262135922331e-05, "loss": 0.3373, "step": 1357 }, { "epoch": 13.167469879518073, "grad_norm": 17.77837562561035, "learning_rate": 4.3407766990291265e-05, "loss": 0.4215, "step": 1358 }, { "epoch": 13.17710843373494, "grad_norm": 4.500743865966797, "learning_rate": 4.340291262135922e-05, "loss": 0.2563, "step": 1359 }, { "epoch": 13.186746987951807, "grad_norm": 8.256168365478516, "learning_rate": 4.339805825242719e-05, "loss": 0.3384, "step": 1360 }, { "epoch": 13.196385542168676, "grad_norm": 2.8174636363983154, "learning_rate": 4.3393203883495145e-05, "loss": 0.0782, "step": 1361 }, { "epoch": 13.206024096385542, "grad_norm": 8.05516242980957, "learning_rate": 4.338834951456311e-05, "loss": 0.3922, "step": 1362 }, { "epoch": 13.21566265060241, "grad_norm": 2.4557816982269287, "learning_rate": 4.338349514563107e-05, "loss": 0.1363, "step": 1363 }, { "epoch": 13.225301204819278, "grad_norm": 3.921882390975952, "learning_rate": 4.337864077669903e-05, "loss": 0.4025, "step": 1364 }, { "epoch": 13.234939759036145, "grad_norm": 7.104881286621094, "learning_rate": 4.337378640776699e-05, "loss": 0.3607, "step": 1365 }, { "epoch": 13.244578313253012, "grad_norm": 5.497311592102051, "learning_rate": 4.3368932038834955e-05, "loss": 0.3543, "step": 1366 }, { "epoch": 13.25421686746988, "grad_norm": 2.80433988571167, "learning_rate": 4.336407766990291e-05, "loss": 0.3333, "step": 1367 }, { "epoch": 13.263855421686747, "grad_norm": 3.9636623859405518, "learning_rate": 4.335922330097088e-05, "loss": 0.6112, "step": 1368 }, { "epoch": 13.273493975903614, "grad_norm": 7.003512382507324, "learning_rate": 4.3354368932038835e-05, "loss": 0.2917, "step": 1369 }, { "epoch": 13.283132530120483, "grad_norm": 5.249645233154297, "learning_rate": 4.33495145631068e-05, "loss": 0.3155, "step": 1370 }, { "epoch": 13.29277108433735, "grad_norm": 10.39197063446045, "learning_rate": 4.334466019417476e-05, "loss": 0.2892, "step": 1371 }, { "epoch": 13.302409638554217, "grad_norm": 12.477277755737305, "learning_rate": 4.333980582524272e-05, "loss": 0.3169, "step": 1372 }, { "epoch": 13.312048192771085, "grad_norm": 10.184113502502441, "learning_rate": 4.333495145631068e-05, "loss": 0.4342, "step": 1373 }, { "epoch": 13.321686746987952, "grad_norm": 2.4871561527252197, "learning_rate": 4.3330097087378644e-05, "loss": 0.1891, "step": 1374 }, { "epoch": 13.331325301204819, "grad_norm": 3.5629920959472656, "learning_rate": 4.33252427184466e-05, "loss": 0.1755, "step": 1375 }, { "epoch": 13.340963855421688, "grad_norm": 5.192362308502197, "learning_rate": 4.3320388349514566e-05, "loss": 0.4898, "step": 1376 }, { "epoch": 13.350602409638554, "grad_norm": 4.054602146148682, "learning_rate": 4.331553398058253e-05, "loss": 0.3833, "step": 1377 }, { "epoch": 13.360240963855421, "grad_norm": 13.228586196899414, "learning_rate": 4.331067961165049e-05, "loss": 0.3399, "step": 1378 }, { "epoch": 13.369879518072288, "grad_norm": 3.23008131980896, "learning_rate": 4.330582524271845e-05, "loss": 0.3613, "step": 1379 }, { "epoch": 13.379518072289157, "grad_norm": 4.33438777923584, "learning_rate": 4.330097087378641e-05, "loss": 0.4587, "step": 1380 }, { "epoch": 13.389156626506024, "grad_norm": 2.4035000801086426, "learning_rate": 4.3296116504854375e-05, "loss": 0.1659, "step": 1381 }, { "epoch": 13.398795180722892, "grad_norm": 2.4458730220794678, "learning_rate": 4.329126213592233e-05, "loss": 0.1844, "step": 1382 }, { "epoch": 13.40843373493976, "grad_norm": 4.194186210632324, "learning_rate": 4.32864077669903e-05, "loss": 0.3445, "step": 1383 }, { "epoch": 13.418072289156626, "grad_norm": 9.71375846862793, "learning_rate": 4.3281553398058255e-05, "loss": 0.2524, "step": 1384 }, { "epoch": 13.427710843373493, "grad_norm": 4.196449279785156, "learning_rate": 4.327669902912622e-05, "loss": 0.3745, "step": 1385 }, { "epoch": 13.437349397590362, "grad_norm": 5.910686016082764, "learning_rate": 4.327184466019418e-05, "loss": 0.3258, "step": 1386 }, { "epoch": 13.446987951807229, "grad_norm": 6.015864372253418, "learning_rate": 4.3266990291262135e-05, "loss": 0.4835, "step": 1387 }, { "epoch": 13.456626506024097, "grad_norm": 11.996626853942871, "learning_rate": 4.32621359223301e-05, "loss": 0.2786, "step": 1388 }, { "epoch": 13.466265060240964, "grad_norm": 3.0677855014801025, "learning_rate": 4.325728155339806e-05, "loss": 0.1158, "step": 1389 }, { "epoch": 13.475903614457831, "grad_norm": 4.202532768249512, "learning_rate": 4.325242718446602e-05, "loss": 0.2711, "step": 1390 }, { "epoch": 13.485542168674698, "grad_norm": 9.420111656188965, "learning_rate": 4.324757281553398e-05, "loss": 0.2674, "step": 1391 }, { "epoch": 13.495180722891567, "grad_norm": 2.3857035636901855, "learning_rate": 4.3242718446601944e-05, "loss": 0.1481, "step": 1392 }, { "epoch": 13.504819277108433, "grad_norm": 2.866504669189453, "learning_rate": 4.32378640776699e-05, "loss": 0.3635, "step": 1393 }, { "epoch": 13.514457831325302, "grad_norm": 5.218486309051514, "learning_rate": 4.323300970873787e-05, "loss": 0.3498, "step": 1394 }, { "epoch": 13.524096385542169, "grad_norm": 13.200650215148926, "learning_rate": 4.3228155339805824e-05, "loss": 0.2589, "step": 1395 }, { "epoch": 13.533734939759036, "grad_norm": 2.3951761722564697, "learning_rate": 4.322330097087379e-05, "loss": 0.2718, "step": 1396 }, { "epoch": 13.543373493975903, "grad_norm": 3.044778347015381, "learning_rate": 4.321844660194175e-05, "loss": 0.3053, "step": 1397 }, { "epoch": 13.553012048192771, "grad_norm": 3.747107982635498, "learning_rate": 4.321359223300971e-05, "loss": 0.333, "step": 1398 }, { "epoch": 13.562650602409638, "grad_norm": 3.657884120941162, "learning_rate": 4.320873786407767e-05, "loss": 0.1955, "step": 1399 }, { "epoch": 13.572289156626507, "grad_norm": 2.447831392288208, "learning_rate": 4.3203883495145634e-05, "loss": 0.2546, "step": 1400 }, { "epoch": 13.581927710843374, "grad_norm": 8.210075378417969, "learning_rate": 4.319902912621359e-05, "loss": 0.4162, "step": 1401 }, { "epoch": 13.59156626506024, "grad_norm": 3.082150459289551, "learning_rate": 4.3194174757281556e-05, "loss": 0.3837, "step": 1402 }, { "epoch": 13.601204819277108, "grad_norm": 4.964865684509277, "learning_rate": 4.318932038834952e-05, "loss": 0.2982, "step": 1403 }, { "epoch": 13.610843373493976, "grad_norm": 8.222696304321289, "learning_rate": 4.318446601941748e-05, "loss": 0.1776, "step": 1404 }, { "epoch": 13.620481927710843, "grad_norm": 3.5699896812438965, "learning_rate": 4.317961165048544e-05, "loss": 0.2317, "step": 1405 }, { "epoch": 13.630120481927712, "grad_norm": 5.445492267608643, "learning_rate": 4.31747572815534e-05, "loss": 0.1951, "step": 1406 }, { "epoch": 13.639759036144579, "grad_norm": 9.57612419128418, "learning_rate": 4.3169902912621365e-05, "loss": 0.481, "step": 1407 }, { "epoch": 13.649397590361446, "grad_norm": 11.63134765625, "learning_rate": 4.316504854368932e-05, "loss": 0.2349, "step": 1408 }, { "epoch": 13.659036144578312, "grad_norm": 9.36464786529541, "learning_rate": 4.316019417475729e-05, "loss": 0.316, "step": 1409 }, { "epoch": 13.668674698795181, "grad_norm": 5.0132622718811035, "learning_rate": 4.3155339805825245e-05, "loss": 0.4226, "step": 1410 }, { "epoch": 13.678313253012048, "grad_norm": 3.2228686809539795, "learning_rate": 4.315048543689321e-05, "loss": 0.2854, "step": 1411 }, { "epoch": 13.687951807228917, "grad_norm": 2.083735227584839, "learning_rate": 4.314563106796117e-05, "loss": 0.2634, "step": 1412 }, { "epoch": 13.697590361445783, "grad_norm": 7.852909088134766, "learning_rate": 4.314077669902913e-05, "loss": 0.3757, "step": 1413 }, { "epoch": 13.70722891566265, "grad_norm": 4.26548433303833, "learning_rate": 4.313592233009709e-05, "loss": 0.3566, "step": 1414 }, { "epoch": 13.716867469879517, "grad_norm": 2.4416444301605225, "learning_rate": 4.313106796116505e-05, "loss": 0.2179, "step": 1415 }, { "epoch": 13.726506024096386, "grad_norm": 14.823098182678223, "learning_rate": 4.312621359223301e-05, "loss": 0.2581, "step": 1416 }, { "epoch": 13.736144578313253, "grad_norm": 5.480705738067627, "learning_rate": 4.312135922330097e-05, "loss": 0.3131, "step": 1417 }, { "epoch": 13.745783132530121, "grad_norm": 7.294772624969482, "learning_rate": 4.3116504854368934e-05, "loss": 0.2749, "step": 1418 }, { "epoch": 13.755421686746988, "grad_norm": 4.060771942138672, "learning_rate": 4.311165048543689e-05, "loss": 0.3987, "step": 1419 }, { "epoch": 13.765060240963855, "grad_norm": 15.334219932556152, "learning_rate": 4.3106796116504856e-05, "loss": 0.6581, "step": 1420 }, { "epoch": 13.774698795180722, "grad_norm": 13.097867012023926, "learning_rate": 4.3101941747572814e-05, "loss": 0.2905, "step": 1421 }, { "epoch": 13.78433734939759, "grad_norm": 4.444243431091309, "learning_rate": 4.309708737864078e-05, "loss": 0.2512, "step": 1422 }, { "epoch": 13.793975903614458, "grad_norm": 4.158855438232422, "learning_rate": 4.3092233009708736e-05, "loss": 0.139, "step": 1423 }, { "epoch": 13.803614457831326, "grad_norm": 2.0029220581054688, "learning_rate": 4.30873786407767e-05, "loss": 0.194, "step": 1424 }, { "epoch": 13.813253012048193, "grad_norm": 6.380977630615234, "learning_rate": 4.308252427184466e-05, "loss": 0.2617, "step": 1425 }, { "epoch": 13.82289156626506, "grad_norm": 3.8463237285614014, "learning_rate": 4.307766990291262e-05, "loss": 0.1418, "step": 1426 }, { "epoch": 13.832530120481927, "grad_norm": 9.954597473144531, "learning_rate": 4.307281553398059e-05, "loss": 0.3461, "step": 1427 }, { "epoch": 13.842168674698796, "grad_norm": 6.162082672119141, "learning_rate": 4.3067961165048546e-05, "loss": 0.2402, "step": 1428 }, { "epoch": 13.851807228915662, "grad_norm": 9.83614730834961, "learning_rate": 4.306310679611651e-05, "loss": 0.416, "step": 1429 }, { "epoch": 13.861445783132531, "grad_norm": 4.037912845611572, "learning_rate": 4.305825242718447e-05, "loss": 0.2553, "step": 1430 }, { "epoch": 13.871084337349398, "grad_norm": 4.144782066345215, "learning_rate": 4.305339805825243e-05, "loss": 0.1745, "step": 1431 }, { "epoch": 13.880722891566265, "grad_norm": 6.474467754364014, "learning_rate": 4.304854368932039e-05, "loss": 0.2617, "step": 1432 }, { "epoch": 13.890361445783132, "grad_norm": 19.684436798095703, "learning_rate": 4.3043689320388355e-05, "loss": 0.3961, "step": 1433 }, { "epoch": 13.9, "grad_norm": 3.5968828201293945, "learning_rate": 4.303883495145631e-05, "loss": 0.2019, "step": 1434 }, { "epoch": 13.909638554216867, "grad_norm": 11.741619110107422, "learning_rate": 4.303398058252428e-05, "loss": 0.2758, "step": 1435 }, { "epoch": 13.919277108433734, "grad_norm": 20.36610984802246, "learning_rate": 4.3029126213592235e-05, "loss": 0.2442, "step": 1436 }, { "epoch": 13.928915662650603, "grad_norm": 2.102804183959961, "learning_rate": 4.30242718446602e-05, "loss": 0.2224, "step": 1437 }, { "epoch": 13.93855421686747, "grad_norm": 5.358251094818115, "learning_rate": 4.301941747572816e-05, "loss": 0.2289, "step": 1438 }, { "epoch": 13.948192771084337, "grad_norm": 14.420398712158203, "learning_rate": 4.301456310679612e-05, "loss": 0.3149, "step": 1439 }, { "epoch": 13.957831325301205, "grad_norm": 35.72264099121094, "learning_rate": 4.300970873786408e-05, "loss": 0.3837, "step": 1440 }, { "epoch": 13.967469879518072, "grad_norm": 10.644685745239258, "learning_rate": 4.300485436893204e-05, "loss": 0.3367, "step": 1441 }, { "epoch": 13.977108433734939, "grad_norm": 9.777430534362793, "learning_rate": 4.3e-05, "loss": 0.3814, "step": 1442 }, { "epoch": 13.986746987951808, "grad_norm": 6.151583194732666, "learning_rate": 4.299514563106796e-05, "loss": 0.388, "step": 1443 }, { "epoch": 14.002409638554218, "grad_norm": 3.4281692504882812, "learning_rate": 4.2990291262135924e-05, "loss": 0.1362, "step": 1444 }, { "epoch": 14.012048192771084, "grad_norm": 5.338442325592041, "learning_rate": 4.298543689320388e-05, "loss": 0.263, "step": 1445 }, { "epoch": 14.021686746987951, "grad_norm": 5.821629524230957, "learning_rate": 4.2980582524271846e-05, "loss": 0.2871, "step": 1446 }, { "epoch": 14.03132530120482, "grad_norm": 26.95039939880371, "learning_rate": 4.2975728155339804e-05, "loss": 0.5443, "step": 1447 }, { "epoch": 14.040963855421687, "grad_norm": 2.644512414932251, "learning_rate": 4.297087378640777e-05, "loss": 0.1372, "step": 1448 }, { "epoch": 14.050602409638554, "grad_norm": 5.198383331298828, "learning_rate": 4.2966019417475726e-05, "loss": 0.3511, "step": 1449 }, { "epoch": 14.060240963855422, "grad_norm": 5.122847557067871, "learning_rate": 4.296116504854369e-05, "loss": 0.3752, "step": 1450 }, { "epoch": 14.06987951807229, "grad_norm": 6.1802897453308105, "learning_rate": 4.295631067961165e-05, "loss": 0.5573, "step": 1451 }, { "epoch": 14.079518072289156, "grad_norm": 7.375117778778076, "learning_rate": 4.295145631067961e-05, "loss": 0.5266, "step": 1452 }, { "epoch": 14.089156626506025, "grad_norm": 10.417753219604492, "learning_rate": 4.294660194174758e-05, "loss": 0.2486, "step": 1453 }, { "epoch": 14.098795180722892, "grad_norm": 5.1110992431640625, "learning_rate": 4.2941747572815535e-05, "loss": 0.3001, "step": 1454 }, { "epoch": 14.108433734939759, "grad_norm": 3.193697452545166, "learning_rate": 4.29368932038835e-05, "loss": 0.2004, "step": 1455 }, { "epoch": 14.118072289156627, "grad_norm": 11.450945854187012, "learning_rate": 4.293203883495146e-05, "loss": 0.3471, "step": 1456 }, { "epoch": 14.127710843373494, "grad_norm": 3.141791820526123, "learning_rate": 4.292718446601942e-05, "loss": 0.2821, "step": 1457 }, { "epoch": 14.137349397590361, "grad_norm": 5.425412654876709, "learning_rate": 4.292233009708738e-05, "loss": 0.3824, "step": 1458 }, { "epoch": 14.14698795180723, "grad_norm": 2.586618185043335, "learning_rate": 4.2917475728155345e-05, "loss": 0.1305, "step": 1459 }, { "epoch": 14.156626506024097, "grad_norm": 5.474615097045898, "learning_rate": 4.29126213592233e-05, "loss": 0.2127, "step": 1460 }, { "epoch": 14.166265060240963, "grad_norm": 9.757685661315918, "learning_rate": 4.290776699029127e-05, "loss": 0.5229, "step": 1461 }, { "epoch": 14.175903614457832, "grad_norm": 2.6775121688842773, "learning_rate": 4.2902912621359225e-05, "loss": 0.0692, "step": 1462 }, { "epoch": 14.185542168674699, "grad_norm": 5.288668155670166, "learning_rate": 4.289805825242719e-05, "loss": 0.3269, "step": 1463 }, { "epoch": 14.195180722891566, "grad_norm": 2.2866859436035156, "learning_rate": 4.289320388349515e-05, "loss": 0.2555, "step": 1464 }, { "epoch": 14.204819277108435, "grad_norm": 3.526240587234497, "learning_rate": 4.288834951456311e-05, "loss": 0.2723, "step": 1465 }, { "epoch": 14.214457831325301, "grad_norm": 3.7782790660858154, "learning_rate": 4.288349514563107e-05, "loss": 0.3962, "step": 1466 }, { "epoch": 14.224096385542168, "grad_norm": 2.9260828495025635, "learning_rate": 4.2878640776699034e-05, "loss": 0.1445, "step": 1467 }, { "epoch": 14.233734939759037, "grad_norm": 6.08974552154541, "learning_rate": 4.287378640776699e-05, "loss": 0.2098, "step": 1468 }, { "epoch": 14.243373493975904, "grad_norm": 2.691927433013916, "learning_rate": 4.286893203883495e-05, "loss": 0.3423, "step": 1469 }, { "epoch": 14.25301204819277, "grad_norm": 4.065399169921875, "learning_rate": 4.2864077669902914e-05, "loss": 0.3747, "step": 1470 }, { "epoch": 14.26265060240964, "grad_norm": 6.444619178771973, "learning_rate": 4.285922330097087e-05, "loss": 0.219, "step": 1471 }, { "epoch": 14.272289156626506, "grad_norm": 10.658798217773438, "learning_rate": 4.2854368932038836e-05, "loss": 0.2642, "step": 1472 }, { "epoch": 14.281927710843373, "grad_norm": 2.0417325496673584, "learning_rate": 4.2849514563106794e-05, "loss": 0.2751, "step": 1473 }, { "epoch": 14.291566265060242, "grad_norm": 4.441100597381592, "learning_rate": 4.284466019417476e-05, "loss": 0.394, "step": 1474 }, { "epoch": 14.301204819277109, "grad_norm": 5.742561340332031, "learning_rate": 4.2839805825242716e-05, "loss": 0.5943, "step": 1475 }, { "epoch": 14.310843373493976, "grad_norm": 5.35231351852417, "learning_rate": 4.283495145631068e-05, "loss": 0.4055, "step": 1476 }, { "epoch": 14.320481927710844, "grad_norm": 3.2872989177703857, "learning_rate": 4.2830097087378645e-05, "loss": 0.1635, "step": 1477 }, { "epoch": 14.330120481927711, "grad_norm": 6.1835455894470215, "learning_rate": 4.282524271844661e-05, "loss": 0.4177, "step": 1478 }, { "epoch": 14.339759036144578, "grad_norm": 7.017383575439453, "learning_rate": 4.282038834951457e-05, "loss": 0.4425, "step": 1479 }, { "epoch": 14.349397590361447, "grad_norm": 3.8251326084136963, "learning_rate": 4.2815533980582525e-05, "loss": 0.2658, "step": 1480 }, { "epoch": 14.359036144578313, "grad_norm": 3.6814568042755127, "learning_rate": 4.281067961165049e-05, "loss": 0.2546, "step": 1481 }, { "epoch": 14.36867469879518, "grad_norm": 7.109790325164795, "learning_rate": 4.280582524271845e-05, "loss": 0.2109, "step": 1482 }, { "epoch": 14.378313253012049, "grad_norm": 2.8891372680664062, "learning_rate": 4.280097087378641e-05, "loss": 0.1552, "step": 1483 }, { "epoch": 14.387951807228916, "grad_norm": 4.276645183563232, "learning_rate": 4.279611650485437e-05, "loss": 0.2687, "step": 1484 }, { "epoch": 14.397590361445783, "grad_norm": 13.888004302978516, "learning_rate": 4.2791262135922334e-05, "loss": 0.4926, "step": 1485 }, { "epoch": 14.407228915662651, "grad_norm": 28.122610092163086, "learning_rate": 4.278640776699029e-05, "loss": 0.2179, "step": 1486 }, { "epoch": 14.416867469879518, "grad_norm": 15.117776870727539, "learning_rate": 4.2781553398058257e-05, "loss": 0.3391, "step": 1487 }, { "epoch": 14.426506024096385, "grad_norm": 2.3811850547790527, "learning_rate": 4.2776699029126214e-05, "loss": 0.1391, "step": 1488 }, { "epoch": 14.436144578313254, "grad_norm": 5.300633907318115, "learning_rate": 4.277184466019418e-05, "loss": 0.2061, "step": 1489 }, { "epoch": 14.44578313253012, "grad_norm": 4.325374126434326, "learning_rate": 4.276699029126214e-05, "loss": 0.378, "step": 1490 }, { "epoch": 14.455421686746988, "grad_norm": 1.3981537818908691, "learning_rate": 4.27621359223301e-05, "loss": 0.098, "step": 1491 }, { "epoch": 14.465060240963856, "grad_norm": 3.6983368396759033, "learning_rate": 4.275728155339806e-05, "loss": 0.2519, "step": 1492 }, { "epoch": 14.474698795180723, "grad_norm": 4.259254455566406, "learning_rate": 4.2752427184466023e-05, "loss": 0.3343, "step": 1493 }, { "epoch": 14.48433734939759, "grad_norm": 2.44279146194458, "learning_rate": 4.274757281553398e-05, "loss": 0.1417, "step": 1494 }, { "epoch": 14.493975903614459, "grad_norm": 6.093776226043701, "learning_rate": 4.2742718446601946e-05, "loss": 0.1596, "step": 1495 }, { "epoch": 14.503614457831326, "grad_norm": 4.491542339324951, "learning_rate": 4.2737864077669904e-05, "loss": 0.1783, "step": 1496 }, { "epoch": 14.513253012048192, "grad_norm": 8.904022216796875, "learning_rate": 4.273300970873786e-05, "loss": 0.3855, "step": 1497 }, { "epoch": 14.522891566265061, "grad_norm": 8.812294006347656, "learning_rate": 4.2728155339805826e-05, "loss": 0.2674, "step": 1498 }, { "epoch": 14.532530120481928, "grad_norm": 4.642425060272217, "learning_rate": 4.2723300970873784e-05, "loss": 0.4054, "step": 1499 }, { "epoch": 14.542168674698795, "grad_norm": 10.76279354095459, "learning_rate": 4.271844660194175e-05, "loss": 0.4289, "step": 1500 }, { "epoch": 14.551807228915663, "grad_norm": 5.029416561126709, "learning_rate": 4.2713592233009706e-05, "loss": 0.4274, "step": 1501 }, { "epoch": 14.56144578313253, "grad_norm": 3.781352996826172, "learning_rate": 4.270873786407767e-05, "loss": 0.3205, "step": 1502 }, { "epoch": 14.571084337349397, "grad_norm": 3.4454565048217773, "learning_rate": 4.2703883495145635e-05, "loss": 0.3085, "step": 1503 }, { "epoch": 14.580722891566266, "grad_norm": 7.76377534866333, "learning_rate": 4.26990291262136e-05, "loss": 0.4729, "step": 1504 }, { "epoch": 14.590361445783133, "grad_norm": 3.4630377292633057, "learning_rate": 4.269417475728156e-05, "loss": 0.1602, "step": 1505 }, { "epoch": 14.6, "grad_norm": 11.581520080566406, "learning_rate": 4.268932038834952e-05, "loss": 0.3091, "step": 1506 }, { "epoch": 14.609638554216868, "grad_norm": 3.0865917205810547, "learning_rate": 4.268446601941748e-05, "loss": 0.2422, "step": 1507 }, { "epoch": 14.619277108433735, "grad_norm": 5.15256929397583, "learning_rate": 4.267961165048544e-05, "loss": 0.3617, "step": 1508 }, { "epoch": 14.628915662650602, "grad_norm": 7.044839382171631, "learning_rate": 4.26747572815534e-05, "loss": 0.4394, "step": 1509 }, { "epoch": 14.638554216867469, "grad_norm": 2.155980110168457, "learning_rate": 4.266990291262136e-05, "loss": 0.2134, "step": 1510 }, { "epoch": 14.648192771084338, "grad_norm": 3.8976802825927734, "learning_rate": 4.2665048543689324e-05, "loss": 0.2864, "step": 1511 }, { "epoch": 14.657831325301204, "grad_norm": 4.34669303894043, "learning_rate": 4.266019417475728e-05, "loss": 0.2936, "step": 1512 }, { "epoch": 14.667469879518073, "grad_norm": 5.971386432647705, "learning_rate": 4.2655339805825246e-05, "loss": 0.2784, "step": 1513 }, { "epoch": 14.67710843373494, "grad_norm": 4.643065929412842, "learning_rate": 4.2650485436893204e-05, "loss": 0.2589, "step": 1514 }, { "epoch": 14.686746987951807, "grad_norm": 4.94956111907959, "learning_rate": 4.264563106796117e-05, "loss": 0.3987, "step": 1515 }, { "epoch": 14.696385542168674, "grad_norm": 2.8228797912597656, "learning_rate": 4.2640776699029126e-05, "loss": 0.2614, "step": 1516 }, { "epoch": 14.706024096385542, "grad_norm": 2.176898956298828, "learning_rate": 4.263592233009709e-05, "loss": 0.219, "step": 1517 }, { "epoch": 14.71566265060241, "grad_norm": 3.131605386734009, "learning_rate": 4.263106796116505e-05, "loss": 0.2027, "step": 1518 }, { "epoch": 14.725301204819278, "grad_norm": 5.324614524841309, "learning_rate": 4.262621359223301e-05, "loss": 0.3442, "step": 1519 }, { "epoch": 14.734939759036145, "grad_norm": 2.3831229209899902, "learning_rate": 4.262135922330097e-05, "loss": 0.3106, "step": 1520 }, { "epoch": 14.744578313253012, "grad_norm": 8.97327995300293, "learning_rate": 4.2616504854368936e-05, "loss": 0.5655, "step": 1521 }, { "epoch": 14.754216867469879, "grad_norm": 1.9136532545089722, "learning_rate": 4.261165048543689e-05, "loss": 0.0689, "step": 1522 }, { "epoch": 14.763855421686747, "grad_norm": 7.9613518714904785, "learning_rate": 4.260679611650486e-05, "loss": 0.3037, "step": 1523 }, { "epoch": 14.773493975903614, "grad_norm": 2.407546281814575, "learning_rate": 4.2601941747572816e-05, "loss": 0.2269, "step": 1524 }, { "epoch": 14.783132530120483, "grad_norm": 5.359671115875244, "learning_rate": 4.259708737864077e-05, "loss": 0.2708, "step": 1525 }, { "epoch": 14.79277108433735, "grad_norm": 1.5903102159500122, "learning_rate": 4.259223300970874e-05, "loss": 0.2057, "step": 1526 }, { "epoch": 14.802409638554217, "grad_norm": 2.4708046913146973, "learning_rate": 4.2587378640776696e-05, "loss": 0.2637, "step": 1527 }, { "epoch": 14.812048192771083, "grad_norm": 4.286201000213623, "learning_rate": 4.258252427184467e-05, "loss": 0.3768, "step": 1528 }, { "epoch": 14.821686746987952, "grad_norm": 4.10324239730835, "learning_rate": 4.2577669902912625e-05, "loss": 0.2374, "step": 1529 }, { "epoch": 14.831325301204819, "grad_norm": 5.307155609130859, "learning_rate": 4.257281553398059e-05, "loss": 0.1816, "step": 1530 }, { "epoch": 14.840963855421688, "grad_norm": 4.727719306945801, "learning_rate": 4.256796116504855e-05, "loss": 0.304, "step": 1531 }, { "epoch": 14.850602409638554, "grad_norm": 1.9019590616226196, "learning_rate": 4.256310679611651e-05, "loss": 0.3329, "step": 1532 }, { "epoch": 14.860240963855421, "grad_norm": 7.589446544647217, "learning_rate": 4.255825242718447e-05, "loss": 0.1858, "step": 1533 }, { "epoch": 14.869879518072288, "grad_norm": 7.763722896575928, "learning_rate": 4.2553398058252434e-05, "loss": 0.3266, "step": 1534 }, { "epoch": 14.879518072289157, "grad_norm": 3.6566760540008545, "learning_rate": 4.254854368932039e-05, "loss": 0.3697, "step": 1535 }, { "epoch": 14.889156626506024, "grad_norm": 2.521949052810669, "learning_rate": 4.254368932038835e-05, "loss": 0.2708, "step": 1536 }, { "epoch": 14.898795180722892, "grad_norm": 6.145908832550049, "learning_rate": 4.2538834951456314e-05, "loss": 0.2289, "step": 1537 }, { "epoch": 14.90843373493976, "grad_norm": 1.7557132244110107, "learning_rate": 4.253398058252427e-05, "loss": 0.26, "step": 1538 }, { "epoch": 14.918072289156626, "grad_norm": 3.2025272846221924, "learning_rate": 4.2529126213592236e-05, "loss": 0.224, "step": 1539 }, { "epoch": 14.927710843373493, "grad_norm": 8.34815502166748, "learning_rate": 4.2524271844660194e-05, "loss": 0.2936, "step": 1540 }, { "epoch": 14.937349397590362, "grad_norm": 28.491180419921875, "learning_rate": 4.251941747572816e-05, "loss": 0.6197, "step": 1541 }, { "epoch": 14.946987951807229, "grad_norm": 2.9605493545532227, "learning_rate": 4.2514563106796116e-05, "loss": 0.2419, "step": 1542 }, { "epoch": 14.956626506024097, "grad_norm": 7.829331874847412, "learning_rate": 4.250970873786408e-05, "loss": 0.403, "step": 1543 }, { "epoch": 14.966265060240964, "grad_norm": 2.865412712097168, "learning_rate": 4.250485436893204e-05, "loss": 0.3616, "step": 1544 }, { "epoch": 14.975903614457831, "grad_norm": 4.335886001586914, "learning_rate": 4.25e-05, "loss": 0.4819, "step": 1545 }, { "epoch": 14.985542168674698, "grad_norm": 5.160926818847656, "learning_rate": 4.249514563106796e-05, "loss": 0.4722, "step": 1546 }, { "epoch": 15.001204819277108, "grad_norm": 6.468801021575928, "learning_rate": 4.2490291262135925e-05, "loss": 0.3653, "step": 1547 }, { "epoch": 15.010843373493977, "grad_norm": 2.373668670654297, "learning_rate": 4.248543689320388e-05, "loss": 0.2769, "step": 1548 }, { "epoch": 15.020481927710843, "grad_norm": 3.1085355281829834, "learning_rate": 4.248058252427185e-05, "loss": 0.2726, "step": 1549 }, { "epoch": 15.03012048192771, "grad_norm": 7.815611362457275, "learning_rate": 4.2475728155339805e-05, "loss": 0.3598, "step": 1550 }, { "epoch": 15.039759036144579, "grad_norm": 5.196824550628662, "learning_rate": 4.247087378640777e-05, "loss": 0.2272, "step": 1551 }, { "epoch": 15.049397590361446, "grad_norm": 3.40863037109375, "learning_rate": 4.246601941747573e-05, "loss": 0.2691, "step": 1552 }, { "epoch": 15.059036144578313, "grad_norm": 1.7247956991195679, "learning_rate": 4.246116504854369e-05, "loss": 0.1522, "step": 1553 }, { "epoch": 15.068674698795181, "grad_norm": 1.7815775871276855, "learning_rate": 4.245631067961166e-05, "loss": 0.1845, "step": 1554 }, { "epoch": 15.078313253012048, "grad_norm": 3.3340935707092285, "learning_rate": 4.2451456310679615e-05, "loss": 0.2852, "step": 1555 }, { "epoch": 15.087951807228915, "grad_norm": 5.224653244018555, "learning_rate": 4.244660194174758e-05, "loss": 0.6092, "step": 1556 }, { "epoch": 15.097590361445784, "grad_norm": 3.907064437866211, "learning_rate": 4.244174757281554e-05, "loss": 0.3688, "step": 1557 }, { "epoch": 15.10722891566265, "grad_norm": 3.131211280822754, "learning_rate": 4.24368932038835e-05, "loss": 0.2161, "step": 1558 }, { "epoch": 15.116867469879518, "grad_norm": 4.370959281921387, "learning_rate": 4.243203883495146e-05, "loss": 0.2808, "step": 1559 }, { "epoch": 15.126506024096386, "grad_norm": 4.900076389312744, "learning_rate": 4.2427184466019424e-05, "loss": 0.2468, "step": 1560 }, { "epoch": 15.136144578313253, "grad_norm": 3.9109997749328613, "learning_rate": 4.242233009708738e-05, "loss": 0.2687, "step": 1561 }, { "epoch": 15.14578313253012, "grad_norm": 3.8591856956481934, "learning_rate": 4.2417475728155346e-05, "loss": 0.2303, "step": 1562 }, { "epoch": 15.155421686746989, "grad_norm": 2.6286709308624268, "learning_rate": 4.2412621359223304e-05, "loss": 0.2137, "step": 1563 }, { "epoch": 15.165060240963856, "grad_norm": 2.469343662261963, "learning_rate": 4.240776699029126e-05, "loss": 0.1574, "step": 1564 }, { "epoch": 15.174698795180722, "grad_norm": 2.2267003059387207, "learning_rate": 4.2402912621359226e-05, "loss": 0.269, "step": 1565 }, { "epoch": 15.184337349397591, "grad_norm": 3.947646379470825, "learning_rate": 4.2398058252427184e-05, "loss": 0.4103, "step": 1566 }, { "epoch": 15.193975903614458, "grad_norm": 1.9661476612091064, "learning_rate": 4.239320388349515e-05, "loss": 0.1282, "step": 1567 }, { "epoch": 15.203614457831325, "grad_norm": 5.513066291809082, "learning_rate": 4.2388349514563106e-05, "loss": 0.2599, "step": 1568 }, { "epoch": 15.213253012048193, "grad_norm": 7.91662073135376, "learning_rate": 4.238349514563107e-05, "loss": 0.2235, "step": 1569 }, { "epoch": 15.22289156626506, "grad_norm": 7.9073920249938965, "learning_rate": 4.237864077669903e-05, "loss": 0.2319, "step": 1570 }, { "epoch": 15.232530120481927, "grad_norm": 2.631032705307007, "learning_rate": 4.237378640776699e-05, "loss": 0.1477, "step": 1571 }, { "epoch": 15.242168674698796, "grad_norm": 3.104292154312134, "learning_rate": 4.236893203883495e-05, "loss": 0.1963, "step": 1572 }, { "epoch": 15.251807228915663, "grad_norm": 3.923687219619751, "learning_rate": 4.2364077669902915e-05, "loss": 0.163, "step": 1573 }, { "epoch": 15.26144578313253, "grad_norm": 6.995447158813477, "learning_rate": 4.235922330097087e-05, "loss": 0.4801, "step": 1574 }, { "epoch": 15.271084337349398, "grad_norm": 7.212172508239746, "learning_rate": 4.235436893203884e-05, "loss": 0.2446, "step": 1575 }, { "epoch": 15.280722891566265, "grad_norm": 3.752587080001831, "learning_rate": 4.2349514563106795e-05, "loss": 0.4543, "step": 1576 }, { "epoch": 15.290361445783132, "grad_norm": 11.5226411819458, "learning_rate": 4.234466019417476e-05, "loss": 0.1783, "step": 1577 }, { "epoch": 15.3, "grad_norm": 4.263845443725586, "learning_rate": 4.2339805825242724e-05, "loss": 0.4424, "step": 1578 }, { "epoch": 15.309638554216868, "grad_norm": 3.046701669692993, "learning_rate": 4.233495145631068e-05, "loss": 0.1947, "step": 1579 }, { "epoch": 15.319277108433734, "grad_norm": 5.692399024963379, "learning_rate": 4.2330097087378647e-05, "loss": 0.2214, "step": 1580 }, { "epoch": 15.328915662650603, "grad_norm": 4.297634601593018, "learning_rate": 4.2325242718446604e-05, "loss": 0.3089, "step": 1581 }, { "epoch": 15.33855421686747, "grad_norm": 8.248534202575684, "learning_rate": 4.232038834951457e-05, "loss": 0.3102, "step": 1582 }, { "epoch": 15.348192771084337, "grad_norm": 3.4630720615386963, "learning_rate": 4.2315533980582527e-05, "loss": 0.2419, "step": 1583 }, { "epoch": 15.357831325301206, "grad_norm": 3.770287036895752, "learning_rate": 4.231067961165049e-05, "loss": 0.3148, "step": 1584 }, { "epoch": 15.367469879518072, "grad_norm": 4.0637359619140625, "learning_rate": 4.230582524271845e-05, "loss": 0.2135, "step": 1585 }, { "epoch": 15.37710843373494, "grad_norm": 1.9295246601104736, "learning_rate": 4.2300970873786413e-05, "loss": 0.252, "step": 1586 }, { "epoch": 15.386746987951808, "grad_norm": 3.8612523078918457, "learning_rate": 4.229611650485437e-05, "loss": 0.453, "step": 1587 }, { "epoch": 15.396385542168675, "grad_norm": 4.874102592468262, "learning_rate": 4.2291262135922336e-05, "loss": 0.2203, "step": 1588 }, { "epoch": 15.406024096385542, "grad_norm": 4.300978183746338, "learning_rate": 4.2286407766990293e-05, "loss": 0.2845, "step": 1589 }, { "epoch": 15.41566265060241, "grad_norm": 3.419494867324829, "learning_rate": 4.228155339805825e-05, "loss": 0.3101, "step": 1590 }, { "epoch": 15.425301204819277, "grad_norm": 7.673949241638184, "learning_rate": 4.2276699029126216e-05, "loss": 0.3573, "step": 1591 }, { "epoch": 15.434939759036144, "grad_norm": 5.757706165313721, "learning_rate": 4.2271844660194174e-05, "loss": 0.4642, "step": 1592 }, { "epoch": 15.444578313253013, "grad_norm": 3.7434699535369873, "learning_rate": 4.226699029126214e-05, "loss": 0.2675, "step": 1593 }, { "epoch": 15.45421686746988, "grad_norm": 5.3699116706848145, "learning_rate": 4.2262135922330096e-05, "loss": 0.1746, "step": 1594 }, { "epoch": 15.463855421686747, "grad_norm": 4.634495735168457, "learning_rate": 4.225728155339806e-05, "loss": 0.2024, "step": 1595 }, { "epoch": 15.473493975903615, "grad_norm": 10.456535339355469, "learning_rate": 4.225242718446602e-05, "loss": 0.3963, "step": 1596 }, { "epoch": 15.483132530120482, "grad_norm": 4.107558250427246, "learning_rate": 4.224757281553398e-05, "loss": 0.2633, "step": 1597 }, { "epoch": 15.492771084337349, "grad_norm": 2.9329569339752197, "learning_rate": 4.224271844660194e-05, "loss": 0.2445, "step": 1598 }, { "epoch": 15.502409638554218, "grad_norm": 4.577664375305176, "learning_rate": 4.2237864077669905e-05, "loss": 0.2481, "step": 1599 }, { "epoch": 15.512048192771084, "grad_norm": 4.464812278747559, "learning_rate": 4.223300970873786e-05, "loss": 0.3217, "step": 1600 }, { "epoch": 15.521686746987951, "grad_norm": 5.302940368652344, "learning_rate": 4.222815533980583e-05, "loss": 0.3452, "step": 1601 }, { "epoch": 15.53132530120482, "grad_norm": 7.617700576782227, "learning_rate": 4.2223300970873785e-05, "loss": 0.2705, "step": 1602 }, { "epoch": 15.540963855421687, "grad_norm": 4.9627227783203125, "learning_rate": 4.221844660194175e-05, "loss": 0.301, "step": 1603 }, { "epoch": 15.550602409638554, "grad_norm": 3.7766387462615967, "learning_rate": 4.2213592233009714e-05, "loss": 0.3658, "step": 1604 }, { "epoch": 15.560240963855422, "grad_norm": 3.4317433834075928, "learning_rate": 4.220873786407767e-05, "loss": 0.2604, "step": 1605 }, { "epoch": 15.56987951807229, "grad_norm": 3.1739490032196045, "learning_rate": 4.2203883495145636e-05, "loss": 0.289, "step": 1606 }, { "epoch": 15.579518072289156, "grad_norm": 2.862150192260742, "learning_rate": 4.2199029126213594e-05, "loss": 0.1493, "step": 1607 }, { "epoch": 15.589156626506025, "grad_norm": 7.4081010818481445, "learning_rate": 4.219417475728156e-05, "loss": 0.2992, "step": 1608 }, { "epoch": 15.598795180722892, "grad_norm": 5.2179412841796875, "learning_rate": 4.2189320388349516e-05, "loss": 0.2404, "step": 1609 }, { "epoch": 15.608433734939759, "grad_norm": 1.8332279920578003, "learning_rate": 4.218446601941748e-05, "loss": 0.1129, "step": 1610 }, { "epoch": 15.618072289156627, "grad_norm": 1.805989146232605, "learning_rate": 4.217961165048544e-05, "loss": 0.173, "step": 1611 }, { "epoch": 15.627710843373494, "grad_norm": 8.617989540100098, "learning_rate": 4.21747572815534e-05, "loss": 0.3241, "step": 1612 }, { "epoch": 15.637349397590361, "grad_norm": 3.941425323486328, "learning_rate": 4.216990291262136e-05, "loss": 0.221, "step": 1613 }, { "epoch": 15.64698795180723, "grad_norm": 3.4659924507141113, "learning_rate": 4.2165048543689325e-05, "loss": 0.2204, "step": 1614 }, { "epoch": 15.656626506024097, "grad_norm": 8.728107452392578, "learning_rate": 4.216019417475728e-05, "loss": 0.3419, "step": 1615 }, { "epoch": 15.666265060240963, "grad_norm": 2.7557694911956787, "learning_rate": 4.215533980582525e-05, "loss": 0.2589, "step": 1616 }, { "epoch": 15.675903614457832, "grad_norm": 5.7849440574646, "learning_rate": 4.2150485436893206e-05, "loss": 0.2819, "step": 1617 }, { "epoch": 15.685542168674699, "grad_norm": 7.632493495941162, "learning_rate": 4.214563106796116e-05, "loss": 0.201, "step": 1618 }, { "epoch": 15.695180722891566, "grad_norm": 10.751009941101074, "learning_rate": 4.214077669902913e-05, "loss": 0.1548, "step": 1619 }, { "epoch": 15.704819277108435, "grad_norm": 2.985536575317383, "learning_rate": 4.2135922330097086e-05, "loss": 0.1577, "step": 1620 }, { "epoch": 15.714457831325301, "grad_norm": 5.094631195068359, "learning_rate": 4.213106796116505e-05, "loss": 0.32, "step": 1621 }, { "epoch": 15.724096385542168, "grad_norm": 5.954336166381836, "learning_rate": 4.212621359223301e-05, "loss": 0.3136, "step": 1622 }, { "epoch": 15.733734939759037, "grad_norm": 4.757638931274414, "learning_rate": 4.212135922330097e-05, "loss": 0.2769, "step": 1623 }, { "epoch": 15.743373493975904, "grad_norm": 2.3613874912261963, "learning_rate": 4.211650485436893e-05, "loss": 0.1564, "step": 1624 }, { "epoch": 15.75301204819277, "grad_norm": 3.6484973430633545, "learning_rate": 4.2111650485436895e-05, "loss": 0.1914, "step": 1625 }, { "epoch": 15.76265060240964, "grad_norm": 2.6014888286590576, "learning_rate": 4.210679611650485e-05, "loss": 0.2023, "step": 1626 }, { "epoch": 15.772289156626506, "grad_norm": 4.149190425872803, "learning_rate": 4.210194174757282e-05, "loss": 0.2087, "step": 1627 }, { "epoch": 15.781927710843373, "grad_norm": 3.587914228439331, "learning_rate": 4.2097087378640775e-05, "loss": 0.2318, "step": 1628 }, { "epoch": 15.791566265060242, "grad_norm": 5.8907151222229, "learning_rate": 4.209223300970874e-05, "loss": 0.2941, "step": 1629 }, { "epoch": 15.801204819277109, "grad_norm": 3.211050271987915, "learning_rate": 4.2087378640776704e-05, "loss": 0.1942, "step": 1630 }, { "epoch": 15.810843373493976, "grad_norm": 13.015957832336426, "learning_rate": 4.208252427184466e-05, "loss": 0.2135, "step": 1631 }, { "epoch": 15.820481927710844, "grad_norm": 3.3661117553710938, "learning_rate": 4.2077669902912626e-05, "loss": 0.2993, "step": 1632 }, { "epoch": 15.830120481927711, "grad_norm": 4.755326271057129, "learning_rate": 4.2072815533980584e-05, "loss": 0.3577, "step": 1633 }, { "epoch": 15.839759036144578, "grad_norm": 6.058792591094971, "learning_rate": 4.206796116504855e-05, "loss": 0.3985, "step": 1634 }, { "epoch": 15.849397590361447, "grad_norm": 3.8264386653900146, "learning_rate": 4.2063106796116506e-05, "loss": 0.3418, "step": 1635 }, { "epoch": 15.859036144578313, "grad_norm": 2.1782877445220947, "learning_rate": 4.205825242718447e-05, "loss": 0.1123, "step": 1636 }, { "epoch": 15.86867469879518, "grad_norm": 2.8059515953063965, "learning_rate": 4.205339805825243e-05, "loss": 0.1933, "step": 1637 }, { "epoch": 15.878313253012049, "grad_norm": 5.832171440124512, "learning_rate": 4.204854368932039e-05, "loss": 0.2896, "step": 1638 }, { "epoch": 15.887951807228916, "grad_norm": 3.7548086643218994, "learning_rate": 4.204368932038835e-05, "loss": 0.2794, "step": 1639 }, { "epoch": 15.897590361445783, "grad_norm": 4.300349235534668, "learning_rate": 4.2038834951456315e-05, "loss": 0.4294, "step": 1640 }, { "epoch": 15.907228915662651, "grad_norm": 2.9356932640075684, "learning_rate": 4.203398058252427e-05, "loss": 0.3077, "step": 1641 }, { "epoch": 15.916867469879518, "grad_norm": 5.017303943634033, "learning_rate": 4.202912621359224e-05, "loss": 0.4967, "step": 1642 }, { "epoch": 15.926506024096385, "grad_norm": 3.688091993331909, "learning_rate": 4.2024271844660195e-05, "loss": 0.2819, "step": 1643 }, { "epoch": 15.936144578313254, "grad_norm": 2.1623411178588867, "learning_rate": 4.201941747572816e-05, "loss": 0.1478, "step": 1644 }, { "epoch": 15.94578313253012, "grad_norm": 3.718977451324463, "learning_rate": 4.201456310679612e-05, "loss": 0.1695, "step": 1645 }, { "epoch": 15.955421686746988, "grad_norm": 2.1188769340515137, "learning_rate": 4.2009708737864075e-05, "loss": 0.1523, "step": 1646 }, { "epoch": 15.965060240963856, "grad_norm": 3.2334392070770264, "learning_rate": 4.200485436893204e-05, "loss": 0.222, "step": 1647 }, { "epoch": 15.974698795180723, "grad_norm": 5.066359043121338, "learning_rate": 4.2e-05, "loss": 0.2768, "step": 1648 }, { "epoch": 15.98433734939759, "grad_norm": 7.135161399841309, "learning_rate": 4.199514563106796e-05, "loss": 0.2294, "step": 1649 }, { "epoch": 15.993975903614459, "grad_norm": 6.367162227630615, "learning_rate": 4.199029126213592e-05, "loss": 0.3567, "step": 1650 }, { "epoch": 16.009638554216867, "grad_norm": 3.050764322280884, "learning_rate": 4.1985436893203885e-05, "loss": 0.2289, "step": 1651 }, { "epoch": 16.019277108433734, "grad_norm": 4.793081283569336, "learning_rate": 4.198058252427184e-05, "loss": 0.1901, "step": 1652 }, { "epoch": 16.0289156626506, "grad_norm": 3.8082447052001953, "learning_rate": 4.197572815533981e-05, "loss": 0.3211, "step": 1653 }, { "epoch": 16.03855421686747, "grad_norm": 2.8503384590148926, "learning_rate": 4.197087378640777e-05, "loss": 0.1866, "step": 1654 }, { "epoch": 16.048192771084338, "grad_norm": 2.9925007820129395, "learning_rate": 4.1966019417475736e-05, "loss": 0.2258, "step": 1655 }, { "epoch": 16.057831325301205, "grad_norm": 3.337667942047119, "learning_rate": 4.1961165048543694e-05, "loss": 0.214, "step": 1656 }, { "epoch": 16.06746987951807, "grad_norm": 3.9848220348358154, "learning_rate": 4.195631067961165e-05, "loss": 0.4684, "step": 1657 }, { "epoch": 16.07710843373494, "grad_norm": 5.324105739593506, "learning_rate": 4.1951456310679616e-05, "loss": 0.2038, "step": 1658 }, { "epoch": 16.086746987951805, "grad_norm": 4.952088356018066, "learning_rate": 4.1946601941747574e-05, "loss": 0.372, "step": 1659 }, { "epoch": 16.096385542168676, "grad_norm": 5.101113796234131, "learning_rate": 4.194174757281554e-05, "loss": 0.3212, "step": 1660 }, { "epoch": 16.106024096385543, "grad_norm": 1.1101526021957397, "learning_rate": 4.1936893203883496e-05, "loss": 0.0473, "step": 1661 }, { "epoch": 16.11566265060241, "grad_norm": 5.535130023956299, "learning_rate": 4.193203883495146e-05, "loss": 0.4361, "step": 1662 }, { "epoch": 16.125301204819277, "grad_norm": 8.682282447814941, "learning_rate": 4.192718446601942e-05, "loss": 0.2132, "step": 1663 }, { "epoch": 16.134939759036143, "grad_norm": 5.415472507476807, "learning_rate": 4.192233009708738e-05, "loss": 0.3628, "step": 1664 }, { "epoch": 16.14457831325301, "grad_norm": 4.641010284423828, "learning_rate": 4.191747572815534e-05, "loss": 0.3753, "step": 1665 }, { "epoch": 16.15421686746988, "grad_norm": 2.4633865356445312, "learning_rate": 4.1912621359223305e-05, "loss": 0.258, "step": 1666 }, { "epoch": 16.163855421686748, "grad_norm": 7.993576526641846, "learning_rate": 4.190776699029126e-05, "loss": 0.3237, "step": 1667 }, { "epoch": 16.173493975903614, "grad_norm": 6.56874418258667, "learning_rate": 4.190291262135923e-05, "loss": 0.4305, "step": 1668 }, { "epoch": 16.18313253012048, "grad_norm": 9.9616060256958, "learning_rate": 4.1898058252427185e-05, "loss": 0.3639, "step": 1669 }, { "epoch": 16.19277108433735, "grad_norm": 5.479694843292236, "learning_rate": 4.189320388349515e-05, "loss": 0.3865, "step": 1670 }, { "epoch": 16.202409638554215, "grad_norm": 2.2532119750976562, "learning_rate": 4.188834951456311e-05, "loss": 0.2078, "step": 1671 }, { "epoch": 16.212048192771086, "grad_norm": 8.573551177978516, "learning_rate": 4.188349514563107e-05, "loss": 0.501, "step": 1672 }, { "epoch": 16.221686746987952, "grad_norm": 4.457943439483643, "learning_rate": 4.187864077669903e-05, "loss": 0.3321, "step": 1673 }, { "epoch": 16.23132530120482, "grad_norm": 4.816295146942139, "learning_rate": 4.187378640776699e-05, "loss": 0.317, "step": 1674 }, { "epoch": 16.240963855421686, "grad_norm": 4.74305534362793, "learning_rate": 4.186893203883495e-05, "loss": 0.1833, "step": 1675 }, { "epoch": 16.250602409638553, "grad_norm": 3.7044730186462402, "learning_rate": 4.186407766990291e-05, "loss": 0.4051, "step": 1676 }, { "epoch": 16.26024096385542, "grad_norm": 3.829765796661377, "learning_rate": 4.1859223300970874e-05, "loss": 0.2856, "step": 1677 }, { "epoch": 16.26987951807229, "grad_norm": 4.672906398773193, "learning_rate": 4.185436893203883e-05, "loss": 0.4037, "step": 1678 }, { "epoch": 16.279518072289157, "grad_norm": 3.561879873275757, "learning_rate": 4.18495145631068e-05, "loss": 0.2358, "step": 1679 }, { "epoch": 16.289156626506024, "grad_norm": 5.809507846832275, "learning_rate": 4.184466019417476e-05, "loss": 0.3285, "step": 1680 }, { "epoch": 16.29879518072289, "grad_norm": 4.434474468231201, "learning_rate": 4.1839805825242726e-05, "loss": 0.297, "step": 1681 }, { "epoch": 16.308433734939758, "grad_norm": 2.995824098587036, "learning_rate": 4.1834951456310683e-05, "loss": 0.2451, "step": 1682 }, { "epoch": 16.318072289156625, "grad_norm": 3.9545254707336426, "learning_rate": 4.183009708737865e-05, "loss": 0.1588, "step": 1683 }, { "epoch": 16.327710843373495, "grad_norm": 1.8880223035812378, "learning_rate": 4.1825242718446606e-05, "loss": 0.1549, "step": 1684 }, { "epoch": 16.337349397590362, "grad_norm": 5.724233627319336, "learning_rate": 4.1820388349514563e-05, "loss": 0.4054, "step": 1685 }, { "epoch": 16.34698795180723, "grad_norm": 7.647642612457275, "learning_rate": 4.181553398058253e-05, "loss": 0.2598, "step": 1686 }, { "epoch": 16.356626506024096, "grad_norm": 3.0359139442443848, "learning_rate": 4.1810679611650486e-05, "loss": 0.1906, "step": 1687 }, { "epoch": 16.366265060240963, "grad_norm": 4.490041732788086, "learning_rate": 4.180582524271845e-05, "loss": 0.2916, "step": 1688 }, { "epoch": 16.37590361445783, "grad_norm": 4.512575149536133, "learning_rate": 4.180097087378641e-05, "loss": 0.2475, "step": 1689 }, { "epoch": 16.3855421686747, "grad_norm": 4.7230963706970215, "learning_rate": 4.179611650485437e-05, "loss": 0.3702, "step": 1690 }, { "epoch": 16.395180722891567, "grad_norm": 3.3045883178710938, "learning_rate": 4.179126213592233e-05, "loss": 0.1769, "step": 1691 }, { "epoch": 16.404819277108434, "grad_norm": 5.650477409362793, "learning_rate": 4.1786407766990295e-05, "loss": 0.3201, "step": 1692 }, { "epoch": 16.4144578313253, "grad_norm": 5.234586715698242, "learning_rate": 4.178155339805825e-05, "loss": 0.4132, "step": 1693 }, { "epoch": 16.424096385542168, "grad_norm": 3.4096105098724365, "learning_rate": 4.177669902912622e-05, "loss": 0.129, "step": 1694 }, { "epoch": 16.433734939759034, "grad_norm": 4.01957893371582, "learning_rate": 4.1771844660194175e-05, "loss": 0.2909, "step": 1695 }, { "epoch": 16.443373493975905, "grad_norm": 4.714247226715088, "learning_rate": 4.176699029126214e-05, "loss": 0.2376, "step": 1696 }, { "epoch": 16.45301204819277, "grad_norm": 4.439007759094238, "learning_rate": 4.17621359223301e-05, "loss": 0.4647, "step": 1697 }, { "epoch": 16.46265060240964, "grad_norm": 3.743422031402588, "learning_rate": 4.175728155339806e-05, "loss": 0.2192, "step": 1698 }, { "epoch": 16.472289156626506, "grad_norm": 2.2620880603790283, "learning_rate": 4.175242718446602e-05, "loss": 0.1821, "step": 1699 }, { "epoch": 16.481927710843372, "grad_norm": 2.3759584426879883, "learning_rate": 4.1747572815533984e-05, "loss": 0.2026, "step": 1700 }, { "epoch": 16.49156626506024, "grad_norm": 2.883589744567871, "learning_rate": 4.174271844660194e-05, "loss": 0.1586, "step": 1701 }, { "epoch": 16.50120481927711, "grad_norm": 7.0012946128845215, "learning_rate": 4.17378640776699e-05, "loss": 0.4854, "step": 1702 }, { "epoch": 16.510843373493977, "grad_norm": 11.150012969970703, "learning_rate": 4.1733009708737864e-05, "loss": 0.4544, "step": 1703 }, { "epoch": 16.520481927710843, "grad_norm": 4.4080810546875, "learning_rate": 4.172815533980582e-05, "loss": 0.2106, "step": 1704 }, { "epoch": 16.53012048192771, "grad_norm": 4.005725860595703, "learning_rate": 4.172330097087379e-05, "loss": 0.3916, "step": 1705 }, { "epoch": 16.539759036144577, "grad_norm": 3.793181896209717, "learning_rate": 4.171844660194175e-05, "loss": 0.2012, "step": 1706 }, { "epoch": 16.549397590361444, "grad_norm": 5.510863780975342, "learning_rate": 4.1713592233009715e-05, "loss": 0.3869, "step": 1707 }, { "epoch": 16.559036144578315, "grad_norm": 5.345274448394775, "learning_rate": 4.170873786407767e-05, "loss": 0.4216, "step": 1708 }, { "epoch": 16.56867469879518, "grad_norm": 2.978846549987793, "learning_rate": 4.170388349514564e-05, "loss": 0.3487, "step": 1709 }, { "epoch": 16.57831325301205, "grad_norm": 3.8116261959075928, "learning_rate": 4.1699029126213595e-05, "loss": 0.2783, "step": 1710 }, { "epoch": 16.587951807228915, "grad_norm": 7.567784786224365, "learning_rate": 4.169417475728156e-05, "loss": 0.2508, "step": 1711 }, { "epoch": 16.597590361445782, "grad_norm": 8.180745124816895, "learning_rate": 4.168932038834952e-05, "loss": 0.3799, "step": 1712 }, { "epoch": 16.60722891566265, "grad_norm": 7.350423336029053, "learning_rate": 4.1684466019417476e-05, "loss": 0.3158, "step": 1713 }, { "epoch": 16.61686746987952, "grad_norm": 4.257702827453613, "learning_rate": 4.167961165048544e-05, "loss": 0.2503, "step": 1714 }, { "epoch": 16.626506024096386, "grad_norm": 3.6540334224700928, "learning_rate": 4.16747572815534e-05, "loss": 0.3128, "step": 1715 }, { "epoch": 16.636144578313253, "grad_norm": 3.332963228225708, "learning_rate": 4.166990291262136e-05, "loss": 0.3691, "step": 1716 }, { "epoch": 16.64578313253012, "grad_norm": 3.6046793460845947, "learning_rate": 4.166504854368932e-05, "loss": 0.3471, "step": 1717 }, { "epoch": 16.655421686746987, "grad_norm": 2.742831230163574, "learning_rate": 4.1660194174757285e-05, "loss": 0.2468, "step": 1718 }, { "epoch": 16.665060240963854, "grad_norm": 3.9554309844970703, "learning_rate": 4.165533980582524e-05, "loss": 0.2995, "step": 1719 }, { "epoch": 16.674698795180724, "grad_norm": 6.886241912841797, "learning_rate": 4.165048543689321e-05, "loss": 0.5819, "step": 1720 }, { "epoch": 16.68433734939759, "grad_norm": 4.735328197479248, "learning_rate": 4.1645631067961165e-05, "loss": 0.3309, "step": 1721 }, { "epoch": 16.693975903614458, "grad_norm": 5.221620082855225, "learning_rate": 4.164077669902913e-05, "loss": 0.4815, "step": 1722 }, { "epoch": 16.703614457831325, "grad_norm": 3.763521909713745, "learning_rate": 4.163592233009709e-05, "loss": 0.3882, "step": 1723 }, { "epoch": 16.71325301204819, "grad_norm": 5.079242706298828, "learning_rate": 4.163106796116505e-05, "loss": 0.3665, "step": 1724 }, { "epoch": 16.72289156626506, "grad_norm": 3.333984613418579, "learning_rate": 4.162621359223301e-05, "loss": 0.1802, "step": 1725 }, { "epoch": 16.73253012048193, "grad_norm": 2.5003561973571777, "learning_rate": 4.1621359223300974e-05, "loss": 0.1905, "step": 1726 }, { "epoch": 16.742168674698796, "grad_norm": 6.747567653656006, "learning_rate": 4.161650485436893e-05, "loss": 0.3865, "step": 1727 }, { "epoch": 16.751807228915663, "grad_norm": 6.126522541046143, "learning_rate": 4.161165048543689e-05, "loss": 0.3653, "step": 1728 }, { "epoch": 16.76144578313253, "grad_norm": 2.439336061477661, "learning_rate": 4.1606796116504854e-05, "loss": 0.167, "step": 1729 }, { "epoch": 16.771084337349397, "grad_norm": 5.383912086486816, "learning_rate": 4.160194174757282e-05, "loss": 0.2658, "step": 1730 }, { "epoch": 16.780722891566263, "grad_norm": 5.709712505340576, "learning_rate": 4.159708737864078e-05, "loss": 0.2434, "step": 1731 }, { "epoch": 16.790361445783134, "grad_norm": 9.764297485351562, "learning_rate": 4.159223300970874e-05, "loss": 0.4702, "step": 1732 }, { "epoch": 16.8, "grad_norm": 2.6010499000549316, "learning_rate": 4.1587378640776705e-05, "loss": 0.1303, "step": 1733 }, { "epoch": 16.809638554216868, "grad_norm": 3.022085189819336, "learning_rate": 4.158252427184466e-05, "loss": 0.1349, "step": 1734 }, { "epoch": 16.819277108433734, "grad_norm": 5.043005466461182, "learning_rate": 4.157766990291263e-05, "loss": 0.3285, "step": 1735 }, { "epoch": 16.8289156626506, "grad_norm": 7.696630001068115, "learning_rate": 4.1572815533980585e-05, "loss": 0.2631, "step": 1736 }, { "epoch": 16.83855421686747, "grad_norm": 3.988751173019409, "learning_rate": 4.156796116504855e-05, "loss": 0.2873, "step": 1737 }, { "epoch": 16.84819277108434, "grad_norm": 10.217570304870605, "learning_rate": 4.156310679611651e-05, "loss": 0.2573, "step": 1738 }, { "epoch": 16.857831325301206, "grad_norm": 4.76560640335083, "learning_rate": 4.1558252427184465e-05, "loss": 0.2846, "step": 1739 }, { "epoch": 16.867469879518072, "grad_norm": 3.9476521015167236, "learning_rate": 4.155339805825243e-05, "loss": 0.3071, "step": 1740 }, { "epoch": 16.87710843373494, "grad_norm": 5.520322322845459, "learning_rate": 4.154854368932039e-05, "loss": 0.2491, "step": 1741 }, { "epoch": 16.886746987951806, "grad_norm": 5.492001533508301, "learning_rate": 4.154368932038835e-05, "loss": 0.236, "step": 1742 }, { "epoch": 16.896385542168673, "grad_norm": 3.6768317222595215, "learning_rate": 4.153883495145631e-05, "loss": 0.2098, "step": 1743 }, { "epoch": 16.906024096385543, "grad_norm": 5.831479072570801, "learning_rate": 4.1533980582524274e-05, "loss": 0.3949, "step": 1744 }, { "epoch": 16.91566265060241, "grad_norm": 2.9996986389160156, "learning_rate": 4.152912621359223e-05, "loss": 0.2154, "step": 1745 }, { "epoch": 16.925301204819277, "grad_norm": 4.564676761627197, "learning_rate": 4.15242718446602e-05, "loss": 0.2924, "step": 1746 }, { "epoch": 16.934939759036144, "grad_norm": 1.9728026390075684, "learning_rate": 4.1519417475728154e-05, "loss": 0.1342, "step": 1747 }, { "epoch": 16.94457831325301, "grad_norm": 6.887441635131836, "learning_rate": 4.151456310679612e-05, "loss": 0.2724, "step": 1748 }, { "epoch": 16.954216867469878, "grad_norm": 2.433074951171875, "learning_rate": 4.150970873786408e-05, "loss": 0.1888, "step": 1749 }, { "epoch": 16.96385542168675, "grad_norm": 3.3613078594207764, "learning_rate": 4.150485436893204e-05, "loss": 0.2164, "step": 1750 }, { "epoch": 16.973493975903615, "grad_norm": 1.724467158317566, "learning_rate": 4.15e-05, "loss": 0.1478, "step": 1751 }, { "epoch": 16.983132530120482, "grad_norm": 3.9536008834838867, "learning_rate": 4.1495145631067964e-05, "loss": 0.2674, "step": 1752 }, { "epoch": 16.99277108433735, "grad_norm": 1.2600703239440918, "learning_rate": 4.149029126213592e-05, "loss": 0.1021, "step": 1753 }, { "epoch": 17.00843373493976, "grad_norm": 6.267336368560791, "learning_rate": 4.1485436893203886e-05, "loss": 0.2849, "step": 1754 }, { "epoch": 17.018072289156628, "grad_norm": 10.410480499267578, "learning_rate": 4.148058252427185e-05, "loss": 0.381, "step": 1755 }, { "epoch": 17.027710843373494, "grad_norm": 3.2425973415374756, "learning_rate": 4.147572815533981e-05, "loss": 0.3304, "step": 1756 }, { "epoch": 17.03734939759036, "grad_norm": 15.215202331542969, "learning_rate": 4.147087378640777e-05, "loss": 0.2535, "step": 1757 }, { "epoch": 17.04698795180723, "grad_norm": 5.930279731750488, "learning_rate": 4.146601941747573e-05, "loss": 0.5524, "step": 1758 }, { "epoch": 17.056626506024095, "grad_norm": 9.33897876739502, "learning_rate": 4.1461165048543695e-05, "loss": 0.197, "step": 1759 }, { "epoch": 17.066265060240966, "grad_norm": 10.023988723754883, "learning_rate": 4.145631067961165e-05, "loss": 0.2632, "step": 1760 }, { "epoch": 17.075903614457832, "grad_norm": 7.136229991912842, "learning_rate": 4.145145631067962e-05, "loss": 0.3345, "step": 1761 }, { "epoch": 17.0855421686747, "grad_norm": 24.740612030029297, "learning_rate": 4.1446601941747575e-05, "loss": 0.4784, "step": 1762 }, { "epoch": 17.095180722891566, "grad_norm": 4.8354411125183105, "learning_rate": 4.144174757281554e-05, "loss": 0.2752, "step": 1763 }, { "epoch": 17.104819277108433, "grad_norm": 4.975378036499023, "learning_rate": 4.14368932038835e-05, "loss": 0.2163, "step": 1764 }, { "epoch": 17.1144578313253, "grad_norm": 11.896659851074219, "learning_rate": 4.143203883495146e-05, "loss": 0.2925, "step": 1765 }, { "epoch": 17.12409638554217, "grad_norm": 3.420233964920044, "learning_rate": 4.142718446601942e-05, "loss": 0.3967, "step": 1766 }, { "epoch": 17.133734939759037, "grad_norm": 18.549114227294922, "learning_rate": 4.142233009708738e-05, "loss": 0.3446, "step": 1767 }, { "epoch": 17.143373493975904, "grad_norm": 11.611833572387695, "learning_rate": 4.141747572815534e-05, "loss": 0.2066, "step": 1768 }, { "epoch": 17.15301204819277, "grad_norm": 10.19420337677002, "learning_rate": 4.14126213592233e-05, "loss": 0.2694, "step": 1769 }, { "epoch": 17.162650602409638, "grad_norm": 7.775139331817627, "learning_rate": 4.1407766990291264e-05, "loss": 0.2816, "step": 1770 }, { "epoch": 17.172289156626505, "grad_norm": 3.7579569816589355, "learning_rate": 4.140291262135922e-05, "loss": 0.3021, "step": 1771 }, { "epoch": 17.181927710843375, "grad_norm": 13.230438232421875, "learning_rate": 4.1398058252427187e-05, "loss": 0.2719, "step": 1772 }, { "epoch": 17.191566265060242, "grad_norm": 11.513103485107422, "learning_rate": 4.1393203883495144e-05, "loss": 0.1568, "step": 1773 }, { "epoch": 17.20120481927711, "grad_norm": 6.460350513458252, "learning_rate": 4.138834951456311e-05, "loss": 0.2564, "step": 1774 }, { "epoch": 17.210843373493976, "grad_norm": 6.556314468383789, "learning_rate": 4.1383495145631067e-05, "loss": 0.2816, "step": 1775 }, { "epoch": 17.220481927710843, "grad_norm": 1.0792016983032227, "learning_rate": 4.137864077669903e-05, "loss": 0.2837, "step": 1776 }, { "epoch": 17.23012048192771, "grad_norm": 1.747038722038269, "learning_rate": 4.137378640776699e-05, "loss": 0.1442, "step": 1777 }, { "epoch": 17.23975903614458, "grad_norm": 11.894841194152832, "learning_rate": 4.1368932038834953e-05, "loss": 0.3681, "step": 1778 }, { "epoch": 17.249397590361447, "grad_norm": 4.778874397277832, "learning_rate": 4.136407766990291e-05, "loss": 0.4774, "step": 1779 }, { "epoch": 17.259036144578314, "grad_norm": 3.9203147888183594, "learning_rate": 4.1359223300970876e-05, "loss": 0.2224, "step": 1780 }, { "epoch": 17.26867469879518, "grad_norm": 6.043986797332764, "learning_rate": 4.135436893203884e-05, "loss": 0.3496, "step": 1781 }, { "epoch": 17.278313253012048, "grad_norm": 19.19528579711914, "learning_rate": 4.13495145631068e-05, "loss": 0.2581, "step": 1782 }, { "epoch": 17.287951807228914, "grad_norm": 5.890816688537598, "learning_rate": 4.134466019417476e-05, "loss": 0.2319, "step": 1783 }, { "epoch": 17.297590361445785, "grad_norm": 0.9688854813575745, "learning_rate": 4.133980582524272e-05, "loss": 0.2353, "step": 1784 }, { "epoch": 17.30722891566265, "grad_norm": 8.012612342834473, "learning_rate": 4.1334951456310685e-05, "loss": 0.4912, "step": 1785 }, { "epoch": 17.31686746987952, "grad_norm": 17.46922492980957, "learning_rate": 4.133009708737864e-05, "loss": 0.3775, "step": 1786 }, { "epoch": 17.326506024096386, "grad_norm": 1.8928964138031006, "learning_rate": 4.132524271844661e-05, "loss": 0.1679, "step": 1787 }, { "epoch": 17.336144578313252, "grad_norm": 3.8785085678100586, "learning_rate": 4.1320388349514565e-05, "loss": 0.1931, "step": 1788 }, { "epoch": 17.34578313253012, "grad_norm": 2.0623152256011963, "learning_rate": 4.131553398058253e-05, "loss": 0.208, "step": 1789 }, { "epoch": 17.355421686746986, "grad_norm": 4.3932623863220215, "learning_rate": 4.131067961165049e-05, "loss": 0.2863, "step": 1790 }, { "epoch": 17.365060240963857, "grad_norm": 4.314972400665283, "learning_rate": 4.130582524271845e-05, "loss": 0.3304, "step": 1791 }, { "epoch": 17.374698795180723, "grad_norm": 2.928340196609497, "learning_rate": 4.130097087378641e-05, "loss": 0.2176, "step": 1792 }, { "epoch": 17.38433734939759, "grad_norm": 2.2026875019073486, "learning_rate": 4.1296116504854374e-05, "loss": 0.4285, "step": 1793 }, { "epoch": 17.393975903614457, "grad_norm": 2.8146731853485107, "learning_rate": 4.129126213592233e-05, "loss": 0.1399, "step": 1794 }, { "epoch": 17.403614457831324, "grad_norm": 24.176860809326172, "learning_rate": 4.128640776699029e-05, "loss": 0.2818, "step": 1795 }, { "epoch": 17.413253012048195, "grad_norm": 8.404706954956055, "learning_rate": 4.1281553398058254e-05, "loss": 0.2279, "step": 1796 }, { "epoch": 17.42289156626506, "grad_norm": 7.703736782073975, "learning_rate": 4.127669902912621e-05, "loss": 0.5032, "step": 1797 }, { "epoch": 17.43253012048193, "grad_norm": 12.661428451538086, "learning_rate": 4.1271844660194176e-05, "loss": 0.2393, "step": 1798 }, { "epoch": 17.442168674698795, "grad_norm": 37.20208740234375, "learning_rate": 4.1266990291262134e-05, "loss": 0.333, "step": 1799 }, { "epoch": 17.451807228915662, "grad_norm": 17.03099822998047, "learning_rate": 4.12621359223301e-05, "loss": 0.4476, "step": 1800 }, { "epoch": 17.46144578313253, "grad_norm": 4.104007720947266, "learning_rate": 4.1257281553398056e-05, "loss": 0.334, "step": 1801 }, { "epoch": 17.471084337349396, "grad_norm": 4.271883487701416, "learning_rate": 4.125242718446602e-05, "loss": 0.3291, "step": 1802 }, { "epoch": 17.480722891566266, "grad_norm": 14.027079582214355, "learning_rate": 4.124757281553398e-05, "loss": 0.1857, "step": 1803 }, { "epoch": 17.490361445783133, "grad_norm": 15.83582592010498, "learning_rate": 4.124271844660194e-05, "loss": 0.1652, "step": 1804 }, { "epoch": 17.5, "grad_norm": 51.302494049072266, "learning_rate": 4.12378640776699e-05, "loss": 0.2924, "step": 1805 }, { "epoch": 17.509638554216867, "grad_norm": 4.384829521179199, "learning_rate": 4.1233009708737865e-05, "loss": 0.1862, "step": 1806 }, { "epoch": 17.519277108433734, "grad_norm": 4.451680660247803, "learning_rate": 4.122815533980583e-05, "loss": 0.1909, "step": 1807 }, { "epoch": 17.528915662650604, "grad_norm": 1.1713736057281494, "learning_rate": 4.122330097087379e-05, "loss": 0.2213, "step": 1808 }, { "epoch": 17.53855421686747, "grad_norm": 4.367386817932129, "learning_rate": 4.121844660194175e-05, "loss": 0.2245, "step": 1809 }, { "epoch": 17.548192771084338, "grad_norm": 5.902531147003174, "learning_rate": 4.121359223300971e-05, "loss": 0.3228, "step": 1810 }, { "epoch": 17.557831325301205, "grad_norm": 4.793532371520996, "learning_rate": 4.1208737864077675e-05, "loss": 0.4909, "step": 1811 }, { "epoch": 17.56746987951807, "grad_norm": 2.456507682800293, "learning_rate": 4.120388349514563e-05, "loss": 0.1332, "step": 1812 }, { "epoch": 17.57710843373494, "grad_norm": 11.501250267028809, "learning_rate": 4.11990291262136e-05, "loss": 0.3462, "step": 1813 }, { "epoch": 17.586746987951805, "grad_norm": 15.325340270996094, "learning_rate": 4.1194174757281555e-05, "loss": 0.365, "step": 1814 }, { "epoch": 17.596385542168676, "grad_norm": 6.239866256713867, "learning_rate": 4.118932038834952e-05, "loss": 0.1961, "step": 1815 }, { "epoch": 17.606024096385543, "grad_norm": 7.851386070251465, "learning_rate": 4.118446601941748e-05, "loss": 0.3087, "step": 1816 }, { "epoch": 17.61566265060241, "grad_norm": 4.405789375305176, "learning_rate": 4.117961165048544e-05, "loss": 0.3194, "step": 1817 }, { "epoch": 17.625301204819277, "grad_norm": 9.446488380432129, "learning_rate": 4.11747572815534e-05, "loss": 0.2834, "step": 1818 }, { "epoch": 17.634939759036143, "grad_norm": 11.849323272705078, "learning_rate": 4.1169902912621364e-05, "loss": 0.2264, "step": 1819 }, { "epoch": 17.644578313253014, "grad_norm": 1.6107006072998047, "learning_rate": 4.116504854368932e-05, "loss": 0.1543, "step": 1820 }, { "epoch": 17.65421686746988, "grad_norm": 10.461980819702148, "learning_rate": 4.1160194174757286e-05, "loss": 0.2921, "step": 1821 }, { "epoch": 17.663855421686748, "grad_norm": 7.818498134613037, "learning_rate": 4.1155339805825244e-05, "loss": 0.3363, "step": 1822 }, { "epoch": 17.673493975903614, "grad_norm": 8.401054382324219, "learning_rate": 4.11504854368932e-05, "loss": 0.2388, "step": 1823 }, { "epoch": 17.68313253012048, "grad_norm": 4.723536014556885, "learning_rate": 4.1145631067961166e-05, "loss": 0.2718, "step": 1824 }, { "epoch": 17.69277108433735, "grad_norm": 15.956716537475586, "learning_rate": 4.1140776699029124e-05, "loss": 0.2476, "step": 1825 }, { "epoch": 17.702409638554215, "grad_norm": 6.146286487579346, "learning_rate": 4.113592233009709e-05, "loss": 0.4857, "step": 1826 }, { "epoch": 17.712048192771086, "grad_norm": 10.137688636779785, "learning_rate": 4.1131067961165046e-05, "loss": 0.2563, "step": 1827 }, { "epoch": 17.721686746987952, "grad_norm": 4.289064407348633, "learning_rate": 4.112621359223301e-05, "loss": 0.2094, "step": 1828 }, { "epoch": 17.73132530120482, "grad_norm": 10.592137336730957, "learning_rate": 4.112135922330097e-05, "loss": 0.3319, "step": 1829 }, { "epoch": 17.740963855421686, "grad_norm": 11.023401260375977, "learning_rate": 4.111650485436893e-05, "loss": 0.2192, "step": 1830 }, { "epoch": 17.750602409638553, "grad_norm": 9.014571189880371, "learning_rate": 4.11116504854369e-05, "loss": 0.2512, "step": 1831 }, { "epoch": 17.760240963855424, "grad_norm": 9.301403045654297, "learning_rate": 4.110679611650486e-05, "loss": 0.4627, "step": 1832 }, { "epoch": 17.76987951807229, "grad_norm": 11.641212463378906, "learning_rate": 4.110194174757282e-05, "loss": 0.2802, "step": 1833 }, { "epoch": 17.779518072289157, "grad_norm": 3.2242698669433594, "learning_rate": 4.109708737864078e-05, "loss": 0.1304, "step": 1834 }, { "epoch": 17.789156626506024, "grad_norm": 2.889517068862915, "learning_rate": 4.109223300970874e-05, "loss": 0.104, "step": 1835 }, { "epoch": 17.79879518072289, "grad_norm": 10.864859580993652, "learning_rate": 4.10873786407767e-05, "loss": 0.4073, "step": 1836 }, { "epoch": 17.808433734939758, "grad_norm": 1.4903881549835205, "learning_rate": 4.1082524271844664e-05, "loss": 0.2717, "step": 1837 }, { "epoch": 17.818072289156625, "grad_norm": 2.1822094917297363, "learning_rate": 4.107766990291262e-05, "loss": 0.3371, "step": 1838 }, { "epoch": 17.827710843373495, "grad_norm": 1.7255260944366455, "learning_rate": 4.107281553398059e-05, "loss": 0.3848, "step": 1839 }, { "epoch": 17.837349397590362, "grad_norm": 5.051955223083496, "learning_rate": 4.1067961165048544e-05, "loss": 0.3369, "step": 1840 }, { "epoch": 17.84698795180723, "grad_norm": 15.712580680847168, "learning_rate": 4.106310679611651e-05, "loss": 0.2476, "step": 1841 }, { "epoch": 17.856626506024096, "grad_norm": 5.608750820159912, "learning_rate": 4.105825242718447e-05, "loss": 0.4326, "step": 1842 }, { "epoch": 17.866265060240963, "grad_norm": 10.935677528381348, "learning_rate": 4.105339805825243e-05, "loss": 0.1857, "step": 1843 }, { "epoch": 17.87590361445783, "grad_norm": 18.880220413208008, "learning_rate": 4.104854368932039e-05, "loss": 0.4194, "step": 1844 }, { "epoch": 17.8855421686747, "grad_norm": 5.205605506896973, "learning_rate": 4.1043689320388354e-05, "loss": 0.2398, "step": 1845 }, { "epoch": 17.895180722891567, "grad_norm": 0.9815051555633545, "learning_rate": 4.103883495145631e-05, "loss": 0.0627, "step": 1846 }, { "epoch": 17.904819277108434, "grad_norm": 11.315879821777344, "learning_rate": 4.1033980582524276e-05, "loss": 0.1341, "step": 1847 }, { "epoch": 17.9144578313253, "grad_norm": 9.440986633300781, "learning_rate": 4.1029126213592234e-05, "loss": 0.2657, "step": 1848 }, { "epoch": 17.924096385542168, "grad_norm": 3.1934008598327637, "learning_rate": 4.10242718446602e-05, "loss": 0.1791, "step": 1849 }, { "epoch": 17.933734939759034, "grad_norm": 30.985750198364258, "learning_rate": 4.1019417475728156e-05, "loss": 0.4708, "step": 1850 }, { "epoch": 17.943373493975905, "grad_norm": 2.008300304412842, "learning_rate": 4.1014563106796114e-05, "loss": 0.2428, "step": 1851 }, { "epoch": 17.95301204819277, "grad_norm": 7.354302883148193, "learning_rate": 4.100970873786408e-05, "loss": 0.296, "step": 1852 }, { "epoch": 17.96265060240964, "grad_norm": 16.26437759399414, "learning_rate": 4.1004854368932036e-05, "loss": 0.3673, "step": 1853 }, { "epoch": 17.972289156626506, "grad_norm": 9.445116996765137, "learning_rate": 4.1e-05, "loss": 0.2969, "step": 1854 }, { "epoch": 17.981927710843372, "grad_norm": 2.565066337585449, "learning_rate": 4.099514563106796e-05, "loss": 0.1837, "step": 1855 }, { "epoch": 17.99156626506024, "grad_norm": 9.607579231262207, "learning_rate": 4.099029126213593e-05, "loss": 0.2281, "step": 1856 }, { "epoch": 18.00722891566265, "grad_norm": 11.829873085021973, "learning_rate": 4.098543689320389e-05, "loss": 0.2299, "step": 1857 }, { "epoch": 18.016867469879518, "grad_norm": 12.249272346496582, "learning_rate": 4.098058252427185e-05, "loss": 0.3507, "step": 1858 }, { "epoch": 18.026506024096385, "grad_norm": 3.5948314666748047, "learning_rate": 4.097572815533981e-05, "loss": 0.1817, "step": 1859 }, { "epoch": 18.03614457831325, "grad_norm": 11.622495651245117, "learning_rate": 4.0970873786407774e-05, "loss": 0.2152, "step": 1860 }, { "epoch": 18.045783132530122, "grad_norm": 4.545793056488037, "learning_rate": 4.096601941747573e-05, "loss": 0.1749, "step": 1861 }, { "epoch": 18.05542168674699, "grad_norm": 9.744099617004395, "learning_rate": 4.096116504854369e-05, "loss": 0.241, "step": 1862 }, { "epoch": 18.065060240963856, "grad_norm": 9.163779258728027, "learning_rate": 4.0956310679611654e-05, "loss": 0.4928, "step": 1863 }, { "epoch": 18.074698795180723, "grad_norm": 4.5958967208862305, "learning_rate": 4.095145631067961e-05, "loss": 0.2124, "step": 1864 }, { "epoch": 18.08433734939759, "grad_norm": 9.6128568649292, "learning_rate": 4.0946601941747576e-05, "loss": 0.2372, "step": 1865 }, { "epoch": 18.093975903614457, "grad_norm": 2.289186954498291, "learning_rate": 4.0941747572815534e-05, "loss": 0.3256, "step": 1866 }, { "epoch": 18.103614457831327, "grad_norm": 12.58114242553711, "learning_rate": 4.09368932038835e-05, "loss": 0.3808, "step": 1867 }, { "epoch": 18.113253012048194, "grad_norm": 9.54028034210205, "learning_rate": 4.0932038834951457e-05, "loss": 0.2817, "step": 1868 }, { "epoch": 18.12289156626506, "grad_norm": 10.806303977966309, "learning_rate": 4.092718446601942e-05, "loss": 0.3983, "step": 1869 }, { "epoch": 18.132530120481928, "grad_norm": 3.6236298084259033, "learning_rate": 4.092233009708738e-05, "loss": 0.1839, "step": 1870 }, { "epoch": 18.142168674698794, "grad_norm": 6.081019401550293, "learning_rate": 4.091747572815534e-05, "loss": 0.2675, "step": 1871 }, { "epoch": 18.15180722891566, "grad_norm": 3.574050188064575, "learning_rate": 4.09126213592233e-05, "loss": 0.1773, "step": 1872 }, { "epoch": 18.16144578313253, "grad_norm": 6.410555839538574, "learning_rate": 4.0907766990291266e-05, "loss": 0.3589, "step": 1873 }, { "epoch": 18.1710843373494, "grad_norm": 1.1696534156799316, "learning_rate": 4.0902912621359223e-05, "loss": 0.3319, "step": 1874 }, { "epoch": 18.180722891566266, "grad_norm": 3.6754982471466064, "learning_rate": 4.089805825242719e-05, "loss": 0.3639, "step": 1875 }, { "epoch": 18.190361445783132, "grad_norm": 0.7443215250968933, "learning_rate": 4.0893203883495146e-05, "loss": 0.2888, "step": 1876 }, { "epoch": 18.2, "grad_norm": 8.127094268798828, "learning_rate": 4.0888349514563103e-05, "loss": 0.3043, "step": 1877 }, { "epoch": 18.209638554216866, "grad_norm": 2.76833176612854, "learning_rate": 4.088349514563107e-05, "loss": 0.2817, "step": 1878 }, { "epoch": 18.219277108433737, "grad_norm": 3.980786085128784, "learning_rate": 4.0878640776699026e-05, "loss": 0.4566, "step": 1879 }, { "epoch": 18.228915662650603, "grad_norm": 4.554115295410156, "learning_rate": 4.087378640776699e-05, "loss": 0.2673, "step": 1880 }, { "epoch": 18.23855421686747, "grad_norm": 4.287446975708008, "learning_rate": 4.0868932038834955e-05, "loss": 0.272, "step": 1881 }, { "epoch": 18.248192771084337, "grad_norm": 10.451662063598633, "learning_rate": 4.086407766990292e-05, "loss": 0.485, "step": 1882 }, { "epoch": 18.257831325301204, "grad_norm": 10.036189079284668, "learning_rate": 4.085922330097088e-05, "loss": 0.4676, "step": 1883 }, { "epoch": 18.26746987951807, "grad_norm": 2.7257368564605713, "learning_rate": 4.085436893203884e-05, "loss": 0.1752, "step": 1884 }, { "epoch": 18.27710843373494, "grad_norm": 7.835417747497559, "learning_rate": 4.08495145631068e-05, "loss": 0.3435, "step": 1885 }, { "epoch": 18.28674698795181, "grad_norm": 6.310956001281738, "learning_rate": 4.0844660194174764e-05, "loss": 0.3228, "step": 1886 }, { "epoch": 18.296385542168675, "grad_norm": 9.197566986083984, "learning_rate": 4.083980582524272e-05, "loss": 0.3507, "step": 1887 }, { "epoch": 18.306024096385542, "grad_norm": 30.112422943115234, "learning_rate": 4.083495145631068e-05, "loss": 0.3093, "step": 1888 }, { "epoch": 18.31566265060241, "grad_norm": 22.205385208129883, "learning_rate": 4.0830097087378644e-05, "loss": 0.2598, "step": 1889 }, { "epoch": 18.325301204819276, "grad_norm": 9.42130184173584, "learning_rate": 4.08252427184466e-05, "loss": 0.3048, "step": 1890 }, { "epoch": 18.334939759036146, "grad_norm": 1.6749236583709717, "learning_rate": 4.0820388349514566e-05, "loss": 0.1916, "step": 1891 }, { "epoch": 18.344578313253013, "grad_norm": 3.848090887069702, "learning_rate": 4.0815533980582524e-05, "loss": 0.3051, "step": 1892 }, { "epoch": 18.35421686746988, "grad_norm": 10.565592765808105, "learning_rate": 4.081067961165049e-05, "loss": 0.3239, "step": 1893 }, { "epoch": 18.363855421686747, "grad_norm": 1.7469252347946167, "learning_rate": 4.0805825242718446e-05, "loss": 0.5279, "step": 1894 }, { "epoch": 18.373493975903614, "grad_norm": 2.95633864402771, "learning_rate": 4.080097087378641e-05, "loss": 0.2756, "step": 1895 }, { "epoch": 18.38313253012048, "grad_norm": 14.258072853088379, "learning_rate": 4.079611650485437e-05, "loss": 0.3608, "step": 1896 }, { "epoch": 18.39277108433735, "grad_norm": 5.398010730743408, "learning_rate": 4.079126213592233e-05, "loss": 0.398, "step": 1897 }, { "epoch": 18.402409638554218, "grad_norm": 6.0432915687561035, "learning_rate": 4.078640776699029e-05, "loss": 0.1627, "step": 1898 }, { "epoch": 18.412048192771085, "grad_norm": 3.6670398712158203, "learning_rate": 4.0781553398058255e-05, "loss": 0.3632, "step": 1899 }, { "epoch": 18.42168674698795, "grad_norm": 0.7338554859161377, "learning_rate": 4.077669902912621e-05, "loss": 0.0943, "step": 1900 }, { "epoch": 18.43132530120482, "grad_norm": 6.716762065887451, "learning_rate": 4.077184466019418e-05, "loss": 0.1609, "step": 1901 }, { "epoch": 18.440963855421685, "grad_norm": 2.802267074584961, "learning_rate": 4.0766990291262135e-05, "loss": 0.286, "step": 1902 }, { "epoch": 18.450602409638556, "grad_norm": 4.657084941864014, "learning_rate": 4.07621359223301e-05, "loss": 0.1591, "step": 1903 }, { "epoch": 18.460240963855423, "grad_norm": 16.487560272216797, "learning_rate": 4.075728155339806e-05, "loss": 0.229, "step": 1904 }, { "epoch": 18.46987951807229, "grad_norm": 13.751959800720215, "learning_rate": 4.0752427184466016e-05, "loss": 0.2673, "step": 1905 }, { "epoch": 18.479518072289157, "grad_norm": 1.805694580078125, "learning_rate": 4.074757281553398e-05, "loss": 0.2216, "step": 1906 }, { "epoch": 18.489156626506023, "grad_norm": 14.767699241638184, "learning_rate": 4.0742718446601945e-05, "loss": 0.2522, "step": 1907 }, { "epoch": 18.49879518072289, "grad_norm": 6.470164775848389, "learning_rate": 4.073786407766991e-05, "loss": 0.3697, "step": 1908 }, { "epoch": 18.50843373493976, "grad_norm": 4.875903606414795, "learning_rate": 4.073300970873787e-05, "loss": 0.1434, "step": 1909 }, { "epoch": 18.518072289156628, "grad_norm": 2.2073121070861816, "learning_rate": 4.072815533980583e-05, "loss": 0.163, "step": 1910 }, { "epoch": 18.527710843373494, "grad_norm": 4.881293773651123, "learning_rate": 4.072330097087379e-05, "loss": 0.4349, "step": 1911 }, { "epoch": 18.53734939759036, "grad_norm": 3.3135366439819336, "learning_rate": 4.0718446601941754e-05, "loss": 0.283, "step": 1912 }, { "epoch": 18.54698795180723, "grad_norm": 6.551750659942627, "learning_rate": 4.071359223300971e-05, "loss": 0.204, "step": 1913 }, { "epoch": 18.556626506024095, "grad_norm": 5.8429155349731445, "learning_rate": 4.0708737864077676e-05, "loss": 0.4725, "step": 1914 }, { "epoch": 18.566265060240966, "grad_norm": 9.497761726379395, "learning_rate": 4.0703883495145634e-05, "loss": 0.2749, "step": 1915 }, { "epoch": 18.575903614457832, "grad_norm": 13.822434425354004, "learning_rate": 4.069902912621359e-05, "loss": 0.4899, "step": 1916 }, { "epoch": 18.5855421686747, "grad_norm": 5.604675769805908, "learning_rate": 4.0694174757281556e-05, "loss": 0.2488, "step": 1917 }, { "epoch": 18.595180722891566, "grad_norm": 3.424990653991699, "learning_rate": 4.0689320388349514e-05, "loss": 0.244, "step": 1918 }, { "epoch": 18.604819277108433, "grad_norm": 8.4946928024292, "learning_rate": 4.068446601941748e-05, "loss": 0.2331, "step": 1919 }, { "epoch": 18.6144578313253, "grad_norm": 32.87291717529297, "learning_rate": 4.0679611650485436e-05, "loss": 0.1974, "step": 1920 }, { "epoch": 18.62409638554217, "grad_norm": 12.039647102355957, "learning_rate": 4.06747572815534e-05, "loss": 0.2404, "step": 1921 }, { "epoch": 18.633734939759037, "grad_norm": 25.020641326904297, "learning_rate": 4.066990291262136e-05, "loss": 0.5496, "step": 1922 }, { "epoch": 18.643373493975904, "grad_norm": 7.099429130554199, "learning_rate": 4.066504854368932e-05, "loss": 0.1989, "step": 1923 }, { "epoch": 18.65301204819277, "grad_norm": 14.871482849121094, "learning_rate": 4.066019417475728e-05, "loss": 0.1199, "step": 1924 }, { "epoch": 18.662650602409638, "grad_norm": 18.770597457885742, "learning_rate": 4.0655339805825245e-05, "loss": 0.542, "step": 1925 }, { "epoch": 18.672289156626505, "grad_norm": 16.155067443847656, "learning_rate": 4.06504854368932e-05, "loss": 0.2808, "step": 1926 }, { "epoch": 18.681927710843375, "grad_norm": 9.25912094116211, "learning_rate": 4.064563106796117e-05, "loss": 0.1887, "step": 1927 }, { "epoch": 18.691566265060242, "grad_norm": 8.549727439880371, "learning_rate": 4.0640776699029125e-05, "loss": 0.1619, "step": 1928 }, { "epoch": 18.70120481927711, "grad_norm": 8.837327003479004, "learning_rate": 4.063592233009709e-05, "loss": 0.2567, "step": 1929 }, { "epoch": 18.710843373493976, "grad_norm": 8.098593711853027, "learning_rate": 4.063106796116505e-05, "loss": 0.3012, "step": 1930 }, { "epoch": 18.720481927710843, "grad_norm": 3.262629508972168, "learning_rate": 4.062621359223301e-05, "loss": 0.1541, "step": 1931 }, { "epoch": 18.73012048192771, "grad_norm": 12.388605117797852, "learning_rate": 4.062135922330098e-05, "loss": 0.4954, "step": 1932 }, { "epoch": 18.739759036144576, "grad_norm": 2.7993216514587402, "learning_rate": 4.0616504854368934e-05, "loss": 0.1291, "step": 1933 }, { "epoch": 18.749397590361447, "grad_norm": 2.2074899673461914, "learning_rate": 4.06116504854369e-05, "loss": 0.2515, "step": 1934 }, { "epoch": 18.759036144578314, "grad_norm": 22.03255844116211, "learning_rate": 4.060679611650486e-05, "loss": 0.5507, "step": 1935 }, { "epoch": 18.76867469879518, "grad_norm": 0.6541060209274292, "learning_rate": 4.060194174757282e-05, "loss": 0.2823, "step": 1936 }, { "epoch": 18.778313253012048, "grad_norm": 7.354572772979736, "learning_rate": 4.059708737864078e-05, "loss": 0.2112, "step": 1937 }, { "epoch": 18.787951807228914, "grad_norm": 6.298649311065674, "learning_rate": 4.0592233009708743e-05, "loss": 0.3935, "step": 1938 }, { "epoch": 18.797590361445785, "grad_norm": 2.821392774581909, "learning_rate": 4.05873786407767e-05, "loss": 0.2568, "step": 1939 }, { "epoch": 18.80722891566265, "grad_norm": 13.2752685546875, "learning_rate": 4.0582524271844666e-05, "loss": 0.4848, "step": 1940 }, { "epoch": 18.81686746987952, "grad_norm": 2.4304776191711426, "learning_rate": 4.0577669902912624e-05, "loss": 0.1904, "step": 1941 }, { "epoch": 18.826506024096386, "grad_norm": 2.901285171508789, "learning_rate": 4.057281553398059e-05, "loss": 0.0538, "step": 1942 }, { "epoch": 18.836144578313252, "grad_norm": 4.761010646820068, "learning_rate": 4.0567961165048546e-05, "loss": 0.3714, "step": 1943 }, { "epoch": 18.84578313253012, "grad_norm": 5.713823318481445, "learning_rate": 4.0563106796116504e-05, "loss": 0.2481, "step": 1944 }, { "epoch": 18.855421686746986, "grad_norm": 11.054490089416504, "learning_rate": 4.055825242718447e-05, "loss": 0.2292, "step": 1945 }, { "epoch": 18.865060240963857, "grad_norm": 7.514093399047852, "learning_rate": 4.0553398058252426e-05, "loss": 0.2497, "step": 1946 }, { "epoch": 18.874698795180723, "grad_norm": 6.705752372741699, "learning_rate": 4.054854368932039e-05, "loss": 0.1074, "step": 1947 }, { "epoch": 18.88433734939759, "grad_norm": 6.66903829574585, "learning_rate": 4.054368932038835e-05, "loss": 0.2023, "step": 1948 }, { "epoch": 18.893975903614457, "grad_norm": 12.745063781738281, "learning_rate": 4.053883495145631e-05, "loss": 0.386, "step": 1949 }, { "epoch": 18.903614457831324, "grad_norm": 3.0384554862976074, "learning_rate": 4.053398058252427e-05, "loss": 0.1831, "step": 1950 }, { "epoch": 18.913253012048195, "grad_norm": 19.461164474487305, "learning_rate": 4.0529126213592235e-05, "loss": 0.2416, "step": 1951 }, { "epoch": 18.92289156626506, "grad_norm": 11.926227569580078, "learning_rate": 4.052427184466019e-05, "loss": 0.2331, "step": 1952 }, { "epoch": 18.93253012048193, "grad_norm": 12.41440486907959, "learning_rate": 4.051941747572816e-05, "loss": 0.3563, "step": 1953 }, { "epoch": 18.942168674698795, "grad_norm": 2.4427309036254883, "learning_rate": 4.0514563106796115e-05, "loss": 0.1829, "step": 1954 }, { "epoch": 18.951807228915662, "grad_norm": 5.616960048675537, "learning_rate": 4.050970873786408e-05, "loss": 0.1515, "step": 1955 }, { "epoch": 18.96144578313253, "grad_norm": 7.622843265533447, "learning_rate": 4.050485436893204e-05, "loss": 0.2963, "step": 1956 }, { "epoch": 18.971084337349396, "grad_norm": 6.163559436798096, "learning_rate": 4.05e-05, "loss": 0.2475, "step": 1957 }, { "epoch": 18.980722891566266, "grad_norm": 4.115414142608643, "learning_rate": 4.0495145631067966e-05, "loss": 0.2695, "step": 1958 }, { "epoch": 18.990361445783133, "grad_norm": 7.662500381469727, "learning_rate": 4.0490291262135924e-05, "loss": 0.1776, "step": 1959 }, { "epoch": 19.00602409638554, "grad_norm": 6.916787147521973, "learning_rate": 4.048543689320389e-05, "loss": 0.2692, "step": 1960 }, { "epoch": 19.01566265060241, "grad_norm": 3.7002623081207275, "learning_rate": 4.0480582524271846e-05, "loss": 0.4423, "step": 1961 }, { "epoch": 19.02530120481928, "grad_norm": 4.0103440284729, "learning_rate": 4.047572815533981e-05, "loss": 0.3639, "step": 1962 }, { "epoch": 19.034939759036146, "grad_norm": 1.0429742336273193, "learning_rate": 4.047087378640777e-05, "loss": 0.1155, "step": 1963 }, { "epoch": 19.044578313253012, "grad_norm": 13.800034523010254, "learning_rate": 4.046601941747573e-05, "loss": 0.2369, "step": 1964 }, { "epoch": 19.05421686746988, "grad_norm": 5.498164176940918, "learning_rate": 4.046116504854369e-05, "loss": 0.4054, "step": 1965 }, { "epoch": 19.063855421686746, "grad_norm": 9.21692943572998, "learning_rate": 4.0456310679611656e-05, "loss": 0.3169, "step": 1966 }, { "epoch": 19.073493975903613, "grad_norm": 2.5351617336273193, "learning_rate": 4.045145631067961e-05, "loss": 0.2705, "step": 1967 }, { "epoch": 19.083132530120483, "grad_norm": 7.235253810882568, "learning_rate": 4.044660194174758e-05, "loss": 0.3317, "step": 1968 }, { "epoch": 19.09277108433735, "grad_norm": 9.986830711364746, "learning_rate": 4.0441747572815536e-05, "loss": 0.3842, "step": 1969 }, { "epoch": 19.102409638554217, "grad_norm": 5.626833438873291, "learning_rate": 4.04368932038835e-05, "loss": 0.3025, "step": 1970 }, { "epoch": 19.112048192771084, "grad_norm": 11.287598609924316, "learning_rate": 4.043203883495146e-05, "loss": 0.1838, "step": 1971 }, { "epoch": 19.12168674698795, "grad_norm": 15.810935020446777, "learning_rate": 4.0427184466019416e-05, "loss": 0.2111, "step": 1972 }, { "epoch": 19.131325301204818, "grad_norm": 10.476149559020996, "learning_rate": 4.042233009708738e-05, "loss": 0.168, "step": 1973 }, { "epoch": 19.14096385542169, "grad_norm": 8.948771476745605, "learning_rate": 4.041747572815534e-05, "loss": 0.2033, "step": 1974 }, { "epoch": 19.150602409638555, "grad_norm": 5.020413875579834, "learning_rate": 4.04126213592233e-05, "loss": 0.1917, "step": 1975 }, { "epoch": 19.160240963855422, "grad_norm": 2.281508445739746, "learning_rate": 4.040776699029126e-05, "loss": 0.4291, "step": 1976 }, { "epoch": 19.16987951807229, "grad_norm": 32.63286590576172, "learning_rate": 4.0402912621359225e-05, "loss": 0.3823, "step": 1977 }, { "epoch": 19.179518072289156, "grad_norm": 5.133765697479248, "learning_rate": 4.039805825242718e-05, "loss": 0.271, "step": 1978 }, { "epoch": 19.189156626506023, "grad_norm": 17.0450496673584, "learning_rate": 4.039320388349515e-05, "loss": 0.4077, "step": 1979 }, { "epoch": 19.198795180722893, "grad_norm": 3.2055625915527344, "learning_rate": 4.0388349514563105e-05, "loss": 0.2691, "step": 1980 }, { "epoch": 19.20843373493976, "grad_norm": 4.045566082000732, "learning_rate": 4.038349514563107e-05, "loss": 0.3062, "step": 1981 }, { "epoch": 19.218072289156627, "grad_norm": 7.516140937805176, "learning_rate": 4.0378640776699034e-05, "loss": 0.2936, "step": 1982 }, { "epoch": 19.227710843373494, "grad_norm": 7.253679275512695, "learning_rate": 4.037378640776699e-05, "loss": 0.306, "step": 1983 }, { "epoch": 19.23734939759036, "grad_norm": 6.631253242492676, "learning_rate": 4.0368932038834956e-05, "loss": 0.4757, "step": 1984 }, { "epoch": 19.246987951807228, "grad_norm": 5.368898391723633, "learning_rate": 4.0364077669902914e-05, "loss": 0.2135, "step": 1985 }, { "epoch": 19.256626506024098, "grad_norm": 2.5721492767333984, "learning_rate": 4.035922330097088e-05, "loss": 0.2688, "step": 1986 }, { "epoch": 19.266265060240965, "grad_norm": 6.41307258605957, "learning_rate": 4.0354368932038836e-05, "loss": 0.1914, "step": 1987 }, { "epoch": 19.27590361445783, "grad_norm": 4.8708720207214355, "learning_rate": 4.03495145631068e-05, "loss": 0.3533, "step": 1988 }, { "epoch": 19.2855421686747, "grad_norm": 5.166661739349365, "learning_rate": 4.034466019417476e-05, "loss": 0.2894, "step": 1989 }, { "epoch": 19.295180722891565, "grad_norm": 16.849138259887695, "learning_rate": 4.033980582524272e-05, "loss": 0.407, "step": 1990 }, { "epoch": 19.304819277108432, "grad_norm": 4.05812406539917, "learning_rate": 4.033495145631068e-05, "loss": 0.2479, "step": 1991 }, { "epoch": 19.314457831325303, "grad_norm": 8.271328926086426, "learning_rate": 4.0330097087378645e-05, "loss": 0.2747, "step": 1992 }, { "epoch": 19.32409638554217, "grad_norm": 9.086712837219238, "learning_rate": 4.03252427184466e-05, "loss": 0.2907, "step": 1993 }, { "epoch": 19.333734939759037, "grad_norm": 1.9171645641326904, "learning_rate": 4.032038834951457e-05, "loss": 0.2739, "step": 1994 }, { "epoch": 19.343373493975903, "grad_norm": 14.490056991577148, "learning_rate": 4.0315533980582525e-05, "loss": 0.3568, "step": 1995 }, { "epoch": 19.35301204819277, "grad_norm": 6.011416912078857, "learning_rate": 4.031067961165049e-05, "loss": 0.2993, "step": 1996 }, { "epoch": 19.362650602409637, "grad_norm": 9.894396781921387, "learning_rate": 4.030582524271845e-05, "loss": 0.2839, "step": 1997 }, { "epoch": 19.372289156626508, "grad_norm": 8.688838958740234, "learning_rate": 4.030097087378641e-05, "loss": 0.3802, "step": 1998 }, { "epoch": 19.381927710843375, "grad_norm": 12.257469177246094, "learning_rate": 4.029611650485437e-05, "loss": 0.3038, "step": 1999 }, { "epoch": 19.39156626506024, "grad_norm": 2.1192588806152344, "learning_rate": 4.029126213592233e-05, "loss": 0.1276, "step": 2000 }, { "epoch": 19.40120481927711, "grad_norm": 5.449690341949463, "learning_rate": 4.028640776699029e-05, "loss": 0.2806, "step": 2001 }, { "epoch": 19.410843373493975, "grad_norm": 2.629140853881836, "learning_rate": 4.028155339805825e-05, "loss": 0.3019, "step": 2002 }, { "epoch": 19.420481927710842, "grad_norm": 5.077669620513916, "learning_rate": 4.0276699029126215e-05, "loss": 0.3755, "step": 2003 }, { "epoch": 19.430120481927712, "grad_norm": 4.886424541473389, "learning_rate": 4.027184466019417e-05, "loss": 0.2723, "step": 2004 }, { "epoch": 19.43975903614458, "grad_norm": 4.263600826263428, "learning_rate": 4.026699029126214e-05, "loss": 0.3816, "step": 2005 }, { "epoch": 19.449397590361446, "grad_norm": 2.7908852100372314, "learning_rate": 4.0262135922330095e-05, "loss": 0.1701, "step": 2006 }, { "epoch": 19.459036144578313, "grad_norm": 7.866851806640625, "learning_rate": 4.0257281553398066e-05, "loss": 0.3904, "step": 2007 }, { "epoch": 19.46867469879518, "grad_norm": 1.9226971864700317, "learning_rate": 4.0252427184466024e-05, "loss": 0.2371, "step": 2008 }, { "epoch": 19.478313253012047, "grad_norm": 4.409744739532471, "learning_rate": 4.024757281553399e-05, "loss": 0.2318, "step": 2009 }, { "epoch": 19.487951807228917, "grad_norm": 2.211742877960205, "learning_rate": 4.0242718446601946e-05, "loss": 0.0985, "step": 2010 }, { "epoch": 19.497590361445784, "grad_norm": 1.6959367990493774, "learning_rate": 4.0237864077669904e-05, "loss": 0.0797, "step": 2011 }, { "epoch": 19.50722891566265, "grad_norm": 2.7266108989715576, "learning_rate": 4.023300970873787e-05, "loss": 0.195, "step": 2012 }, { "epoch": 19.516867469879518, "grad_norm": 3.848005771636963, "learning_rate": 4.0228155339805826e-05, "loss": 0.2514, "step": 2013 }, { "epoch": 19.526506024096385, "grad_norm": 10.94779109954834, "learning_rate": 4.022330097087379e-05, "loss": 0.2938, "step": 2014 }, { "epoch": 19.53614457831325, "grad_norm": 6.718706130981445, "learning_rate": 4.021844660194175e-05, "loss": 0.1201, "step": 2015 }, { "epoch": 19.545783132530122, "grad_norm": 7.5695600509643555, "learning_rate": 4.021359223300971e-05, "loss": 0.2494, "step": 2016 }, { "epoch": 19.55542168674699, "grad_norm": 2.9845075607299805, "learning_rate": 4.020873786407767e-05, "loss": 0.2038, "step": 2017 }, { "epoch": 19.565060240963856, "grad_norm": 6.58821964263916, "learning_rate": 4.0203883495145635e-05, "loss": 0.307, "step": 2018 }, { "epoch": 19.574698795180723, "grad_norm": 10.533533096313477, "learning_rate": 4.019902912621359e-05, "loss": 0.2538, "step": 2019 }, { "epoch": 19.58433734939759, "grad_norm": 4.0879411697387695, "learning_rate": 4.019417475728156e-05, "loss": 0.0867, "step": 2020 }, { "epoch": 19.593975903614457, "grad_norm": 9.727871894836426, "learning_rate": 4.0189320388349515e-05, "loss": 0.5715, "step": 2021 }, { "epoch": 19.603614457831327, "grad_norm": 2.1095614433288574, "learning_rate": 4.018446601941748e-05, "loss": 0.3018, "step": 2022 }, { "epoch": 19.613253012048194, "grad_norm": 6.475232124328613, "learning_rate": 4.017961165048544e-05, "loss": 0.3666, "step": 2023 }, { "epoch": 19.62289156626506, "grad_norm": 4.575084686279297, "learning_rate": 4.01747572815534e-05, "loss": 0.2982, "step": 2024 }, { "epoch": 19.632530120481928, "grad_norm": 5.258953094482422, "learning_rate": 4.016990291262136e-05, "loss": 0.537, "step": 2025 }, { "epoch": 19.642168674698794, "grad_norm": 7.64390230178833, "learning_rate": 4.016504854368932e-05, "loss": 0.3632, "step": 2026 }, { "epoch": 19.65180722891566, "grad_norm": 6.561605453491211, "learning_rate": 4.016019417475728e-05, "loss": 0.4501, "step": 2027 }, { "epoch": 19.66144578313253, "grad_norm": 16.81606101989746, "learning_rate": 4.015533980582524e-05, "loss": 0.2909, "step": 2028 }, { "epoch": 19.6710843373494, "grad_norm": 11.061568260192871, "learning_rate": 4.0150485436893204e-05, "loss": 0.2227, "step": 2029 }, { "epoch": 19.680722891566266, "grad_norm": 3.6006219387054443, "learning_rate": 4.014563106796116e-05, "loss": 0.2726, "step": 2030 }, { "epoch": 19.690361445783132, "grad_norm": 1.1207244396209717, "learning_rate": 4.014077669902913e-05, "loss": 0.0702, "step": 2031 }, { "epoch": 19.7, "grad_norm": 2.9425411224365234, "learning_rate": 4.0135922330097084e-05, "loss": 0.2059, "step": 2032 }, { "epoch": 19.709638554216866, "grad_norm": 4.062492847442627, "learning_rate": 4.0131067961165056e-05, "loss": 0.1518, "step": 2033 }, { "epoch": 19.719277108433737, "grad_norm": 9.456972122192383, "learning_rate": 4.0126213592233013e-05, "loss": 0.314, "step": 2034 }, { "epoch": 19.728915662650603, "grad_norm": 5.117285251617432, "learning_rate": 4.012135922330098e-05, "loss": 0.2602, "step": 2035 }, { "epoch": 19.73855421686747, "grad_norm": 12.75444507598877, "learning_rate": 4.0116504854368936e-05, "loss": 0.2757, "step": 2036 }, { "epoch": 19.748192771084337, "grad_norm": 10.983954429626465, "learning_rate": 4.0111650485436894e-05, "loss": 0.2928, "step": 2037 }, { "epoch": 19.757831325301204, "grad_norm": 3.0278537273406982, "learning_rate": 4.010679611650486e-05, "loss": 0.4, "step": 2038 }, { "epoch": 19.76746987951807, "grad_norm": 3.9686882495880127, "learning_rate": 4.0101941747572816e-05, "loss": 0.3593, "step": 2039 }, { "epoch": 19.77710843373494, "grad_norm": 7.922918319702148, "learning_rate": 4.009708737864078e-05, "loss": 0.3552, "step": 2040 }, { "epoch": 19.78674698795181, "grad_norm": 10.138235092163086, "learning_rate": 4.009223300970874e-05, "loss": 0.4402, "step": 2041 }, { "epoch": 19.796385542168675, "grad_norm": 2.5531046390533447, "learning_rate": 4.00873786407767e-05, "loss": 0.3182, "step": 2042 }, { "epoch": 19.806024096385542, "grad_norm": 1.042135238647461, "learning_rate": 4.008252427184466e-05, "loss": 0.0974, "step": 2043 }, { "epoch": 19.81566265060241, "grad_norm": 4.521719932556152, "learning_rate": 4.0077669902912625e-05, "loss": 0.2618, "step": 2044 }, { "epoch": 19.825301204819276, "grad_norm": 1.5117689371109009, "learning_rate": 4.007281553398058e-05, "loss": 0.1239, "step": 2045 }, { "epoch": 19.834939759036146, "grad_norm": 22.16514015197754, "learning_rate": 4.006796116504855e-05, "loss": 0.3483, "step": 2046 }, { "epoch": 19.844578313253013, "grad_norm": 3.05080509185791, "learning_rate": 4.0063106796116505e-05, "loss": 0.1659, "step": 2047 }, { "epoch": 19.85421686746988, "grad_norm": 3.7093851566314697, "learning_rate": 4.005825242718447e-05, "loss": 0.0843, "step": 2048 }, { "epoch": 19.863855421686747, "grad_norm": 21.98187828063965, "learning_rate": 4.005339805825243e-05, "loss": 0.4064, "step": 2049 }, { "epoch": 19.873493975903614, "grad_norm": 12.020675659179688, "learning_rate": 4.004854368932039e-05, "loss": 0.222, "step": 2050 }, { "epoch": 19.88313253012048, "grad_norm": 5.173581600189209, "learning_rate": 4.004368932038835e-05, "loss": 0.5284, "step": 2051 }, { "epoch": 19.89277108433735, "grad_norm": 7.118027210235596, "learning_rate": 4.0038834951456314e-05, "loss": 0.296, "step": 2052 }, { "epoch": 19.902409638554218, "grad_norm": 16.179807662963867, "learning_rate": 4.003398058252427e-05, "loss": 0.2685, "step": 2053 }, { "epoch": 19.912048192771085, "grad_norm": 3.362480401992798, "learning_rate": 4.002912621359223e-05, "loss": 0.3755, "step": 2054 }, { "epoch": 19.92168674698795, "grad_norm": 3.1275134086608887, "learning_rate": 4.0024271844660194e-05, "loss": 0.2888, "step": 2055 }, { "epoch": 19.93132530120482, "grad_norm": 5.334527492523193, "learning_rate": 4.001941747572815e-05, "loss": 0.1485, "step": 2056 }, { "epoch": 19.940963855421685, "grad_norm": 8.184724807739258, "learning_rate": 4.0014563106796116e-05, "loss": 0.2318, "step": 2057 }, { "epoch": 19.950602409638556, "grad_norm": 8.365713119506836, "learning_rate": 4.000970873786408e-05, "loss": 0.4715, "step": 2058 }, { "epoch": 19.960240963855423, "grad_norm": 1.4225612878799438, "learning_rate": 4.0004854368932046e-05, "loss": 0.0827, "step": 2059 }, { "epoch": 19.96987951807229, "grad_norm": 10.427794456481934, "learning_rate": 4e-05, "loss": 0.5153, "step": 2060 }, { "epoch": 19.979518072289157, "grad_norm": 7.424880504608154, "learning_rate": 3.999514563106797e-05, "loss": 0.5208, "step": 2061 }, { "epoch": 19.989156626506023, "grad_norm": 10.342398643493652, "learning_rate": 3.9990291262135926e-05, "loss": 0.3499, "step": 2062 }, { "epoch": 20.004819277108435, "grad_norm": 2.5411570072174072, "learning_rate": 3.998543689320389e-05, "loss": 0.1845, "step": 2063 }, { "epoch": 20.014457831325302, "grad_norm": 49.85368347167969, "learning_rate": 3.998058252427185e-05, "loss": 0.3815, "step": 2064 }, { "epoch": 20.02409638554217, "grad_norm": 5.845186710357666, "learning_rate": 3.9975728155339806e-05, "loss": 0.3972, "step": 2065 }, { "epoch": 20.033734939759036, "grad_norm": 2.1375913619995117, "learning_rate": 3.997087378640777e-05, "loss": 0.0901, "step": 2066 }, { "epoch": 20.043373493975903, "grad_norm": 4.361823558807373, "learning_rate": 3.996601941747573e-05, "loss": 0.3289, "step": 2067 }, { "epoch": 20.05301204819277, "grad_norm": 3.614856004714966, "learning_rate": 3.996116504854369e-05, "loss": 0.1395, "step": 2068 }, { "epoch": 20.06265060240964, "grad_norm": 5.582481384277344, "learning_rate": 3.995631067961165e-05, "loss": 0.2299, "step": 2069 }, { "epoch": 20.072289156626507, "grad_norm": 22.7243595123291, "learning_rate": 3.9951456310679615e-05, "loss": 0.2396, "step": 2070 }, { "epoch": 20.081927710843374, "grad_norm": 3.3216984272003174, "learning_rate": 3.994660194174757e-05, "loss": 0.2497, "step": 2071 }, { "epoch": 20.09156626506024, "grad_norm": 3.4671316146850586, "learning_rate": 3.994174757281554e-05, "loss": 0.5546, "step": 2072 }, { "epoch": 20.101204819277108, "grad_norm": 10.939168930053711, "learning_rate": 3.9936893203883495e-05, "loss": 0.3005, "step": 2073 }, { "epoch": 20.110843373493974, "grad_norm": 9.47219181060791, "learning_rate": 3.993203883495146e-05, "loss": 0.236, "step": 2074 }, { "epoch": 20.120481927710845, "grad_norm": 22.347652435302734, "learning_rate": 3.992718446601942e-05, "loss": 0.2983, "step": 2075 }, { "epoch": 20.13012048192771, "grad_norm": 2.859618663787842, "learning_rate": 3.992233009708738e-05, "loss": 0.175, "step": 2076 }, { "epoch": 20.13975903614458, "grad_norm": 5.307650566101074, "learning_rate": 3.991747572815534e-05, "loss": 0.5733, "step": 2077 }, { "epoch": 20.149397590361446, "grad_norm": 5.899611949920654, "learning_rate": 3.9912621359223304e-05, "loss": 0.1445, "step": 2078 }, { "epoch": 20.159036144578312, "grad_norm": 3.3610804080963135, "learning_rate": 3.990776699029126e-05, "loss": 0.2631, "step": 2079 }, { "epoch": 20.16867469879518, "grad_norm": 4.93446683883667, "learning_rate": 3.9902912621359226e-05, "loss": 0.2685, "step": 2080 }, { "epoch": 20.17831325301205, "grad_norm": 2.407777786254883, "learning_rate": 3.9898058252427184e-05, "loss": 0.2689, "step": 2081 }, { "epoch": 20.187951807228917, "grad_norm": 9.656620979309082, "learning_rate": 3.989320388349514e-05, "loss": 0.4658, "step": 2082 }, { "epoch": 20.197590361445783, "grad_norm": 1.936995267868042, "learning_rate": 3.988834951456311e-05, "loss": 0.3341, "step": 2083 }, { "epoch": 20.20722891566265, "grad_norm": 2.206820487976074, "learning_rate": 3.988349514563107e-05, "loss": 0.2641, "step": 2084 }, { "epoch": 20.216867469879517, "grad_norm": 14.005345344543457, "learning_rate": 3.9878640776699035e-05, "loss": 0.2078, "step": 2085 }, { "epoch": 20.226506024096384, "grad_norm": 5.247167110443115, "learning_rate": 3.987378640776699e-05, "loss": 0.2695, "step": 2086 }, { "epoch": 20.236144578313255, "grad_norm": 7.403015613555908, "learning_rate": 3.986893203883496e-05, "loss": 0.298, "step": 2087 }, { "epoch": 20.24578313253012, "grad_norm": 3.7526440620422363, "learning_rate": 3.9864077669902915e-05, "loss": 0.4105, "step": 2088 }, { "epoch": 20.25542168674699, "grad_norm": 6.155702590942383, "learning_rate": 3.985922330097088e-05, "loss": 0.2143, "step": 2089 }, { "epoch": 20.265060240963855, "grad_norm": 4.682980060577393, "learning_rate": 3.985436893203884e-05, "loss": 0.2963, "step": 2090 }, { "epoch": 20.274698795180722, "grad_norm": 8.789857864379883, "learning_rate": 3.98495145631068e-05, "loss": 0.2052, "step": 2091 }, { "epoch": 20.28433734939759, "grad_norm": 7.184721946716309, "learning_rate": 3.984466019417476e-05, "loss": 0.1506, "step": 2092 }, { "epoch": 20.29397590361446, "grad_norm": 34.781272888183594, "learning_rate": 3.983980582524272e-05, "loss": 0.2646, "step": 2093 }, { "epoch": 20.303614457831326, "grad_norm": 7.759247303009033, "learning_rate": 3.983495145631068e-05, "loss": 0.4045, "step": 2094 }, { "epoch": 20.313253012048193, "grad_norm": 2.9456052780151367, "learning_rate": 3.983009708737864e-05, "loss": 0.3208, "step": 2095 }, { "epoch": 20.32289156626506, "grad_norm": 6.613046646118164, "learning_rate": 3.9825242718446605e-05, "loss": 0.3648, "step": 2096 }, { "epoch": 20.332530120481927, "grad_norm": 4.158034801483154, "learning_rate": 3.982038834951456e-05, "loss": 0.3416, "step": 2097 }, { "epoch": 20.342168674698794, "grad_norm": 2.480400800704956, "learning_rate": 3.981553398058253e-05, "loss": 0.179, "step": 2098 }, { "epoch": 20.351807228915664, "grad_norm": 7.9672627449035645, "learning_rate": 3.9810679611650485e-05, "loss": 0.4126, "step": 2099 }, { "epoch": 20.36144578313253, "grad_norm": 37.097251892089844, "learning_rate": 3.980582524271845e-05, "loss": 0.2593, "step": 2100 }, { "epoch": 20.371084337349398, "grad_norm": 3.278989315032959, "learning_rate": 3.980097087378641e-05, "loss": 0.1779, "step": 2101 }, { "epoch": 20.380722891566265, "grad_norm": 18.013839721679688, "learning_rate": 3.979611650485437e-05, "loss": 0.3797, "step": 2102 }, { "epoch": 20.39036144578313, "grad_norm": 2.0125913619995117, "learning_rate": 3.979126213592233e-05, "loss": 0.2097, "step": 2103 }, { "epoch": 20.4, "grad_norm": 14.614949226379395, "learning_rate": 3.9786407766990294e-05, "loss": 0.4964, "step": 2104 }, { "epoch": 20.40963855421687, "grad_norm": 2.6591453552246094, "learning_rate": 3.978155339805825e-05, "loss": 0.2329, "step": 2105 }, { "epoch": 20.419277108433736, "grad_norm": 6.719484329223633, "learning_rate": 3.9776699029126216e-05, "loss": 0.2404, "step": 2106 }, { "epoch": 20.428915662650603, "grad_norm": 3.7487504482269287, "learning_rate": 3.9771844660194174e-05, "loss": 0.1415, "step": 2107 }, { "epoch": 20.43855421686747, "grad_norm": 7.633403778076172, "learning_rate": 3.976699029126214e-05, "loss": 0.3595, "step": 2108 }, { "epoch": 20.448192771084337, "grad_norm": 3.540745735168457, "learning_rate": 3.97621359223301e-05, "loss": 0.1053, "step": 2109 }, { "epoch": 20.457831325301203, "grad_norm": 3.71032977104187, "learning_rate": 3.975728155339806e-05, "loss": 0.3044, "step": 2110 }, { "epoch": 20.467469879518074, "grad_norm": 2.9444425106048584, "learning_rate": 3.9752427184466025e-05, "loss": 0.304, "step": 2111 }, { "epoch": 20.47710843373494, "grad_norm": 15.43108081817627, "learning_rate": 3.974757281553398e-05, "loss": 0.2764, "step": 2112 }, { "epoch": 20.486746987951808, "grad_norm": 8.185397148132324, "learning_rate": 3.974271844660195e-05, "loss": 0.3722, "step": 2113 }, { "epoch": 20.496385542168674, "grad_norm": 4.774162292480469, "learning_rate": 3.9737864077669905e-05, "loss": 0.3572, "step": 2114 }, { "epoch": 20.50602409638554, "grad_norm": 14.730843544006348, "learning_rate": 3.973300970873787e-05, "loss": 0.3396, "step": 2115 }, { "epoch": 20.51566265060241, "grad_norm": 4.148080825805664, "learning_rate": 3.972815533980583e-05, "loss": 0.18, "step": 2116 }, { "epoch": 20.52530120481928, "grad_norm": 10.550758361816406, "learning_rate": 3.972330097087379e-05, "loss": 0.28, "step": 2117 }, { "epoch": 20.534939759036146, "grad_norm": 3.199174404144287, "learning_rate": 3.971844660194175e-05, "loss": 0.2002, "step": 2118 }, { "epoch": 20.544578313253012, "grad_norm": 2.9381346702575684, "learning_rate": 3.9713592233009714e-05, "loss": 0.0987, "step": 2119 }, { "epoch": 20.55421686746988, "grad_norm": 2.7012436389923096, "learning_rate": 3.970873786407767e-05, "loss": 0.1562, "step": 2120 }, { "epoch": 20.563855421686746, "grad_norm": 23.571060180664062, "learning_rate": 3.970388349514563e-05, "loss": 0.5704, "step": 2121 }, { "epoch": 20.573493975903613, "grad_norm": 18.79607391357422, "learning_rate": 3.9699029126213594e-05, "loss": 0.4063, "step": 2122 }, { "epoch": 20.583132530120483, "grad_norm": 8.30376148223877, "learning_rate": 3.969417475728155e-05, "loss": 0.3795, "step": 2123 }, { "epoch": 20.59277108433735, "grad_norm": 5.045494079589844, "learning_rate": 3.9689320388349517e-05, "loss": 0.3071, "step": 2124 }, { "epoch": 20.602409638554217, "grad_norm": 9.369250297546387, "learning_rate": 3.9684466019417474e-05, "loss": 0.3401, "step": 2125 }, { "epoch": 20.612048192771084, "grad_norm": 2.2030415534973145, "learning_rate": 3.967961165048544e-05, "loss": 0.1232, "step": 2126 }, { "epoch": 20.62168674698795, "grad_norm": 4.812403202056885, "learning_rate": 3.96747572815534e-05, "loss": 0.2553, "step": 2127 }, { "epoch": 20.631325301204818, "grad_norm": 6.59791898727417, "learning_rate": 3.966990291262136e-05, "loss": 0.3087, "step": 2128 }, { "epoch": 20.64096385542169, "grad_norm": 11.909011840820312, "learning_rate": 3.966504854368932e-05, "loss": 0.463, "step": 2129 }, { "epoch": 20.650602409638555, "grad_norm": 10.27454662322998, "learning_rate": 3.9660194174757283e-05, "loss": 0.2541, "step": 2130 }, { "epoch": 20.660240963855422, "grad_norm": 27.128080368041992, "learning_rate": 3.965533980582524e-05, "loss": 0.5581, "step": 2131 }, { "epoch": 20.66987951807229, "grad_norm": 13.054425239562988, "learning_rate": 3.9650485436893206e-05, "loss": 0.475, "step": 2132 }, { "epoch": 20.679518072289156, "grad_norm": 6.172542095184326, "learning_rate": 3.9645631067961164e-05, "loss": 0.5954, "step": 2133 }, { "epoch": 20.689156626506023, "grad_norm": 9.896239280700684, "learning_rate": 3.964077669902913e-05, "loss": 0.4618, "step": 2134 }, { "epoch": 20.698795180722893, "grad_norm": 2.851041078567505, "learning_rate": 3.963592233009709e-05, "loss": 0.2809, "step": 2135 }, { "epoch": 20.70843373493976, "grad_norm": 4.0639238357543945, "learning_rate": 3.963106796116505e-05, "loss": 0.1528, "step": 2136 }, { "epoch": 20.718072289156627, "grad_norm": 13.4429292678833, "learning_rate": 3.9626213592233015e-05, "loss": 0.3316, "step": 2137 }, { "epoch": 20.727710843373494, "grad_norm": 6.595510959625244, "learning_rate": 3.962135922330097e-05, "loss": 0.2783, "step": 2138 }, { "epoch": 20.73734939759036, "grad_norm": 3.004434585571289, "learning_rate": 3.961650485436894e-05, "loss": 0.2786, "step": 2139 }, { "epoch": 20.746987951807228, "grad_norm": 8.777240753173828, "learning_rate": 3.9611650485436895e-05, "loss": 0.5859, "step": 2140 }, { "epoch": 20.756626506024098, "grad_norm": 3.960214853286743, "learning_rate": 3.960679611650486e-05, "loss": 0.4524, "step": 2141 }, { "epoch": 20.766265060240965, "grad_norm": 3.8443377017974854, "learning_rate": 3.960194174757282e-05, "loss": 0.2282, "step": 2142 }, { "epoch": 20.77590361445783, "grad_norm": 4.895867347717285, "learning_rate": 3.959708737864078e-05, "loss": 0.3021, "step": 2143 }, { "epoch": 20.7855421686747, "grad_norm": 5.475423812866211, "learning_rate": 3.959223300970874e-05, "loss": 0.2929, "step": 2144 }, { "epoch": 20.795180722891565, "grad_norm": 3.880821943283081, "learning_rate": 3.9587378640776704e-05, "loss": 0.2221, "step": 2145 }, { "epoch": 20.804819277108432, "grad_norm": 0.8090966939926147, "learning_rate": 3.958252427184466e-05, "loss": 0.3141, "step": 2146 }, { "epoch": 20.814457831325303, "grad_norm": 3.98041033744812, "learning_rate": 3.9577669902912626e-05, "loss": 0.2364, "step": 2147 }, { "epoch": 20.82409638554217, "grad_norm": 2.9270412921905518, "learning_rate": 3.9572815533980584e-05, "loss": 0.1961, "step": 2148 }, { "epoch": 20.833734939759037, "grad_norm": 3.2383408546447754, "learning_rate": 3.956796116504854e-05, "loss": 0.2283, "step": 2149 }, { "epoch": 20.843373493975903, "grad_norm": 5.736262798309326, "learning_rate": 3.9563106796116506e-05, "loss": 0.1914, "step": 2150 }, { "epoch": 20.85301204819277, "grad_norm": 11.252874374389648, "learning_rate": 3.9558252427184464e-05, "loss": 0.5092, "step": 2151 }, { "epoch": 20.862650602409637, "grad_norm": 7.809035301208496, "learning_rate": 3.955339805825243e-05, "loss": 0.198, "step": 2152 }, { "epoch": 20.872289156626508, "grad_norm": 6.191172122955322, "learning_rate": 3.9548543689320386e-05, "loss": 0.3931, "step": 2153 }, { "epoch": 20.881927710843375, "grad_norm": 4.054310321807861, "learning_rate": 3.954368932038835e-05, "loss": 0.3401, "step": 2154 }, { "epoch": 20.89156626506024, "grad_norm": 15.298322677612305, "learning_rate": 3.953883495145631e-05, "loss": 0.1519, "step": 2155 }, { "epoch": 20.90120481927711, "grad_norm": 8.899800300598145, "learning_rate": 3.953398058252427e-05, "loss": 0.3222, "step": 2156 }, { "epoch": 20.910843373493975, "grad_norm": 14.530540466308594, "learning_rate": 3.952912621359223e-05, "loss": 0.1593, "step": 2157 }, { "epoch": 20.920481927710842, "grad_norm": 3.5052218437194824, "learning_rate": 3.9524271844660196e-05, "loss": 0.5062, "step": 2158 }, { "epoch": 20.930120481927712, "grad_norm": 3.8012759685516357, "learning_rate": 3.951941747572816e-05, "loss": 0.377, "step": 2159 }, { "epoch": 20.93975903614458, "grad_norm": 16.55525779724121, "learning_rate": 3.951456310679612e-05, "loss": 0.4218, "step": 2160 }, { "epoch": 20.949397590361446, "grad_norm": 4.770415782928467, "learning_rate": 3.950970873786408e-05, "loss": 0.3113, "step": 2161 }, { "epoch": 20.959036144578313, "grad_norm": 2.8456168174743652, "learning_rate": 3.950485436893204e-05, "loss": 0.2793, "step": 2162 }, { "epoch": 20.96867469879518, "grad_norm": 4.699953079223633, "learning_rate": 3.9500000000000005e-05, "loss": 0.4441, "step": 2163 }, { "epoch": 20.978313253012047, "grad_norm": 2.0579798221588135, "learning_rate": 3.949514563106796e-05, "loss": 0.1865, "step": 2164 }, { "epoch": 20.987951807228917, "grad_norm": 3.2507848739624023, "learning_rate": 3.949029126213593e-05, "loss": 0.2676, "step": 2165 }, { "epoch": 21.003614457831326, "grad_norm": 4.990149021148682, "learning_rate": 3.9485436893203885e-05, "loss": 0.1798, "step": 2166 }, { "epoch": 21.013253012048192, "grad_norm": 2.7002031803131104, "learning_rate": 3.948058252427185e-05, "loss": 0.2183, "step": 2167 }, { "epoch": 21.02289156626506, "grad_norm": 2.4996097087860107, "learning_rate": 3.947572815533981e-05, "loss": 0.225, "step": 2168 }, { "epoch": 21.032530120481926, "grad_norm": 5.872982025146484, "learning_rate": 3.947087378640777e-05, "loss": 0.2372, "step": 2169 }, { "epoch": 21.042168674698797, "grad_norm": 2.3510053157806396, "learning_rate": 3.946601941747573e-05, "loss": 0.2029, "step": 2170 }, { "epoch": 21.051807228915663, "grad_norm": 3.6995856761932373, "learning_rate": 3.9461165048543694e-05, "loss": 0.2427, "step": 2171 }, { "epoch": 21.06144578313253, "grad_norm": 5.078645706176758, "learning_rate": 3.945631067961165e-05, "loss": 0.2699, "step": 2172 }, { "epoch": 21.071084337349397, "grad_norm": 2.2026727199554443, "learning_rate": 3.9451456310679616e-05, "loss": 0.121, "step": 2173 }, { "epoch": 21.080722891566264, "grad_norm": 7.071846008300781, "learning_rate": 3.9446601941747574e-05, "loss": 0.2732, "step": 2174 }, { "epoch": 21.09036144578313, "grad_norm": 8.607181549072266, "learning_rate": 3.944174757281553e-05, "loss": 0.2959, "step": 2175 }, { "epoch": 21.1, "grad_norm": 6.099915504455566, "learning_rate": 3.9436893203883496e-05, "loss": 0.3297, "step": 2176 }, { "epoch": 21.10963855421687, "grad_norm": 2.091548442840576, "learning_rate": 3.9432038834951454e-05, "loss": 0.1916, "step": 2177 }, { "epoch": 21.119277108433735, "grad_norm": 6.095359802246094, "learning_rate": 3.942718446601942e-05, "loss": 0.2707, "step": 2178 }, { "epoch": 21.128915662650602, "grad_norm": 16.463075637817383, "learning_rate": 3.9422330097087376e-05, "loss": 0.4225, "step": 2179 }, { "epoch": 21.13855421686747, "grad_norm": 2.518153667449951, "learning_rate": 3.941747572815534e-05, "loss": 0.1631, "step": 2180 }, { "epoch": 21.148192771084336, "grad_norm": 3.336942672729492, "learning_rate": 3.94126213592233e-05, "loss": 0.1362, "step": 2181 }, { "epoch": 21.157831325301206, "grad_norm": 2.211099624633789, "learning_rate": 3.940776699029126e-05, "loss": 0.2182, "step": 2182 }, { "epoch": 21.167469879518073, "grad_norm": 4.207514762878418, "learning_rate": 3.940291262135922e-05, "loss": 0.2704, "step": 2183 }, { "epoch": 21.17710843373494, "grad_norm": 4.975114822387695, "learning_rate": 3.939805825242719e-05, "loss": 0.1713, "step": 2184 }, { "epoch": 21.186746987951807, "grad_norm": 2.186971664428711, "learning_rate": 3.939320388349515e-05, "loss": 0.1378, "step": 2185 }, { "epoch": 21.196385542168674, "grad_norm": 3.7675232887268066, "learning_rate": 3.938834951456311e-05, "loss": 0.3183, "step": 2186 }, { "epoch": 21.20602409638554, "grad_norm": 3.5464303493499756, "learning_rate": 3.938349514563107e-05, "loss": 0.1249, "step": 2187 }, { "epoch": 21.21566265060241, "grad_norm": 7.145229339599609, "learning_rate": 3.937864077669903e-05, "loss": 0.1801, "step": 2188 }, { "epoch": 21.225301204819278, "grad_norm": 1.7543087005615234, "learning_rate": 3.9373786407766994e-05, "loss": 0.2234, "step": 2189 }, { "epoch": 21.234939759036145, "grad_norm": 2.647648334503174, "learning_rate": 3.936893203883495e-05, "loss": 0.1374, "step": 2190 }, { "epoch": 21.24457831325301, "grad_norm": 3.640817880630493, "learning_rate": 3.936407766990292e-05, "loss": 0.2005, "step": 2191 }, { "epoch": 21.25421686746988, "grad_norm": 2.17555570602417, "learning_rate": 3.9359223300970875e-05, "loss": 0.0809, "step": 2192 }, { "epoch": 21.263855421686745, "grad_norm": 1.7247601747512817, "learning_rate": 3.935436893203884e-05, "loss": 0.0721, "step": 2193 }, { "epoch": 21.273493975903616, "grad_norm": 2.7890264987945557, "learning_rate": 3.93495145631068e-05, "loss": 0.2647, "step": 2194 }, { "epoch": 21.283132530120483, "grad_norm": 2.806610584259033, "learning_rate": 3.934466019417476e-05, "loss": 0.2655, "step": 2195 }, { "epoch": 21.29277108433735, "grad_norm": 5.139245510101318, "learning_rate": 3.933980582524272e-05, "loss": 0.3103, "step": 2196 }, { "epoch": 21.302409638554217, "grad_norm": 3.152369976043701, "learning_rate": 3.9334951456310684e-05, "loss": 0.198, "step": 2197 }, { "epoch": 21.312048192771083, "grad_norm": 5.76950740814209, "learning_rate": 3.933009708737864e-05, "loss": 0.2846, "step": 2198 }, { "epoch": 21.32168674698795, "grad_norm": 3.760169267654419, "learning_rate": 3.9325242718446606e-05, "loss": 0.3511, "step": 2199 }, { "epoch": 21.33132530120482, "grad_norm": 3.518585681915283, "learning_rate": 3.9320388349514564e-05, "loss": 0.2226, "step": 2200 }, { "epoch": 21.340963855421688, "grad_norm": 13.557014465332031, "learning_rate": 3.931553398058253e-05, "loss": 0.3404, "step": 2201 }, { "epoch": 21.350602409638554, "grad_norm": 6.060368537902832, "learning_rate": 3.9310679611650486e-05, "loss": 0.3467, "step": 2202 }, { "epoch": 21.36024096385542, "grad_norm": 18.548254013061523, "learning_rate": 3.9305825242718444e-05, "loss": 0.3459, "step": 2203 }, { "epoch": 21.36987951807229, "grad_norm": 2.3879406452178955, "learning_rate": 3.930097087378641e-05, "loss": 0.2503, "step": 2204 }, { "epoch": 21.379518072289155, "grad_norm": 1.607288122177124, "learning_rate": 3.9296116504854366e-05, "loss": 0.1621, "step": 2205 }, { "epoch": 21.389156626506026, "grad_norm": 9.740619659423828, "learning_rate": 3.929126213592233e-05, "loss": 0.2524, "step": 2206 }, { "epoch": 21.398795180722892, "grad_norm": 4.771846294403076, "learning_rate": 3.928640776699029e-05, "loss": 0.366, "step": 2207 }, { "epoch": 21.40843373493976, "grad_norm": 3.4548187255859375, "learning_rate": 3.928155339805825e-05, "loss": 0.4991, "step": 2208 }, { "epoch": 21.418072289156626, "grad_norm": 6.4775710105896, "learning_rate": 3.927669902912622e-05, "loss": 0.1992, "step": 2209 }, { "epoch": 21.427710843373493, "grad_norm": 5.540178298950195, "learning_rate": 3.927184466019418e-05, "loss": 0.3384, "step": 2210 }, { "epoch": 21.43734939759036, "grad_norm": 2.726489305496216, "learning_rate": 3.926699029126214e-05, "loss": 0.2844, "step": 2211 }, { "epoch": 21.44698795180723, "grad_norm": 5.423849582672119, "learning_rate": 3.9262135922330104e-05, "loss": 0.5513, "step": 2212 }, { "epoch": 21.456626506024097, "grad_norm": 5.803206920623779, "learning_rate": 3.925728155339806e-05, "loss": 0.3674, "step": 2213 }, { "epoch": 21.466265060240964, "grad_norm": 3.9585211277008057, "learning_rate": 3.925242718446602e-05, "loss": 0.3012, "step": 2214 }, { "epoch": 21.47590361445783, "grad_norm": 9.262625694274902, "learning_rate": 3.9247572815533984e-05, "loss": 0.163, "step": 2215 }, { "epoch": 21.485542168674698, "grad_norm": 5.7092156410217285, "learning_rate": 3.924271844660194e-05, "loss": 0.2125, "step": 2216 }, { "epoch": 21.495180722891565, "grad_norm": 3.2350728511810303, "learning_rate": 3.9237864077669907e-05, "loss": 0.4586, "step": 2217 }, { "epoch": 21.504819277108435, "grad_norm": 1.6951336860656738, "learning_rate": 3.9233009708737864e-05, "loss": 0.0958, "step": 2218 }, { "epoch": 21.514457831325302, "grad_norm": 2.8131654262542725, "learning_rate": 3.922815533980583e-05, "loss": 0.1401, "step": 2219 }, { "epoch": 21.52409638554217, "grad_norm": 5.7948222160339355, "learning_rate": 3.9223300970873787e-05, "loss": 0.3345, "step": 2220 }, { "epoch": 21.533734939759036, "grad_norm": 1.9605226516723633, "learning_rate": 3.921844660194175e-05, "loss": 0.1681, "step": 2221 }, { "epoch": 21.543373493975903, "grad_norm": 22.060503005981445, "learning_rate": 3.921359223300971e-05, "loss": 0.3369, "step": 2222 }, { "epoch": 21.55301204819277, "grad_norm": 9.449338912963867, "learning_rate": 3.9208737864077673e-05, "loss": 0.1868, "step": 2223 }, { "epoch": 21.56265060240964, "grad_norm": 11.890176773071289, "learning_rate": 3.920388349514563e-05, "loss": 0.2154, "step": 2224 }, { "epoch": 21.572289156626507, "grad_norm": 3.740774631500244, "learning_rate": 3.9199029126213596e-05, "loss": 0.3291, "step": 2225 }, { "epoch": 21.581927710843374, "grad_norm": 13.25005054473877, "learning_rate": 3.9194174757281553e-05, "loss": 0.4985, "step": 2226 }, { "epoch": 21.59156626506024, "grad_norm": 6.572266101837158, "learning_rate": 3.918932038834952e-05, "loss": 0.3453, "step": 2227 }, { "epoch": 21.601204819277108, "grad_norm": 6.372016906738281, "learning_rate": 3.9184466019417476e-05, "loss": 0.3254, "step": 2228 }, { "epoch": 21.610843373493974, "grad_norm": 3.0571346282958984, "learning_rate": 3.917961165048544e-05, "loss": 0.3269, "step": 2229 }, { "epoch": 21.620481927710845, "grad_norm": 3.258713960647583, "learning_rate": 3.91747572815534e-05, "loss": 0.2046, "step": 2230 }, { "epoch": 21.63012048192771, "grad_norm": 5.268436908721924, "learning_rate": 3.9169902912621356e-05, "loss": 0.1382, "step": 2231 }, { "epoch": 21.63975903614458, "grad_norm": 8.701772689819336, "learning_rate": 3.916504854368932e-05, "loss": 0.1915, "step": 2232 }, { "epoch": 21.649397590361446, "grad_norm": 20.51740264892578, "learning_rate": 3.916019417475728e-05, "loss": 0.2911, "step": 2233 }, { "epoch": 21.659036144578312, "grad_norm": 2.342618703842163, "learning_rate": 3.915533980582524e-05, "loss": 0.2757, "step": 2234 }, { "epoch": 21.66867469879518, "grad_norm": 12.822086334228516, "learning_rate": 3.915048543689321e-05, "loss": 0.2018, "step": 2235 }, { "epoch": 21.67831325301205, "grad_norm": 8.249173164367676, "learning_rate": 3.914563106796117e-05, "loss": 0.3224, "step": 2236 }, { "epoch": 21.687951807228917, "grad_norm": 4.09420108795166, "learning_rate": 3.914077669902913e-05, "loss": 0.3291, "step": 2237 }, { "epoch": 21.697590361445783, "grad_norm": 7.902811050415039, "learning_rate": 3.9135922330097094e-05, "loss": 0.2594, "step": 2238 }, { "epoch": 21.70722891566265, "grad_norm": 2.7186214923858643, "learning_rate": 3.913106796116505e-05, "loss": 0.1206, "step": 2239 }, { "epoch": 21.716867469879517, "grad_norm": 17.22148895263672, "learning_rate": 3.9126213592233016e-05, "loss": 0.1644, "step": 2240 }, { "epoch": 21.726506024096384, "grad_norm": 5.668268203735352, "learning_rate": 3.9121359223300974e-05, "loss": 0.1946, "step": 2241 }, { "epoch": 21.736144578313255, "grad_norm": 3.047661542892456, "learning_rate": 3.911650485436893e-05, "loss": 0.3892, "step": 2242 }, { "epoch": 21.74578313253012, "grad_norm": 11.174880027770996, "learning_rate": 3.9111650485436896e-05, "loss": 0.3122, "step": 2243 }, { "epoch": 21.75542168674699, "grad_norm": 3.9185516834259033, "learning_rate": 3.9106796116504854e-05, "loss": 0.3118, "step": 2244 }, { "epoch": 21.765060240963855, "grad_norm": 2.429762363433838, "learning_rate": 3.910194174757282e-05, "loss": 0.2269, "step": 2245 }, { "epoch": 21.774698795180722, "grad_norm": 3.530231237411499, "learning_rate": 3.9097087378640776e-05, "loss": 0.1666, "step": 2246 }, { "epoch": 21.78433734939759, "grad_norm": 1.050613284111023, "learning_rate": 3.909223300970874e-05, "loss": 0.2306, "step": 2247 }, { "epoch": 21.79397590361446, "grad_norm": 1.6108115911483765, "learning_rate": 3.90873786407767e-05, "loss": 0.1688, "step": 2248 }, { "epoch": 21.803614457831326, "grad_norm": 7.973141670227051, "learning_rate": 3.908252427184466e-05, "loss": 0.547, "step": 2249 }, { "epoch": 21.813253012048193, "grad_norm": 11.612807273864746, "learning_rate": 3.907766990291262e-05, "loss": 0.1965, "step": 2250 }, { "epoch": 21.82289156626506, "grad_norm": 2.6522130966186523, "learning_rate": 3.9072815533980585e-05, "loss": 0.2786, "step": 2251 }, { "epoch": 21.832530120481927, "grad_norm": 2.026620626449585, "learning_rate": 3.906796116504854e-05, "loss": 0.3149, "step": 2252 }, { "epoch": 21.842168674698794, "grad_norm": 1.2024269104003906, "learning_rate": 3.906310679611651e-05, "loss": 0.1566, "step": 2253 }, { "epoch": 21.851807228915664, "grad_norm": 7.684022426605225, "learning_rate": 3.9058252427184466e-05, "loss": 0.3644, "step": 2254 }, { "epoch": 21.86144578313253, "grad_norm": 7.586416244506836, "learning_rate": 3.905339805825243e-05, "loss": 0.3874, "step": 2255 }, { "epoch": 21.871084337349398, "grad_norm": 3.915249824523926, "learning_rate": 3.904854368932039e-05, "loss": 0.2281, "step": 2256 }, { "epoch": 21.880722891566265, "grad_norm": 3.910767078399658, "learning_rate": 3.904368932038835e-05, "loss": 0.1095, "step": 2257 }, { "epoch": 21.89036144578313, "grad_norm": 4.756340980529785, "learning_rate": 3.903883495145631e-05, "loss": 0.2022, "step": 2258 }, { "epoch": 21.9, "grad_norm": 17.333078384399414, "learning_rate": 3.903398058252427e-05, "loss": 0.3886, "step": 2259 }, { "epoch": 21.90963855421687, "grad_norm": 2.640634775161743, "learning_rate": 3.902912621359224e-05, "loss": 0.2722, "step": 2260 }, { "epoch": 21.919277108433736, "grad_norm": 4.415349960327148, "learning_rate": 3.90242718446602e-05, "loss": 0.3188, "step": 2261 }, { "epoch": 21.928915662650603, "grad_norm": 9.3425874710083, "learning_rate": 3.901941747572816e-05, "loss": 0.1778, "step": 2262 }, { "epoch": 21.93855421686747, "grad_norm": 3.5145103931427, "learning_rate": 3.901456310679612e-05, "loss": 0.4415, "step": 2263 }, { "epoch": 21.948192771084337, "grad_norm": 2.1971302032470703, "learning_rate": 3.9009708737864084e-05, "loss": 0.1706, "step": 2264 }, { "epoch": 21.957831325301203, "grad_norm": 2.3199143409729004, "learning_rate": 3.900485436893204e-05, "loss": 0.1477, "step": 2265 }, { "epoch": 21.967469879518074, "grad_norm": 15.367673873901367, "learning_rate": 3.9000000000000006e-05, "loss": 0.2886, "step": 2266 }, { "epoch": 21.97710843373494, "grad_norm": 3.6696574687957764, "learning_rate": 3.8995145631067964e-05, "loss": 0.3285, "step": 2267 }, { "epoch": 21.986746987951808, "grad_norm": 10.735132217407227, "learning_rate": 3.899029126213593e-05, "loss": 0.2138, "step": 2268 }, { "epoch": 22.002409638554216, "grad_norm": 4.154650688171387, "learning_rate": 3.8985436893203886e-05, "loss": 0.3536, "step": 2269 }, { "epoch": 22.012048192771083, "grad_norm": 2.969834566116333, "learning_rate": 3.8980582524271844e-05, "loss": 0.14, "step": 2270 }, { "epoch": 22.021686746987953, "grad_norm": 10.007474899291992, "learning_rate": 3.897572815533981e-05, "loss": 0.332, "step": 2271 }, { "epoch": 22.03132530120482, "grad_norm": 6.753190517425537, "learning_rate": 3.8970873786407766e-05, "loss": 0.2613, "step": 2272 }, { "epoch": 22.040963855421687, "grad_norm": 6.09775447845459, "learning_rate": 3.896601941747573e-05, "loss": 0.3112, "step": 2273 }, { "epoch": 22.050602409638554, "grad_norm": 6.384354591369629, "learning_rate": 3.896116504854369e-05, "loss": 0.3552, "step": 2274 }, { "epoch": 22.06024096385542, "grad_norm": 5.994947910308838, "learning_rate": 3.895631067961165e-05, "loss": 0.3328, "step": 2275 }, { "epoch": 22.069879518072288, "grad_norm": 4.692542552947998, "learning_rate": 3.895145631067961e-05, "loss": 0.2885, "step": 2276 }, { "epoch": 22.079518072289158, "grad_norm": 4.775685787200928, "learning_rate": 3.8946601941747575e-05, "loss": 0.2882, "step": 2277 }, { "epoch": 22.089156626506025, "grad_norm": 11.78956127166748, "learning_rate": 3.894174757281553e-05, "loss": 0.1771, "step": 2278 }, { "epoch": 22.09879518072289, "grad_norm": 3.8051531314849854, "learning_rate": 3.89368932038835e-05, "loss": 0.3104, "step": 2279 }, { "epoch": 22.10843373493976, "grad_norm": 2.549452066421509, "learning_rate": 3.8932038834951455e-05, "loss": 0.1085, "step": 2280 }, { "epoch": 22.118072289156625, "grad_norm": 3.612877607345581, "learning_rate": 3.892718446601942e-05, "loss": 0.1498, "step": 2281 }, { "epoch": 22.127710843373492, "grad_norm": 1.2030766010284424, "learning_rate": 3.892233009708738e-05, "loss": 0.0611, "step": 2282 }, { "epoch": 22.137349397590363, "grad_norm": 4.82010555267334, "learning_rate": 3.891747572815534e-05, "loss": 0.2735, "step": 2283 }, { "epoch": 22.14698795180723, "grad_norm": 7.265773296356201, "learning_rate": 3.89126213592233e-05, "loss": 0.1293, "step": 2284 }, { "epoch": 22.156626506024097, "grad_norm": 23.39487075805664, "learning_rate": 3.8907766990291264e-05, "loss": 0.2298, "step": 2285 }, { "epoch": 22.166265060240963, "grad_norm": 10.117898941040039, "learning_rate": 3.890291262135923e-05, "loss": 0.2574, "step": 2286 }, { "epoch": 22.17590361445783, "grad_norm": 7.3862714767456055, "learning_rate": 3.889805825242719e-05, "loss": 0.2671, "step": 2287 }, { "epoch": 22.185542168674697, "grad_norm": 5.63006067276001, "learning_rate": 3.889320388349515e-05, "loss": 0.5558, "step": 2288 }, { "epoch": 22.195180722891568, "grad_norm": 3.179356575012207, "learning_rate": 3.888834951456311e-05, "loss": 0.2393, "step": 2289 }, { "epoch": 22.204819277108435, "grad_norm": 2.5431368350982666, "learning_rate": 3.8883495145631074e-05, "loss": 0.2771, "step": 2290 }, { "epoch": 22.2144578313253, "grad_norm": 1.6894309520721436, "learning_rate": 3.887864077669903e-05, "loss": 0.1534, "step": 2291 }, { "epoch": 22.22409638554217, "grad_norm": 4.225996494293213, "learning_rate": 3.8873786407766996e-05, "loss": 0.4482, "step": 2292 }, { "epoch": 22.233734939759035, "grad_norm": 3.8371620178222656, "learning_rate": 3.8868932038834954e-05, "loss": 0.3483, "step": 2293 }, { "epoch": 22.243373493975902, "grad_norm": 3.4460108280181885, "learning_rate": 3.886407766990292e-05, "loss": 0.3519, "step": 2294 }, { "epoch": 22.253012048192772, "grad_norm": 9.419513702392578, "learning_rate": 3.8859223300970876e-05, "loss": 0.0625, "step": 2295 }, { "epoch": 22.26265060240964, "grad_norm": 4.3416290283203125, "learning_rate": 3.885436893203884e-05, "loss": 0.3096, "step": 2296 }, { "epoch": 22.272289156626506, "grad_norm": 4.012824058532715, "learning_rate": 3.88495145631068e-05, "loss": 0.4078, "step": 2297 }, { "epoch": 22.281927710843373, "grad_norm": 3.2404637336730957, "learning_rate": 3.8844660194174756e-05, "loss": 0.2332, "step": 2298 }, { "epoch": 22.29156626506024, "grad_norm": 5.374022483825684, "learning_rate": 3.883980582524272e-05, "loss": 0.3091, "step": 2299 }, { "epoch": 22.301204819277107, "grad_norm": 7.300354957580566, "learning_rate": 3.883495145631068e-05, "loss": 0.1007, "step": 2300 }, { "epoch": 22.310843373493977, "grad_norm": 2.3574600219726562, "learning_rate": 3.883009708737864e-05, "loss": 0.2133, "step": 2301 }, { "epoch": 22.320481927710844, "grad_norm": 5.160988807678223, "learning_rate": 3.88252427184466e-05, "loss": 0.3592, "step": 2302 }, { "epoch": 22.33012048192771, "grad_norm": 3.9630441665649414, "learning_rate": 3.8820388349514565e-05, "loss": 0.5073, "step": 2303 }, { "epoch": 22.339759036144578, "grad_norm": 4.691128253936768, "learning_rate": 3.881553398058252e-05, "loss": 0.3672, "step": 2304 }, { "epoch": 22.349397590361445, "grad_norm": 3.165266752243042, "learning_rate": 3.881067961165049e-05, "loss": 0.142, "step": 2305 }, { "epoch": 22.35903614457831, "grad_norm": 15.038174629211426, "learning_rate": 3.8805825242718445e-05, "loss": 0.3178, "step": 2306 }, { "epoch": 22.368674698795182, "grad_norm": 2.9314773082733154, "learning_rate": 3.880097087378641e-05, "loss": 0.2496, "step": 2307 }, { "epoch": 22.37831325301205, "grad_norm": 2.594672679901123, "learning_rate": 3.879611650485437e-05, "loss": 0.1236, "step": 2308 }, { "epoch": 22.387951807228916, "grad_norm": 1.8214069604873657, "learning_rate": 3.879126213592233e-05, "loss": 0.2379, "step": 2309 }, { "epoch": 22.397590361445783, "grad_norm": 3.734361410140991, "learning_rate": 3.8786407766990296e-05, "loss": 0.3105, "step": 2310 }, { "epoch": 22.40722891566265, "grad_norm": 17.860321044921875, "learning_rate": 3.8781553398058254e-05, "loss": 0.3535, "step": 2311 }, { "epoch": 22.416867469879517, "grad_norm": 4.311437606811523, "learning_rate": 3.877669902912622e-05, "loss": 0.1956, "step": 2312 }, { "epoch": 22.426506024096387, "grad_norm": 2.7646400928497314, "learning_rate": 3.8771844660194177e-05, "loss": 0.2916, "step": 2313 }, { "epoch": 22.436144578313254, "grad_norm": 6.986391067504883, "learning_rate": 3.876699029126214e-05, "loss": 0.3571, "step": 2314 }, { "epoch": 22.44578313253012, "grad_norm": 4.351537227630615, "learning_rate": 3.87621359223301e-05, "loss": 0.2677, "step": 2315 }, { "epoch": 22.455421686746988, "grad_norm": 4.791439056396484, "learning_rate": 3.875728155339806e-05, "loss": 0.3225, "step": 2316 }, { "epoch": 22.465060240963854, "grad_norm": 1.778225302696228, "learning_rate": 3.875242718446602e-05, "loss": 0.1274, "step": 2317 }, { "epoch": 22.47469879518072, "grad_norm": 3.109461784362793, "learning_rate": 3.8747572815533986e-05, "loss": 0.174, "step": 2318 }, { "epoch": 22.48433734939759, "grad_norm": 5.107090950012207, "learning_rate": 3.8742718446601943e-05, "loss": 0.4377, "step": 2319 }, { "epoch": 22.49397590361446, "grad_norm": 7.060576438903809, "learning_rate": 3.873786407766991e-05, "loss": 0.2744, "step": 2320 }, { "epoch": 22.503614457831326, "grad_norm": 18.67443084716797, "learning_rate": 3.8733009708737866e-05, "loss": 0.4617, "step": 2321 }, { "epoch": 22.513253012048192, "grad_norm": 10.1093111038208, "learning_rate": 3.872815533980583e-05, "loss": 0.4886, "step": 2322 }, { "epoch": 22.52289156626506, "grad_norm": 6.754415512084961, "learning_rate": 3.872330097087379e-05, "loss": 0.2706, "step": 2323 }, { "epoch": 22.532530120481926, "grad_norm": 11.464617729187012, "learning_rate": 3.8718446601941746e-05, "loss": 0.6754, "step": 2324 }, { "epoch": 22.542168674698797, "grad_norm": 5.4028544425964355, "learning_rate": 3.871359223300971e-05, "loss": 0.2464, "step": 2325 }, { "epoch": 22.551807228915663, "grad_norm": 9.204083442687988, "learning_rate": 3.870873786407767e-05, "loss": 0.1836, "step": 2326 }, { "epoch": 22.56144578313253, "grad_norm": 2.566560983657837, "learning_rate": 3.870388349514563e-05, "loss": 0.3678, "step": 2327 }, { "epoch": 22.571084337349397, "grad_norm": 12.63319206237793, "learning_rate": 3.869902912621359e-05, "loss": 0.4208, "step": 2328 }, { "epoch": 22.580722891566264, "grad_norm": 4.334457874298096, "learning_rate": 3.8694174757281555e-05, "loss": 0.1085, "step": 2329 }, { "epoch": 22.59036144578313, "grad_norm": 5.12446928024292, "learning_rate": 3.868932038834951e-05, "loss": 0.1758, "step": 2330 }, { "epoch": 22.6, "grad_norm": 11.288468360900879, "learning_rate": 3.868446601941748e-05, "loss": 0.3416, "step": 2331 }, { "epoch": 22.60963855421687, "grad_norm": 7.844435691833496, "learning_rate": 3.8679611650485435e-05, "loss": 0.3261, "step": 2332 }, { "epoch": 22.619277108433735, "grad_norm": 12.532093048095703, "learning_rate": 3.86747572815534e-05, "loss": 0.4892, "step": 2333 }, { "epoch": 22.628915662650602, "grad_norm": 3.2760939598083496, "learning_rate": 3.866990291262136e-05, "loss": 0.2245, "step": 2334 }, { "epoch": 22.63855421686747, "grad_norm": 2.7820963859558105, "learning_rate": 3.866504854368932e-05, "loss": 0.2671, "step": 2335 }, { "epoch": 22.648192771084336, "grad_norm": 4.568284511566162, "learning_rate": 3.8660194174757286e-05, "loss": 0.157, "step": 2336 }, { "epoch": 22.657831325301206, "grad_norm": 2.7239432334899902, "learning_rate": 3.8655339805825244e-05, "loss": 0.1407, "step": 2337 }, { "epoch": 22.667469879518073, "grad_norm": 4.774937629699707, "learning_rate": 3.865048543689321e-05, "loss": 0.238, "step": 2338 }, { "epoch": 22.67710843373494, "grad_norm": 2.626519203186035, "learning_rate": 3.8645631067961166e-05, "loss": 0.2469, "step": 2339 }, { "epoch": 22.686746987951807, "grad_norm": 3.925638437271118, "learning_rate": 3.864077669902913e-05, "loss": 0.1997, "step": 2340 }, { "epoch": 22.696385542168674, "grad_norm": 3.4279966354370117, "learning_rate": 3.863592233009709e-05, "loss": 0.2617, "step": 2341 }, { "epoch": 22.70602409638554, "grad_norm": 12.438013076782227, "learning_rate": 3.863106796116505e-05, "loss": 0.2428, "step": 2342 }, { "epoch": 22.71566265060241, "grad_norm": 24.96173667907715, "learning_rate": 3.862621359223301e-05, "loss": 0.366, "step": 2343 }, { "epoch": 22.725301204819278, "grad_norm": 11.981257438659668, "learning_rate": 3.8621359223300975e-05, "loss": 0.2219, "step": 2344 }, { "epoch": 22.734939759036145, "grad_norm": 6.793211936950684, "learning_rate": 3.861650485436893e-05, "loss": 0.2269, "step": 2345 }, { "epoch": 22.74457831325301, "grad_norm": 5.406839847564697, "learning_rate": 3.86116504854369e-05, "loss": 0.3974, "step": 2346 }, { "epoch": 22.75421686746988, "grad_norm": 6.413244724273682, "learning_rate": 3.8606796116504855e-05, "loss": 0.3029, "step": 2347 }, { "epoch": 22.763855421686745, "grad_norm": 8.784568786621094, "learning_rate": 3.860194174757282e-05, "loss": 0.3981, "step": 2348 }, { "epoch": 22.773493975903616, "grad_norm": 8.29118824005127, "learning_rate": 3.859708737864078e-05, "loss": 0.263, "step": 2349 }, { "epoch": 22.783132530120483, "grad_norm": 3.996696710586548, "learning_rate": 3.859223300970874e-05, "loss": 0.5345, "step": 2350 }, { "epoch": 22.79277108433735, "grad_norm": 8.69136905670166, "learning_rate": 3.85873786407767e-05, "loss": 0.2316, "step": 2351 }, { "epoch": 22.802409638554217, "grad_norm": 5.1183648109436035, "learning_rate": 3.858252427184466e-05, "loss": 0.311, "step": 2352 }, { "epoch": 22.812048192771083, "grad_norm": 3.646343469619751, "learning_rate": 3.857766990291262e-05, "loss": 0.4788, "step": 2353 }, { "epoch": 22.82168674698795, "grad_norm": 4.078192710876465, "learning_rate": 3.857281553398058e-05, "loss": 0.2424, "step": 2354 }, { "epoch": 22.83132530120482, "grad_norm": 2.7819206714630127, "learning_rate": 3.8567961165048545e-05, "loss": 0.2063, "step": 2355 }, { "epoch": 22.840963855421688, "grad_norm": 3.427734613418579, "learning_rate": 3.85631067961165e-05, "loss": 0.2858, "step": 2356 }, { "epoch": 22.850602409638554, "grad_norm": 1.84398353099823, "learning_rate": 3.855825242718447e-05, "loss": 0.1595, "step": 2357 }, { "epoch": 22.86024096385542, "grad_norm": 13.918545722961426, "learning_rate": 3.8553398058252425e-05, "loss": 0.2263, "step": 2358 }, { "epoch": 22.86987951807229, "grad_norm": 3.994442939758301, "learning_rate": 3.854854368932039e-05, "loss": 0.298, "step": 2359 }, { "epoch": 22.879518072289155, "grad_norm": 7.043934345245361, "learning_rate": 3.854368932038835e-05, "loss": 0.2409, "step": 2360 }, { "epoch": 22.889156626506026, "grad_norm": 2.3974225521087646, "learning_rate": 3.853883495145632e-05, "loss": 0.1631, "step": 2361 }, { "epoch": 22.898795180722892, "grad_norm": 5.105374336242676, "learning_rate": 3.8533980582524276e-05, "loss": 0.234, "step": 2362 }, { "epoch": 22.90843373493976, "grad_norm": 4.560052394866943, "learning_rate": 3.8529126213592234e-05, "loss": 0.1603, "step": 2363 }, { "epoch": 22.918072289156626, "grad_norm": 8.734090805053711, "learning_rate": 3.85242718446602e-05, "loss": 0.4848, "step": 2364 }, { "epoch": 22.927710843373493, "grad_norm": 5.881598949432373, "learning_rate": 3.8519417475728156e-05, "loss": 0.1978, "step": 2365 }, { "epoch": 22.93734939759036, "grad_norm": 14.439814567565918, "learning_rate": 3.851456310679612e-05, "loss": 0.1406, "step": 2366 }, { "epoch": 22.94698795180723, "grad_norm": 8.575167655944824, "learning_rate": 3.850970873786408e-05, "loss": 0.3929, "step": 2367 }, { "epoch": 22.956626506024097, "grad_norm": 2.9813551902770996, "learning_rate": 3.850485436893204e-05, "loss": 0.2009, "step": 2368 }, { "epoch": 22.966265060240964, "grad_norm": 12.875367164611816, "learning_rate": 3.85e-05, "loss": 0.4501, "step": 2369 }, { "epoch": 22.97590361445783, "grad_norm": 6.693046569824219, "learning_rate": 3.8495145631067965e-05, "loss": 0.5321, "step": 2370 }, { "epoch": 22.985542168674698, "grad_norm": 3.109534978866577, "learning_rate": 3.849029126213592e-05, "loss": 0.2087, "step": 2371 }, { "epoch": 23.00120481927711, "grad_norm": 5.775432586669922, "learning_rate": 3.848543689320389e-05, "loss": 0.2504, "step": 2372 }, { "epoch": 23.010843373493977, "grad_norm": 2.6607494354248047, "learning_rate": 3.8480582524271845e-05, "loss": 0.172, "step": 2373 }, { "epoch": 23.020481927710843, "grad_norm": 4.2134108543396, "learning_rate": 3.847572815533981e-05, "loss": 0.2929, "step": 2374 }, { "epoch": 23.03012048192771, "grad_norm": 1.696047306060791, "learning_rate": 3.847087378640777e-05, "loss": 0.103, "step": 2375 }, { "epoch": 23.039759036144577, "grad_norm": 5.822893142700195, "learning_rate": 3.846601941747573e-05, "loss": 0.2963, "step": 2376 }, { "epoch": 23.049397590361444, "grad_norm": 9.364815711975098, "learning_rate": 3.846116504854369e-05, "loss": 0.4956, "step": 2377 }, { "epoch": 23.059036144578315, "grad_norm": 4.336942672729492, "learning_rate": 3.8456310679611654e-05, "loss": 0.2223, "step": 2378 }, { "epoch": 23.06867469879518, "grad_norm": 3.298128843307495, "learning_rate": 3.845145631067961e-05, "loss": 0.2301, "step": 2379 }, { "epoch": 23.07831325301205, "grad_norm": 4.389523506164551, "learning_rate": 3.844660194174757e-05, "loss": 0.3747, "step": 2380 }, { "epoch": 23.087951807228915, "grad_norm": 7.878269195556641, "learning_rate": 3.8441747572815534e-05, "loss": 0.3494, "step": 2381 }, { "epoch": 23.097590361445782, "grad_norm": 4.9378981590271, "learning_rate": 3.843689320388349e-05, "loss": 0.3041, "step": 2382 }, { "epoch": 23.10722891566265, "grad_norm": 4.716901779174805, "learning_rate": 3.843203883495146e-05, "loss": 0.209, "step": 2383 }, { "epoch": 23.11686746987952, "grad_norm": 4.3125901222229, "learning_rate": 3.8427184466019414e-05, "loss": 0.3395, "step": 2384 }, { "epoch": 23.126506024096386, "grad_norm": 6.602298259735107, "learning_rate": 3.842233009708738e-05, "loss": 0.2136, "step": 2385 }, { "epoch": 23.136144578313253, "grad_norm": 4.128505706787109, "learning_rate": 3.8417475728155344e-05, "loss": 0.4445, "step": 2386 }, { "epoch": 23.14578313253012, "grad_norm": 6.3684983253479, "learning_rate": 3.841262135922331e-05, "loss": 0.3707, "step": 2387 }, { "epoch": 23.155421686746987, "grad_norm": 3.8238325119018555, "learning_rate": 3.8407766990291266e-05, "loss": 0.2955, "step": 2388 }, { "epoch": 23.165060240963854, "grad_norm": 5.315969944000244, "learning_rate": 3.840291262135923e-05, "loss": 0.2804, "step": 2389 }, { "epoch": 23.174698795180724, "grad_norm": 5.646123886108398, "learning_rate": 3.839805825242719e-05, "loss": 0.2052, "step": 2390 }, { "epoch": 23.18433734939759, "grad_norm": 10.5449800491333, "learning_rate": 3.8393203883495146e-05, "loss": 0.3551, "step": 2391 }, { "epoch": 23.193975903614458, "grad_norm": 3.6209211349487305, "learning_rate": 3.838834951456311e-05, "loss": 0.1999, "step": 2392 }, { "epoch": 23.203614457831325, "grad_norm": 3.042139768600464, "learning_rate": 3.838349514563107e-05, "loss": 0.2041, "step": 2393 }, { "epoch": 23.21325301204819, "grad_norm": 1.2999087572097778, "learning_rate": 3.837864077669903e-05, "loss": 0.1073, "step": 2394 }, { "epoch": 23.22289156626506, "grad_norm": 6.228593826293945, "learning_rate": 3.837378640776699e-05, "loss": 0.3094, "step": 2395 }, { "epoch": 23.23253012048193, "grad_norm": 3.7357656955718994, "learning_rate": 3.8368932038834955e-05, "loss": 0.1274, "step": 2396 }, { "epoch": 23.242168674698796, "grad_norm": 4.695888519287109, "learning_rate": 3.836407766990291e-05, "loss": 0.3313, "step": 2397 }, { "epoch": 23.251807228915663, "grad_norm": 2.053152084350586, "learning_rate": 3.835922330097088e-05, "loss": 0.2786, "step": 2398 }, { "epoch": 23.26144578313253, "grad_norm": 3.5448169708251953, "learning_rate": 3.8354368932038835e-05, "loss": 0.332, "step": 2399 }, { "epoch": 23.271084337349397, "grad_norm": 4.056637287139893, "learning_rate": 3.83495145631068e-05, "loss": 0.2726, "step": 2400 }, { "epoch": 23.280722891566263, "grad_norm": 3.7877044677734375, "learning_rate": 3.834466019417476e-05, "loss": 0.3215, "step": 2401 }, { "epoch": 23.290361445783134, "grad_norm": 2.2834746837615967, "learning_rate": 3.833980582524272e-05, "loss": 0.1742, "step": 2402 }, { "epoch": 23.3, "grad_norm": 2.8632473945617676, "learning_rate": 3.833495145631068e-05, "loss": 0.2022, "step": 2403 }, { "epoch": 23.309638554216868, "grad_norm": 8.94943618774414, "learning_rate": 3.8330097087378644e-05, "loss": 0.3776, "step": 2404 }, { "epoch": 23.319277108433734, "grad_norm": 7.7521796226501465, "learning_rate": 3.83252427184466e-05, "loss": 0.689, "step": 2405 }, { "epoch": 23.3289156626506, "grad_norm": 4.115089416503906, "learning_rate": 3.8320388349514566e-05, "loss": 0.2878, "step": 2406 }, { "epoch": 23.33855421686747, "grad_norm": 7.347455024719238, "learning_rate": 3.8315533980582524e-05, "loss": 0.3249, "step": 2407 }, { "epoch": 23.34819277108434, "grad_norm": 6.266973495483398, "learning_rate": 3.831067961165048e-05, "loss": 0.6392, "step": 2408 }, { "epoch": 23.357831325301206, "grad_norm": 6.043604373931885, "learning_rate": 3.8305825242718447e-05, "loss": 0.4008, "step": 2409 }, { "epoch": 23.367469879518072, "grad_norm": 4.3651509284973145, "learning_rate": 3.8300970873786404e-05, "loss": 0.5787, "step": 2410 }, { "epoch": 23.37710843373494, "grad_norm": 5.54539155960083, "learning_rate": 3.8296116504854376e-05, "loss": 0.3577, "step": 2411 }, { "epoch": 23.386746987951806, "grad_norm": 9.31284236907959, "learning_rate": 3.829126213592233e-05, "loss": 0.409, "step": 2412 }, { "epoch": 23.396385542168673, "grad_norm": 5.302669525146484, "learning_rate": 3.82864077669903e-05, "loss": 0.202, "step": 2413 }, { "epoch": 23.406024096385543, "grad_norm": 9.006712913513184, "learning_rate": 3.8281553398058256e-05, "loss": 0.2448, "step": 2414 }, { "epoch": 23.41566265060241, "grad_norm": 3.047684669494629, "learning_rate": 3.827669902912622e-05, "loss": 0.2242, "step": 2415 }, { "epoch": 23.425301204819277, "grad_norm": 2.9254446029663086, "learning_rate": 3.827184466019418e-05, "loss": 0.3761, "step": 2416 }, { "epoch": 23.434939759036144, "grad_norm": 7.526650428771973, "learning_rate": 3.826699029126214e-05, "loss": 0.1962, "step": 2417 }, { "epoch": 23.44457831325301, "grad_norm": 2.140501022338867, "learning_rate": 3.82621359223301e-05, "loss": 0.2449, "step": 2418 }, { "epoch": 23.454216867469878, "grad_norm": 4.930917739868164, "learning_rate": 3.825728155339806e-05, "loss": 0.413, "step": 2419 }, { "epoch": 23.46385542168675, "grad_norm": 3.354710102081299, "learning_rate": 3.825242718446602e-05, "loss": 0.1818, "step": 2420 }, { "epoch": 23.473493975903615, "grad_norm": 3.469557046890259, "learning_rate": 3.824757281553398e-05, "loss": 0.2041, "step": 2421 }, { "epoch": 23.483132530120482, "grad_norm": 25.229381561279297, "learning_rate": 3.8242718446601945e-05, "loss": 0.3237, "step": 2422 }, { "epoch": 23.49277108433735, "grad_norm": 4.350718975067139, "learning_rate": 3.82378640776699e-05, "loss": 0.1696, "step": 2423 }, { "epoch": 23.502409638554216, "grad_norm": 2.7675893306732178, "learning_rate": 3.823300970873787e-05, "loss": 0.1227, "step": 2424 }, { "epoch": 23.512048192771083, "grad_norm": 1.6994929313659668, "learning_rate": 3.8228155339805825e-05, "loss": 0.1155, "step": 2425 }, { "epoch": 23.521686746987953, "grad_norm": 5.4549455642700195, "learning_rate": 3.822330097087379e-05, "loss": 0.2997, "step": 2426 }, { "epoch": 23.53132530120482, "grad_norm": 6.100864887237549, "learning_rate": 3.821844660194175e-05, "loss": 0.3864, "step": 2427 }, { "epoch": 23.540963855421687, "grad_norm": 4.115309238433838, "learning_rate": 3.821359223300971e-05, "loss": 0.2896, "step": 2428 }, { "epoch": 23.550602409638554, "grad_norm": 7.6408467292785645, "learning_rate": 3.820873786407767e-05, "loss": 0.3187, "step": 2429 }, { "epoch": 23.56024096385542, "grad_norm": 6.441904544830322, "learning_rate": 3.8203883495145634e-05, "loss": 0.3798, "step": 2430 }, { "epoch": 23.569879518072288, "grad_norm": 6.274235725402832, "learning_rate": 3.819902912621359e-05, "loss": 0.3443, "step": 2431 }, { "epoch": 23.579518072289158, "grad_norm": 7.031723499298096, "learning_rate": 3.8194174757281556e-05, "loss": 0.3394, "step": 2432 }, { "epoch": 23.589156626506025, "grad_norm": 1.3843544721603394, "learning_rate": 3.8189320388349514e-05, "loss": 0.1124, "step": 2433 }, { "epoch": 23.59879518072289, "grad_norm": 4.580493927001953, "learning_rate": 3.818446601941748e-05, "loss": 0.1709, "step": 2434 }, { "epoch": 23.60843373493976, "grad_norm": 6.9058308601379395, "learning_rate": 3.8179611650485436e-05, "loss": 0.5427, "step": 2435 }, { "epoch": 23.618072289156625, "grad_norm": 4.524303436279297, "learning_rate": 3.8174757281553394e-05, "loss": 0.213, "step": 2436 }, { "epoch": 23.627710843373492, "grad_norm": 3.906773805618286, "learning_rate": 3.8169902912621365e-05, "loss": 0.2148, "step": 2437 }, { "epoch": 23.637349397590363, "grad_norm": 5.033607482910156, "learning_rate": 3.816504854368932e-05, "loss": 0.2309, "step": 2438 }, { "epoch": 23.64698795180723, "grad_norm": 2.8065197467803955, "learning_rate": 3.816019417475729e-05, "loss": 0.1675, "step": 2439 }, { "epoch": 23.656626506024097, "grad_norm": 3.8033409118652344, "learning_rate": 3.8155339805825245e-05, "loss": 0.2187, "step": 2440 }, { "epoch": 23.666265060240963, "grad_norm": 16.80631446838379, "learning_rate": 3.815048543689321e-05, "loss": 0.4241, "step": 2441 }, { "epoch": 23.67590361445783, "grad_norm": 4.839042663574219, "learning_rate": 3.814563106796117e-05, "loss": 0.3292, "step": 2442 }, { "epoch": 23.685542168674697, "grad_norm": 20.06939125061035, "learning_rate": 3.814077669902913e-05, "loss": 0.4196, "step": 2443 }, { "epoch": 23.695180722891568, "grad_norm": 4.782577991485596, "learning_rate": 3.813592233009709e-05, "loss": 0.287, "step": 2444 }, { "epoch": 23.704819277108435, "grad_norm": 3.06108021736145, "learning_rate": 3.8131067961165055e-05, "loss": 0.1405, "step": 2445 }, { "epoch": 23.7144578313253, "grad_norm": 3.6338484287261963, "learning_rate": 3.812621359223301e-05, "loss": 0.2231, "step": 2446 }, { "epoch": 23.72409638554217, "grad_norm": 6.319159030914307, "learning_rate": 3.812135922330097e-05, "loss": 0.2367, "step": 2447 }, { "epoch": 23.733734939759035, "grad_norm": 4.615152835845947, "learning_rate": 3.8116504854368935e-05, "loss": 0.2046, "step": 2448 }, { "epoch": 23.743373493975902, "grad_norm": 2.5443856716156006, "learning_rate": 3.811165048543689e-05, "loss": 0.1232, "step": 2449 }, { "epoch": 23.753012048192772, "grad_norm": 2.7826781272888184, "learning_rate": 3.810679611650486e-05, "loss": 0.1441, "step": 2450 }, { "epoch": 23.76265060240964, "grad_norm": 4.977726936340332, "learning_rate": 3.8101941747572815e-05, "loss": 0.3732, "step": 2451 }, { "epoch": 23.772289156626506, "grad_norm": 6.055996417999268, "learning_rate": 3.809708737864078e-05, "loss": 0.3389, "step": 2452 }, { "epoch": 23.781927710843373, "grad_norm": 3.8086729049682617, "learning_rate": 3.809223300970874e-05, "loss": 0.2417, "step": 2453 }, { "epoch": 23.79156626506024, "grad_norm": 4.945036888122559, "learning_rate": 3.80873786407767e-05, "loss": 0.3737, "step": 2454 }, { "epoch": 23.801204819277107, "grad_norm": 4.233209133148193, "learning_rate": 3.808252427184466e-05, "loss": 0.235, "step": 2455 }, { "epoch": 23.810843373493977, "grad_norm": 5.641939163208008, "learning_rate": 3.8077669902912624e-05, "loss": 0.3992, "step": 2456 }, { "epoch": 23.820481927710844, "grad_norm": 6.221657752990723, "learning_rate": 3.807281553398058e-05, "loss": 0.2307, "step": 2457 }, { "epoch": 23.83012048192771, "grad_norm": 2.6011509895324707, "learning_rate": 3.8067961165048546e-05, "loss": 0.119, "step": 2458 }, { "epoch": 23.839759036144578, "grad_norm": 4.193288803100586, "learning_rate": 3.8063106796116504e-05, "loss": 0.2182, "step": 2459 }, { "epoch": 23.849397590361445, "grad_norm": 3.008150815963745, "learning_rate": 3.805825242718447e-05, "loss": 0.2296, "step": 2460 }, { "epoch": 23.85903614457831, "grad_norm": 2.326580762863159, "learning_rate": 3.8053398058252426e-05, "loss": 0.1789, "step": 2461 }, { "epoch": 23.868674698795182, "grad_norm": 3.4217746257781982, "learning_rate": 3.804854368932039e-05, "loss": 0.254, "step": 2462 }, { "epoch": 23.87831325301205, "grad_norm": 8.31168270111084, "learning_rate": 3.8043689320388355e-05, "loss": 0.1904, "step": 2463 }, { "epoch": 23.887951807228916, "grad_norm": 13.787246704101562, "learning_rate": 3.803883495145631e-05, "loss": 0.4665, "step": 2464 }, { "epoch": 23.897590361445783, "grad_norm": 3.298628568649292, "learning_rate": 3.803398058252428e-05, "loss": 0.1383, "step": 2465 }, { "epoch": 23.90722891566265, "grad_norm": 3.0005030632019043, "learning_rate": 3.8029126213592235e-05, "loss": 0.2763, "step": 2466 }, { "epoch": 23.916867469879517, "grad_norm": 2.5018203258514404, "learning_rate": 3.80242718446602e-05, "loss": 0.2589, "step": 2467 }, { "epoch": 23.926506024096387, "grad_norm": 3.564764976501465, "learning_rate": 3.801941747572816e-05, "loss": 0.2527, "step": 2468 }, { "epoch": 23.936144578313254, "grad_norm": 7.06553316116333, "learning_rate": 3.801456310679612e-05, "loss": 0.2716, "step": 2469 }, { "epoch": 23.94578313253012, "grad_norm": 2.37127685546875, "learning_rate": 3.800970873786408e-05, "loss": 0.1251, "step": 2470 }, { "epoch": 23.955421686746988, "grad_norm": 3.793468713760376, "learning_rate": 3.8004854368932044e-05, "loss": 0.2858, "step": 2471 }, { "epoch": 23.965060240963854, "grad_norm": 3.6041526794433594, "learning_rate": 3.8e-05, "loss": 0.234, "step": 2472 }, { "epoch": 23.97469879518072, "grad_norm": 5.858757972717285, "learning_rate": 3.799514563106796e-05, "loss": 0.3504, "step": 2473 }, { "epoch": 23.98433734939759, "grad_norm": 3.166447877883911, "learning_rate": 3.7990291262135924e-05, "loss": 0.2618, "step": 2474 }, { "epoch": 23.99397590361446, "grad_norm": 5.357047080993652, "learning_rate": 3.798543689320388e-05, "loss": 0.3965, "step": 2475 }, { "epoch": 24.009638554216867, "grad_norm": 6.144484519958496, "learning_rate": 3.798058252427185e-05, "loss": 0.1791, "step": 2476 }, { "epoch": 24.019277108433734, "grad_norm": 4.7440361976623535, "learning_rate": 3.7975728155339804e-05, "loss": 0.3436, "step": 2477 }, { "epoch": 24.0289156626506, "grad_norm": 4.873615264892578, "learning_rate": 3.797087378640777e-05, "loss": 0.3226, "step": 2478 }, { "epoch": 24.03855421686747, "grad_norm": 3.8327927589416504, "learning_rate": 3.796601941747573e-05, "loss": 0.3654, "step": 2479 }, { "epoch": 24.048192771084338, "grad_norm": 3.189448595046997, "learning_rate": 3.796116504854369e-05, "loss": 0.1745, "step": 2480 }, { "epoch": 24.057831325301205, "grad_norm": 6.555073261260986, "learning_rate": 3.795631067961165e-05, "loss": 0.2628, "step": 2481 }, { "epoch": 24.06746987951807, "grad_norm": 3.2469067573547363, "learning_rate": 3.7951456310679614e-05, "loss": 0.2213, "step": 2482 }, { "epoch": 24.07710843373494, "grad_norm": 2.271636486053467, "learning_rate": 3.794660194174757e-05, "loss": 0.117, "step": 2483 }, { "epoch": 24.086746987951805, "grad_norm": 5.815107345581055, "learning_rate": 3.7941747572815536e-05, "loss": 0.2064, "step": 2484 }, { "epoch": 24.096385542168676, "grad_norm": 7.84411096572876, "learning_rate": 3.7936893203883494e-05, "loss": 0.4305, "step": 2485 }, { "epoch": 24.106024096385543, "grad_norm": 4.575732231140137, "learning_rate": 3.793203883495146e-05, "loss": 0.3638, "step": 2486 }, { "epoch": 24.11566265060241, "grad_norm": 3.9312193393707275, "learning_rate": 3.792718446601942e-05, "loss": 0.2867, "step": 2487 }, { "epoch": 24.125301204819277, "grad_norm": 4.467780590057373, "learning_rate": 3.792233009708738e-05, "loss": 0.3177, "step": 2488 }, { "epoch": 24.134939759036143, "grad_norm": 3.4974868297576904, "learning_rate": 3.7917475728155345e-05, "loss": 0.1548, "step": 2489 }, { "epoch": 24.14457831325301, "grad_norm": 6.250709056854248, "learning_rate": 3.79126213592233e-05, "loss": 0.4506, "step": 2490 }, { "epoch": 24.15421686746988, "grad_norm": 9.129837989807129, "learning_rate": 3.790776699029127e-05, "loss": 0.4516, "step": 2491 }, { "epoch": 24.163855421686748, "grad_norm": 6.275645732879639, "learning_rate": 3.7902912621359225e-05, "loss": 0.3614, "step": 2492 }, { "epoch": 24.173493975903614, "grad_norm": 3.539855480194092, "learning_rate": 3.789805825242719e-05, "loss": 0.352, "step": 2493 }, { "epoch": 24.18313253012048, "grad_norm": 3.586169958114624, "learning_rate": 3.789320388349515e-05, "loss": 0.265, "step": 2494 }, { "epoch": 24.19277108433735, "grad_norm": 1.9867808818817139, "learning_rate": 3.788834951456311e-05, "loss": 0.1605, "step": 2495 }, { "epoch": 24.202409638554215, "grad_norm": 4.106800079345703, "learning_rate": 3.788349514563107e-05, "loss": 0.2187, "step": 2496 }, { "epoch": 24.212048192771086, "grad_norm": 3.445962429046631, "learning_rate": 3.7878640776699034e-05, "loss": 0.432, "step": 2497 }, { "epoch": 24.221686746987952, "grad_norm": 4.787572860717773, "learning_rate": 3.787378640776699e-05, "loss": 0.2844, "step": 2498 }, { "epoch": 24.23132530120482, "grad_norm": 5.048244953155518, "learning_rate": 3.7868932038834956e-05, "loss": 0.3833, "step": 2499 }, { "epoch": 24.240963855421686, "grad_norm": 5.105795860290527, "learning_rate": 3.7864077669902914e-05, "loss": 0.5343, "step": 2500 }, { "epoch": 24.250602409638553, "grad_norm": 3.0558462142944336, "learning_rate": 3.785922330097087e-05, "loss": 0.132, "step": 2501 }, { "epoch": 24.26024096385542, "grad_norm": 14.995365142822266, "learning_rate": 3.7854368932038836e-05, "loss": 0.3309, "step": 2502 }, { "epoch": 24.26987951807229, "grad_norm": 6.287730693817139, "learning_rate": 3.7849514563106794e-05, "loss": 0.3685, "step": 2503 }, { "epoch": 24.279518072289157, "grad_norm": 4.629750728607178, "learning_rate": 3.784466019417476e-05, "loss": 0.2391, "step": 2504 }, { "epoch": 24.289156626506024, "grad_norm": 7.77407169342041, "learning_rate": 3.7839805825242717e-05, "loss": 0.2892, "step": 2505 }, { "epoch": 24.29879518072289, "grad_norm": 3.512712240219116, "learning_rate": 3.783495145631068e-05, "loss": 0.1748, "step": 2506 }, { "epoch": 24.308433734939758, "grad_norm": 6.601154327392578, "learning_rate": 3.783009708737864e-05, "loss": 0.2675, "step": 2507 }, { "epoch": 24.318072289156625, "grad_norm": 3.2437336444854736, "learning_rate": 3.78252427184466e-05, "loss": 0.3292, "step": 2508 }, { "epoch": 24.327710843373495, "grad_norm": 4.716838359832764, "learning_rate": 3.782038834951456e-05, "loss": 0.3164, "step": 2509 }, { "epoch": 24.337349397590362, "grad_norm": 4.395771503448486, "learning_rate": 3.7815533980582526e-05, "loss": 0.2151, "step": 2510 }, { "epoch": 24.34698795180723, "grad_norm": 6.1648054122924805, "learning_rate": 3.781067961165048e-05, "loss": 0.2262, "step": 2511 }, { "epoch": 24.356626506024096, "grad_norm": 5.872390270233154, "learning_rate": 3.780582524271845e-05, "loss": 0.3449, "step": 2512 }, { "epoch": 24.366265060240963, "grad_norm": 4.591111183166504, "learning_rate": 3.780097087378641e-05, "loss": 0.3557, "step": 2513 }, { "epoch": 24.37590361445783, "grad_norm": 2.9134840965270996, "learning_rate": 3.779611650485437e-05, "loss": 0.3362, "step": 2514 }, { "epoch": 24.3855421686747, "grad_norm": 5.859024524688721, "learning_rate": 3.7791262135922335e-05, "loss": 0.2548, "step": 2515 }, { "epoch": 24.395180722891567, "grad_norm": 13.805564880371094, "learning_rate": 3.778640776699029e-05, "loss": 0.1282, "step": 2516 }, { "epoch": 24.404819277108434, "grad_norm": 4.7751665115356445, "learning_rate": 3.778155339805826e-05, "loss": 0.4336, "step": 2517 }, { "epoch": 24.4144578313253, "grad_norm": 6.090742588043213, "learning_rate": 3.7776699029126215e-05, "loss": 0.3514, "step": 2518 }, { "epoch": 24.424096385542168, "grad_norm": 8.2051420211792, "learning_rate": 3.777184466019418e-05, "loss": 0.3473, "step": 2519 }, { "epoch": 24.433734939759034, "grad_norm": 2.143308639526367, "learning_rate": 3.776699029126214e-05, "loss": 0.1198, "step": 2520 }, { "epoch": 24.443373493975905, "grad_norm": 3.8810336589813232, "learning_rate": 3.77621359223301e-05, "loss": 0.2404, "step": 2521 }, { "epoch": 24.45301204819277, "grad_norm": 3.4042649269104004, "learning_rate": 3.775728155339806e-05, "loss": 0.2204, "step": 2522 }, { "epoch": 24.46265060240964, "grad_norm": 5.479767799377441, "learning_rate": 3.7752427184466024e-05, "loss": 0.3517, "step": 2523 }, { "epoch": 24.472289156626506, "grad_norm": 5.464761257171631, "learning_rate": 3.774757281553398e-05, "loss": 0.4424, "step": 2524 }, { "epoch": 24.481927710843372, "grad_norm": 5.503174781799316, "learning_rate": 3.7742718446601946e-05, "loss": 0.2326, "step": 2525 }, { "epoch": 24.49156626506024, "grad_norm": 4.358739376068115, "learning_rate": 3.7737864077669904e-05, "loss": 0.509, "step": 2526 }, { "epoch": 24.50120481927711, "grad_norm": 3.3904480934143066, "learning_rate": 3.773300970873787e-05, "loss": 0.2232, "step": 2527 }, { "epoch": 24.510843373493977, "grad_norm": 4.935648441314697, "learning_rate": 3.7728155339805826e-05, "loss": 0.2629, "step": 2528 }, { "epoch": 24.520481927710843, "grad_norm": 5.709972858428955, "learning_rate": 3.7723300970873784e-05, "loss": 0.4116, "step": 2529 }, { "epoch": 24.53012048192771, "grad_norm": 6.60905647277832, "learning_rate": 3.771844660194175e-05, "loss": 0.2909, "step": 2530 }, { "epoch": 24.539759036144577, "grad_norm": 3.7947916984558105, "learning_rate": 3.7713592233009706e-05, "loss": 0.3801, "step": 2531 }, { "epoch": 24.549397590361444, "grad_norm": 3.7329068183898926, "learning_rate": 3.770873786407767e-05, "loss": 0.1566, "step": 2532 }, { "epoch": 24.559036144578315, "grad_norm": 3.238612651824951, "learning_rate": 3.770388349514563e-05, "loss": 0.2308, "step": 2533 }, { "epoch": 24.56867469879518, "grad_norm": 2.085468292236328, "learning_rate": 3.769902912621359e-05, "loss": 0.3132, "step": 2534 }, { "epoch": 24.57831325301205, "grad_norm": 7.430493354797363, "learning_rate": 3.769417475728155e-05, "loss": 0.2147, "step": 2535 }, { "epoch": 24.587951807228915, "grad_norm": 4.293447971343994, "learning_rate": 3.7689320388349515e-05, "loss": 0.3196, "step": 2536 }, { "epoch": 24.597590361445782, "grad_norm": 4.452263832092285, "learning_rate": 3.768446601941747e-05, "loss": 0.3606, "step": 2537 }, { "epoch": 24.60722891566265, "grad_norm": 1.6295751333236694, "learning_rate": 3.7679611650485444e-05, "loss": 0.1132, "step": 2538 }, { "epoch": 24.61686746987952, "grad_norm": 6.4030561447143555, "learning_rate": 3.76747572815534e-05, "loss": 0.183, "step": 2539 }, { "epoch": 24.626506024096386, "grad_norm": 5.674808979034424, "learning_rate": 3.766990291262136e-05, "loss": 0.3009, "step": 2540 }, { "epoch": 24.636144578313253, "grad_norm": 6.854308605194092, "learning_rate": 3.7665048543689325e-05, "loss": 0.3246, "step": 2541 }, { "epoch": 24.64578313253012, "grad_norm": 4.074446678161621, "learning_rate": 3.766019417475728e-05, "loss": 0.2625, "step": 2542 }, { "epoch": 24.655421686746987, "grad_norm": 4.225254535675049, "learning_rate": 3.765533980582525e-05, "loss": 0.1896, "step": 2543 }, { "epoch": 24.665060240963854, "grad_norm": 5.293919086456299, "learning_rate": 3.7650485436893205e-05, "loss": 0.3465, "step": 2544 }, { "epoch": 24.674698795180724, "grad_norm": 4.93405818939209, "learning_rate": 3.764563106796117e-05, "loss": 0.1504, "step": 2545 }, { "epoch": 24.68433734939759, "grad_norm": 2.969148874282837, "learning_rate": 3.764077669902913e-05, "loss": 0.3083, "step": 2546 }, { "epoch": 24.693975903614458, "grad_norm": 7.475980281829834, "learning_rate": 3.763592233009709e-05, "loss": 0.2542, "step": 2547 }, { "epoch": 24.703614457831325, "grad_norm": 4.7799153327941895, "learning_rate": 3.763106796116505e-05, "loss": 0.2307, "step": 2548 }, { "epoch": 24.71325301204819, "grad_norm": 5.574760913848877, "learning_rate": 3.7626213592233014e-05, "loss": 0.5317, "step": 2549 }, { "epoch": 24.72289156626506, "grad_norm": 4.213448524475098, "learning_rate": 3.762135922330097e-05, "loss": 0.3024, "step": 2550 }, { "epoch": 24.73253012048193, "grad_norm": 3.572582483291626, "learning_rate": 3.7616504854368936e-05, "loss": 0.3336, "step": 2551 }, { "epoch": 24.742168674698796, "grad_norm": 3.6164114475250244, "learning_rate": 3.7611650485436894e-05, "loss": 0.2695, "step": 2552 }, { "epoch": 24.751807228915663, "grad_norm": 6.0416083335876465, "learning_rate": 3.760679611650486e-05, "loss": 0.2448, "step": 2553 }, { "epoch": 24.76144578313253, "grad_norm": 5.567793846130371, "learning_rate": 3.7601941747572816e-05, "loss": 0.3686, "step": 2554 }, { "epoch": 24.771084337349397, "grad_norm": 3.383084774017334, "learning_rate": 3.759708737864078e-05, "loss": 0.1761, "step": 2555 }, { "epoch": 24.780722891566263, "grad_norm": 3.992485761642456, "learning_rate": 3.759223300970874e-05, "loss": 0.1873, "step": 2556 }, { "epoch": 24.790361445783134, "grad_norm": 3.314966917037964, "learning_rate": 3.7587378640776696e-05, "loss": 0.1088, "step": 2557 }, { "epoch": 24.8, "grad_norm": 2.5784568786621094, "learning_rate": 3.758252427184466e-05, "loss": 0.2793, "step": 2558 }, { "epoch": 24.809638554216868, "grad_norm": 3.6699211597442627, "learning_rate": 3.757766990291262e-05, "loss": 0.2686, "step": 2559 }, { "epoch": 24.819277108433734, "grad_norm": 3.846933603286743, "learning_rate": 3.757281553398058e-05, "loss": 0.3318, "step": 2560 }, { "epoch": 24.8289156626506, "grad_norm": 6.37354040145874, "learning_rate": 3.756796116504854e-05, "loss": 0.3928, "step": 2561 }, { "epoch": 24.83855421686747, "grad_norm": 3.481639862060547, "learning_rate": 3.7563106796116505e-05, "loss": 0.2796, "step": 2562 }, { "epoch": 24.84819277108434, "grad_norm": 5.676112651824951, "learning_rate": 3.755825242718447e-05, "loss": 0.3334, "step": 2563 }, { "epoch": 24.857831325301206, "grad_norm": 7.251621246337891, "learning_rate": 3.7553398058252434e-05, "loss": 0.4325, "step": 2564 }, { "epoch": 24.867469879518072, "grad_norm": 5.617877006530762, "learning_rate": 3.754854368932039e-05, "loss": 0.2975, "step": 2565 }, { "epoch": 24.87710843373494, "grad_norm": 2.9651601314544678, "learning_rate": 3.7543689320388357e-05, "loss": 0.2735, "step": 2566 }, { "epoch": 24.886746987951806, "grad_norm": 2.513188362121582, "learning_rate": 3.7538834951456314e-05, "loss": 0.1492, "step": 2567 }, { "epoch": 24.896385542168673, "grad_norm": 4.0259528160095215, "learning_rate": 3.753398058252427e-05, "loss": 0.256, "step": 2568 }, { "epoch": 24.906024096385543, "grad_norm": 5.407732009887695, "learning_rate": 3.752912621359224e-05, "loss": 0.2906, "step": 2569 }, { "epoch": 24.91566265060241, "grad_norm": 4.900196075439453, "learning_rate": 3.7524271844660194e-05, "loss": 0.5247, "step": 2570 }, { "epoch": 24.925301204819277, "grad_norm": 4.24161958694458, "learning_rate": 3.751941747572816e-05, "loss": 0.2888, "step": 2571 }, { "epoch": 24.934939759036144, "grad_norm": 5.751067161560059, "learning_rate": 3.751456310679612e-05, "loss": 0.4, "step": 2572 }, { "epoch": 24.94457831325301, "grad_norm": 3.306333541870117, "learning_rate": 3.750970873786408e-05, "loss": 0.1406, "step": 2573 }, { "epoch": 24.954216867469878, "grad_norm": 17.062759399414062, "learning_rate": 3.750485436893204e-05, "loss": 0.3118, "step": 2574 }, { "epoch": 24.96385542168675, "grad_norm": 1.773095726966858, "learning_rate": 3.7500000000000003e-05, "loss": 0.1209, "step": 2575 }, { "epoch": 24.973493975903615, "grad_norm": 4.996022701263428, "learning_rate": 3.749514563106796e-05, "loss": 0.1528, "step": 2576 }, { "epoch": 24.983132530120482, "grad_norm": 4.184274196624756, "learning_rate": 3.7490291262135926e-05, "loss": 0.2173, "step": 2577 }, { "epoch": 24.99277108433735, "grad_norm": 4.541049003601074, "learning_rate": 3.7485436893203884e-05, "loss": 0.3937, "step": 2578 }, { "epoch": 25.00843373493976, "grad_norm": 3.12406325340271, "learning_rate": 3.748058252427185e-05, "loss": 0.1551, "step": 2579 }, { "epoch": 25.018072289156628, "grad_norm": 2.200871706008911, "learning_rate": 3.7475728155339806e-05, "loss": 0.2216, "step": 2580 }, { "epoch": 25.027710843373494, "grad_norm": 12.986577987670898, "learning_rate": 3.747087378640777e-05, "loss": 0.392, "step": 2581 }, { "epoch": 25.03734939759036, "grad_norm": 3.731802225112915, "learning_rate": 3.746601941747573e-05, "loss": 0.3707, "step": 2582 }, { "epoch": 25.04698795180723, "grad_norm": 17.880821228027344, "learning_rate": 3.746116504854369e-05, "loss": 0.2518, "step": 2583 }, { "epoch": 25.056626506024095, "grad_norm": 1.680676817893982, "learning_rate": 3.745631067961165e-05, "loss": 0.1626, "step": 2584 }, { "epoch": 25.066265060240966, "grad_norm": 1.234863519668579, "learning_rate": 3.745145631067961e-05, "loss": 0.2179, "step": 2585 }, { "epoch": 25.075903614457832, "grad_norm": 9.961991310119629, "learning_rate": 3.744660194174757e-05, "loss": 0.2471, "step": 2586 }, { "epoch": 25.0855421686747, "grad_norm": 3.6140851974487305, "learning_rate": 3.744174757281553e-05, "loss": 0.1895, "step": 2587 }, { "epoch": 25.095180722891566, "grad_norm": 3.8405673503875732, "learning_rate": 3.74368932038835e-05, "loss": 0.3416, "step": 2588 }, { "epoch": 25.104819277108433, "grad_norm": 5.974029064178467, "learning_rate": 3.743203883495146e-05, "loss": 0.2021, "step": 2589 }, { "epoch": 25.1144578313253, "grad_norm": 12.414642333984375, "learning_rate": 3.7427184466019424e-05, "loss": 0.3072, "step": 2590 }, { "epoch": 25.12409638554217, "grad_norm": 9.754329681396484, "learning_rate": 3.742233009708738e-05, "loss": 0.3228, "step": 2591 }, { "epoch": 25.133734939759037, "grad_norm": 7.227195739746094, "learning_rate": 3.7417475728155346e-05, "loss": 0.3527, "step": 2592 }, { "epoch": 25.143373493975904, "grad_norm": 5.213927745819092, "learning_rate": 3.7412621359223304e-05, "loss": 0.2627, "step": 2593 }, { "epoch": 25.15301204819277, "grad_norm": 11.565166473388672, "learning_rate": 3.740776699029127e-05, "loss": 0.2239, "step": 2594 }, { "epoch": 25.162650602409638, "grad_norm": 2.2868118286132812, "learning_rate": 3.7402912621359226e-05, "loss": 0.3513, "step": 2595 }, { "epoch": 25.172289156626505, "grad_norm": 21.02579116821289, "learning_rate": 3.7398058252427184e-05, "loss": 0.3509, "step": 2596 }, { "epoch": 25.181927710843375, "grad_norm": 5.028548717498779, "learning_rate": 3.739320388349515e-05, "loss": 0.1196, "step": 2597 }, { "epoch": 25.191566265060242, "grad_norm": 12.056272506713867, "learning_rate": 3.7388349514563106e-05, "loss": 0.2793, "step": 2598 }, { "epoch": 25.20120481927711, "grad_norm": 3.264629602432251, "learning_rate": 3.738349514563107e-05, "loss": 0.1369, "step": 2599 }, { "epoch": 25.210843373493976, "grad_norm": 5.196573257446289, "learning_rate": 3.737864077669903e-05, "loss": 0.1849, "step": 2600 }, { "epoch": 25.220481927710843, "grad_norm": 3.1677122116088867, "learning_rate": 3.737378640776699e-05, "loss": 0.1372, "step": 2601 }, { "epoch": 25.23012048192771, "grad_norm": 1.2213706970214844, "learning_rate": 3.736893203883495e-05, "loss": 0.2118, "step": 2602 }, { "epoch": 25.23975903614458, "grad_norm": 12.503268241882324, "learning_rate": 3.7364077669902916e-05, "loss": 0.4458, "step": 2603 }, { "epoch": 25.249397590361447, "grad_norm": 13.227378845214844, "learning_rate": 3.735922330097087e-05, "loss": 0.3236, "step": 2604 }, { "epoch": 25.259036144578314, "grad_norm": 10.354339599609375, "learning_rate": 3.735436893203884e-05, "loss": 0.2501, "step": 2605 }, { "epoch": 25.26867469879518, "grad_norm": 17.56131362915039, "learning_rate": 3.7349514563106796e-05, "loss": 0.4958, "step": 2606 }, { "epoch": 25.278313253012048, "grad_norm": 7.920300006866455, "learning_rate": 3.734466019417476e-05, "loss": 0.2263, "step": 2607 }, { "epoch": 25.287951807228914, "grad_norm": 13.739277839660645, "learning_rate": 3.733980582524272e-05, "loss": 0.1624, "step": 2608 }, { "epoch": 25.297590361445785, "grad_norm": 0.6673651933670044, "learning_rate": 3.733495145631068e-05, "loss": 0.2985, "step": 2609 }, { "epoch": 25.30722891566265, "grad_norm": 2.970564126968384, "learning_rate": 3.733009708737864e-05, "loss": 0.0768, "step": 2610 }, { "epoch": 25.31686746987952, "grad_norm": 10.723261833190918, "learning_rate": 3.7325242718446605e-05, "loss": 0.1688, "step": 2611 }, { "epoch": 25.326506024096386, "grad_norm": 3.4338881969451904, "learning_rate": 3.732038834951456e-05, "loss": 0.2127, "step": 2612 }, { "epoch": 25.336144578313252, "grad_norm": 1.4715875387191772, "learning_rate": 3.731553398058253e-05, "loss": 0.264, "step": 2613 }, { "epoch": 25.34578313253012, "grad_norm": 1.3643782138824463, "learning_rate": 3.731067961165049e-05, "loss": 0.1668, "step": 2614 }, { "epoch": 25.355421686746986, "grad_norm": 38.05350875854492, "learning_rate": 3.730582524271845e-05, "loss": 0.3273, "step": 2615 }, { "epoch": 25.365060240963857, "grad_norm": 1.5849403142929077, "learning_rate": 3.7300970873786414e-05, "loss": 0.1931, "step": 2616 }, { "epoch": 25.374698795180723, "grad_norm": 8.523408889770508, "learning_rate": 3.729611650485437e-05, "loss": 0.2284, "step": 2617 }, { "epoch": 25.38433734939759, "grad_norm": 11.197234153747559, "learning_rate": 3.7291262135922336e-05, "loss": 0.3243, "step": 2618 }, { "epoch": 25.393975903614457, "grad_norm": 3.6288743019104004, "learning_rate": 3.7286407766990294e-05, "loss": 0.3132, "step": 2619 }, { "epoch": 25.403614457831324, "grad_norm": 24.94884490966797, "learning_rate": 3.728155339805826e-05, "loss": 0.3466, "step": 2620 }, { "epoch": 25.413253012048195, "grad_norm": 9.910782814025879, "learning_rate": 3.7276699029126216e-05, "loss": 0.2825, "step": 2621 }, { "epoch": 25.42289156626506, "grad_norm": 4.146345138549805, "learning_rate": 3.7271844660194174e-05, "loss": 0.3094, "step": 2622 }, { "epoch": 25.43253012048193, "grad_norm": 9.359118461608887, "learning_rate": 3.726699029126214e-05, "loss": 0.2294, "step": 2623 }, { "epoch": 25.442168674698795, "grad_norm": 17.03042984008789, "learning_rate": 3.7262135922330096e-05, "loss": 0.2169, "step": 2624 }, { "epoch": 25.451807228915662, "grad_norm": 4.548567771911621, "learning_rate": 3.725728155339806e-05, "loss": 0.2536, "step": 2625 }, { "epoch": 25.46144578313253, "grad_norm": 9.25506591796875, "learning_rate": 3.725242718446602e-05, "loss": 0.3754, "step": 2626 }, { "epoch": 25.471084337349396, "grad_norm": 14.146265983581543, "learning_rate": 3.724757281553398e-05, "loss": 0.3768, "step": 2627 }, { "epoch": 25.480722891566266, "grad_norm": 17.527748107910156, "learning_rate": 3.724271844660194e-05, "loss": 0.3604, "step": 2628 }, { "epoch": 25.490361445783133, "grad_norm": 19.26581573486328, "learning_rate": 3.7237864077669905e-05, "loss": 0.3493, "step": 2629 }, { "epoch": 25.5, "grad_norm": 9.153851509094238, "learning_rate": 3.723300970873786e-05, "loss": 0.2683, "step": 2630 }, { "epoch": 25.509638554216867, "grad_norm": 2.6848931312561035, "learning_rate": 3.722815533980583e-05, "loss": 0.2449, "step": 2631 }, { "epoch": 25.519277108433734, "grad_norm": 18.401586532592773, "learning_rate": 3.7223300970873785e-05, "loss": 0.1954, "step": 2632 }, { "epoch": 25.528915662650604, "grad_norm": 14.949087142944336, "learning_rate": 3.721844660194175e-05, "loss": 0.403, "step": 2633 }, { "epoch": 25.53855421686747, "grad_norm": 6.443897724151611, "learning_rate": 3.721359223300971e-05, "loss": 0.2651, "step": 2634 }, { "epoch": 25.548192771084338, "grad_norm": 26.85508155822754, "learning_rate": 3.720873786407767e-05, "loss": 0.2493, "step": 2635 }, { "epoch": 25.557831325301205, "grad_norm": 13.14754867553711, "learning_rate": 3.720388349514563e-05, "loss": 0.3937, "step": 2636 }, { "epoch": 25.56746987951807, "grad_norm": 4.715914249420166, "learning_rate": 3.7199029126213595e-05, "loss": 0.1997, "step": 2637 }, { "epoch": 25.57710843373494, "grad_norm": 5.066986560821533, "learning_rate": 3.719417475728155e-05, "loss": 0.174, "step": 2638 }, { "epoch": 25.586746987951805, "grad_norm": 8.191783905029297, "learning_rate": 3.718932038834952e-05, "loss": 0.3136, "step": 2639 }, { "epoch": 25.596385542168676, "grad_norm": 24.32651138305664, "learning_rate": 3.718446601941748e-05, "loss": 0.312, "step": 2640 }, { "epoch": 25.606024096385543, "grad_norm": 8.060317039489746, "learning_rate": 3.717961165048544e-05, "loss": 0.4367, "step": 2641 }, { "epoch": 25.61566265060241, "grad_norm": 4.384703159332275, "learning_rate": 3.7174757281553404e-05, "loss": 0.3618, "step": 2642 }, { "epoch": 25.625301204819277, "grad_norm": 9.573484420776367, "learning_rate": 3.716990291262136e-05, "loss": 0.3639, "step": 2643 }, { "epoch": 25.634939759036143, "grad_norm": 8.835271835327148, "learning_rate": 3.7165048543689326e-05, "loss": 0.5002, "step": 2644 }, { "epoch": 25.644578313253014, "grad_norm": 7.5700812339782715, "learning_rate": 3.7160194174757284e-05, "loss": 0.1527, "step": 2645 }, { "epoch": 25.65421686746988, "grad_norm": 14.76187801361084, "learning_rate": 3.715533980582525e-05, "loss": 0.3497, "step": 2646 }, { "epoch": 25.663855421686748, "grad_norm": 1.9688602685928345, "learning_rate": 3.7150485436893206e-05, "loss": 0.1221, "step": 2647 }, { "epoch": 25.673493975903614, "grad_norm": 6.522762298583984, "learning_rate": 3.714563106796117e-05, "loss": 0.282, "step": 2648 }, { "epoch": 25.68313253012048, "grad_norm": 13.538207054138184, "learning_rate": 3.714077669902913e-05, "loss": 0.2482, "step": 2649 }, { "epoch": 25.69277108433735, "grad_norm": 5.590204238891602, "learning_rate": 3.7135922330097086e-05, "loss": 0.3102, "step": 2650 }, { "epoch": 25.702409638554215, "grad_norm": 17.046777725219727, "learning_rate": 3.713106796116505e-05, "loss": 0.2656, "step": 2651 }, { "epoch": 25.712048192771086, "grad_norm": 3.732842445373535, "learning_rate": 3.712621359223301e-05, "loss": 0.1207, "step": 2652 }, { "epoch": 25.721686746987952, "grad_norm": 3.3667590618133545, "learning_rate": 3.712135922330097e-05, "loss": 0.2403, "step": 2653 }, { "epoch": 25.73132530120482, "grad_norm": 17.013669967651367, "learning_rate": 3.711650485436893e-05, "loss": 0.1899, "step": 2654 }, { "epoch": 25.740963855421686, "grad_norm": 10.288735389709473, "learning_rate": 3.7111650485436895e-05, "loss": 0.1655, "step": 2655 }, { "epoch": 25.750602409638553, "grad_norm": 1.9907119274139404, "learning_rate": 3.710679611650485e-05, "loss": 0.2869, "step": 2656 }, { "epoch": 25.760240963855424, "grad_norm": 9.768619537353516, "learning_rate": 3.710194174757282e-05, "loss": 0.2334, "step": 2657 }, { "epoch": 25.76987951807229, "grad_norm": 21.724594116210938, "learning_rate": 3.7097087378640775e-05, "loss": 0.2391, "step": 2658 }, { "epoch": 25.779518072289157, "grad_norm": 6.806708812713623, "learning_rate": 3.709223300970874e-05, "loss": 0.418, "step": 2659 }, { "epoch": 25.789156626506024, "grad_norm": 7.231996059417725, "learning_rate": 3.70873786407767e-05, "loss": 0.0973, "step": 2660 }, { "epoch": 25.79879518072289, "grad_norm": 6.821699142456055, "learning_rate": 3.708252427184466e-05, "loss": 0.317, "step": 2661 }, { "epoch": 25.808433734939758, "grad_norm": 5.30513858795166, "learning_rate": 3.707766990291262e-05, "loss": 0.3956, "step": 2662 }, { "epoch": 25.818072289156625, "grad_norm": 12.228585243225098, "learning_rate": 3.7072815533980584e-05, "loss": 0.2884, "step": 2663 }, { "epoch": 25.827710843373495, "grad_norm": 5.229503631591797, "learning_rate": 3.706796116504855e-05, "loss": 0.3302, "step": 2664 }, { "epoch": 25.837349397590362, "grad_norm": 5.521374702453613, "learning_rate": 3.7063106796116507e-05, "loss": 0.3087, "step": 2665 }, { "epoch": 25.84698795180723, "grad_norm": 16.03432273864746, "learning_rate": 3.705825242718447e-05, "loss": 0.1696, "step": 2666 }, { "epoch": 25.856626506024096, "grad_norm": 3.288855791091919, "learning_rate": 3.705339805825243e-05, "loss": 0.1509, "step": 2667 }, { "epoch": 25.866265060240963, "grad_norm": 18.550024032592773, "learning_rate": 3.7048543689320393e-05, "loss": 0.1625, "step": 2668 }, { "epoch": 25.87590361445783, "grad_norm": 2.113718271255493, "learning_rate": 3.704368932038835e-05, "loss": 0.1556, "step": 2669 }, { "epoch": 25.8855421686747, "grad_norm": 7.662268161773682, "learning_rate": 3.7038834951456316e-05, "loss": 0.1565, "step": 2670 }, { "epoch": 25.895180722891567, "grad_norm": 8.794183731079102, "learning_rate": 3.7033980582524273e-05, "loss": 0.1731, "step": 2671 }, { "epoch": 25.904819277108434, "grad_norm": 9.62356185913086, "learning_rate": 3.702912621359224e-05, "loss": 0.4591, "step": 2672 }, { "epoch": 25.9144578313253, "grad_norm": 3.717597007751465, "learning_rate": 3.7024271844660196e-05, "loss": 0.3629, "step": 2673 }, { "epoch": 25.924096385542168, "grad_norm": 9.988271713256836, "learning_rate": 3.701941747572816e-05, "loss": 0.1825, "step": 2674 }, { "epoch": 25.933734939759034, "grad_norm": 11.792613983154297, "learning_rate": 3.701456310679612e-05, "loss": 0.4314, "step": 2675 }, { "epoch": 25.943373493975905, "grad_norm": 7.868515968322754, "learning_rate": 3.700970873786408e-05, "loss": 0.3892, "step": 2676 }, { "epoch": 25.95301204819277, "grad_norm": 3.724980592727661, "learning_rate": 3.700485436893204e-05, "loss": 0.2249, "step": 2677 }, { "epoch": 25.96265060240964, "grad_norm": 4.552977085113525, "learning_rate": 3.7e-05, "loss": 0.3296, "step": 2678 }, { "epoch": 25.972289156626506, "grad_norm": 1.4427621364593506, "learning_rate": 3.699514563106796e-05, "loss": 0.1927, "step": 2679 }, { "epoch": 25.981927710843372, "grad_norm": 10.198486328125, "learning_rate": 3.699029126213592e-05, "loss": 0.1686, "step": 2680 }, { "epoch": 25.99156626506024, "grad_norm": 9.672926902770996, "learning_rate": 3.6985436893203885e-05, "loss": 0.1578, "step": 2681 }, { "epoch": 26.00722891566265, "grad_norm": 4.737537384033203, "learning_rate": 3.698058252427184e-05, "loss": 0.3755, "step": 2682 }, { "epoch": 26.016867469879518, "grad_norm": 2.219717502593994, "learning_rate": 3.697572815533981e-05, "loss": 0.431, "step": 2683 }, { "epoch": 26.026506024096385, "grad_norm": 8.171062469482422, "learning_rate": 3.6970873786407765e-05, "loss": 0.3784, "step": 2684 }, { "epoch": 26.03614457831325, "grad_norm": 4.787692546844482, "learning_rate": 3.696601941747573e-05, "loss": 0.1605, "step": 2685 }, { "epoch": 26.045783132530122, "grad_norm": 3.560943365097046, "learning_rate": 3.696116504854369e-05, "loss": 0.3644, "step": 2686 }, { "epoch": 26.05542168674699, "grad_norm": 15.69239616394043, "learning_rate": 3.695631067961165e-05, "loss": 0.4694, "step": 2687 }, { "epoch": 26.065060240963856, "grad_norm": 2.6847002506256104, "learning_rate": 3.695145631067961e-05, "loss": 0.2643, "step": 2688 }, { "epoch": 26.074698795180723, "grad_norm": 4.0911545753479, "learning_rate": 3.6946601941747574e-05, "loss": 0.1162, "step": 2689 }, { "epoch": 26.08433734939759, "grad_norm": 14.973780632019043, "learning_rate": 3.694174757281554e-05, "loss": 0.2753, "step": 2690 }, { "epoch": 26.093975903614457, "grad_norm": 16.73080825805664, "learning_rate": 3.6936893203883496e-05, "loss": 0.2781, "step": 2691 }, { "epoch": 26.103614457831327, "grad_norm": 8.234735488891602, "learning_rate": 3.693203883495146e-05, "loss": 0.3658, "step": 2692 }, { "epoch": 26.113253012048194, "grad_norm": 4.532176971435547, "learning_rate": 3.692718446601942e-05, "loss": 0.2546, "step": 2693 }, { "epoch": 26.12289156626506, "grad_norm": 10.60953426361084, "learning_rate": 3.692233009708738e-05, "loss": 0.3231, "step": 2694 }, { "epoch": 26.132530120481928, "grad_norm": 3.0414938926696777, "learning_rate": 3.691747572815534e-05, "loss": 0.2524, "step": 2695 }, { "epoch": 26.142168674698794, "grad_norm": 16.684789657592773, "learning_rate": 3.6912621359223306e-05, "loss": 0.2543, "step": 2696 }, { "epoch": 26.15180722891566, "grad_norm": 5.834177494049072, "learning_rate": 3.690776699029126e-05, "loss": 0.3032, "step": 2697 }, { "epoch": 26.16144578313253, "grad_norm": 3.7372498512268066, "learning_rate": 3.690291262135923e-05, "loss": 0.2425, "step": 2698 }, { "epoch": 26.1710843373494, "grad_norm": 2.627988576889038, "learning_rate": 3.6898058252427186e-05, "loss": 0.3778, "step": 2699 }, { "epoch": 26.180722891566266, "grad_norm": 6.169162273406982, "learning_rate": 3.689320388349515e-05, "loss": 0.313, "step": 2700 }, { "epoch": 26.190361445783132, "grad_norm": 2.1655144691467285, "learning_rate": 3.688834951456311e-05, "loss": 0.156, "step": 2701 }, { "epoch": 26.2, "grad_norm": 18.440074920654297, "learning_rate": 3.688349514563107e-05, "loss": 0.43, "step": 2702 }, { "epoch": 26.209638554216866, "grad_norm": 4.519237518310547, "learning_rate": 3.687864077669903e-05, "loss": 0.3831, "step": 2703 }, { "epoch": 26.219277108433737, "grad_norm": 3.3307719230651855, "learning_rate": 3.6873786407766995e-05, "loss": 0.1775, "step": 2704 }, { "epoch": 26.228915662650603, "grad_norm": 13.969783782958984, "learning_rate": 3.686893203883495e-05, "loss": 0.1833, "step": 2705 }, { "epoch": 26.23855421686747, "grad_norm": 2.7968881130218506, "learning_rate": 3.686407766990291e-05, "loss": 0.3019, "step": 2706 }, { "epoch": 26.248192771084337, "grad_norm": 12.753390312194824, "learning_rate": 3.6859223300970875e-05, "loss": 0.4043, "step": 2707 }, { "epoch": 26.257831325301204, "grad_norm": 4.492683410644531, "learning_rate": 3.685436893203883e-05, "loss": 0.4177, "step": 2708 }, { "epoch": 26.26746987951807, "grad_norm": 4.790268421173096, "learning_rate": 3.68495145631068e-05, "loss": 0.2851, "step": 2709 }, { "epoch": 26.27710843373494, "grad_norm": 2.803922653198242, "learning_rate": 3.6844660194174755e-05, "loss": 0.2789, "step": 2710 }, { "epoch": 26.28674698795181, "grad_norm": 5.027953147888184, "learning_rate": 3.683980582524272e-05, "loss": 0.2009, "step": 2711 }, { "epoch": 26.296385542168675, "grad_norm": 19.430831909179688, "learning_rate": 3.683495145631068e-05, "loss": 0.322, "step": 2712 }, { "epoch": 26.306024096385542, "grad_norm": 4.018276691436768, "learning_rate": 3.683009708737864e-05, "loss": 0.1809, "step": 2713 }, { "epoch": 26.31566265060241, "grad_norm": 7.549533367156982, "learning_rate": 3.6825242718446606e-05, "loss": 0.2049, "step": 2714 }, { "epoch": 26.325301204819276, "grad_norm": 4.588858127593994, "learning_rate": 3.682038834951457e-05, "loss": 0.2534, "step": 2715 }, { "epoch": 26.334939759036146, "grad_norm": 2.7092559337615967, "learning_rate": 3.681553398058253e-05, "loss": 0.3026, "step": 2716 }, { "epoch": 26.344578313253013, "grad_norm": 2.4663891792297363, "learning_rate": 3.6810679611650486e-05, "loss": 0.1293, "step": 2717 }, { "epoch": 26.35421686746988, "grad_norm": 35.31010055541992, "learning_rate": 3.680582524271845e-05, "loss": 0.2923, "step": 2718 }, { "epoch": 26.363855421686747, "grad_norm": 24.306472778320312, "learning_rate": 3.680097087378641e-05, "loss": 0.4362, "step": 2719 }, { "epoch": 26.373493975903614, "grad_norm": 12.520792961120605, "learning_rate": 3.679611650485437e-05, "loss": 0.2203, "step": 2720 }, { "epoch": 26.38313253012048, "grad_norm": 9.756479263305664, "learning_rate": 3.679126213592233e-05, "loss": 0.2649, "step": 2721 }, { "epoch": 26.39277108433735, "grad_norm": 2.0646767616271973, "learning_rate": 3.6786407766990295e-05, "loss": 0.1691, "step": 2722 }, { "epoch": 26.402409638554218, "grad_norm": 25.553312301635742, "learning_rate": 3.678155339805825e-05, "loss": 0.2106, "step": 2723 }, { "epoch": 26.412048192771085, "grad_norm": 12.691906929016113, "learning_rate": 3.677669902912622e-05, "loss": 0.2854, "step": 2724 }, { "epoch": 26.42168674698795, "grad_norm": 15.383675575256348, "learning_rate": 3.6771844660194175e-05, "loss": 0.2895, "step": 2725 }, { "epoch": 26.43132530120482, "grad_norm": 3.4396841526031494, "learning_rate": 3.676699029126214e-05, "loss": 0.169, "step": 2726 }, { "epoch": 26.440963855421685, "grad_norm": 8.135239601135254, "learning_rate": 3.67621359223301e-05, "loss": 0.2405, "step": 2727 }, { "epoch": 26.450602409638556, "grad_norm": 2.268479347229004, "learning_rate": 3.675728155339806e-05, "loss": 0.1813, "step": 2728 }, { "epoch": 26.460240963855423, "grad_norm": 2.954416036605835, "learning_rate": 3.675242718446602e-05, "loss": 0.2553, "step": 2729 }, { "epoch": 26.46987951807229, "grad_norm": 4.361794948577881, "learning_rate": 3.6747572815533984e-05, "loss": 0.2044, "step": 2730 }, { "epoch": 26.479518072289157, "grad_norm": 3.7606568336486816, "learning_rate": 3.674271844660194e-05, "loss": 0.435, "step": 2731 }, { "epoch": 26.489156626506023, "grad_norm": 10.978249549865723, "learning_rate": 3.673786407766991e-05, "loss": 0.5028, "step": 2732 }, { "epoch": 26.49879518072289, "grad_norm": 5.937941551208496, "learning_rate": 3.6733009708737865e-05, "loss": 0.1942, "step": 2733 }, { "epoch": 26.50843373493976, "grad_norm": 7.013629913330078, "learning_rate": 3.672815533980582e-05, "loss": 0.204, "step": 2734 }, { "epoch": 26.518072289156628, "grad_norm": 2.6794819831848145, "learning_rate": 3.672330097087379e-05, "loss": 0.251, "step": 2735 }, { "epoch": 26.527710843373494, "grad_norm": 7.240952968597412, "learning_rate": 3.6718446601941745e-05, "loss": 0.3916, "step": 2736 }, { "epoch": 26.53734939759036, "grad_norm": 8.719618797302246, "learning_rate": 3.671359223300971e-05, "loss": 0.1557, "step": 2737 }, { "epoch": 26.54698795180723, "grad_norm": 2.088534116744995, "learning_rate": 3.670873786407767e-05, "loss": 0.3344, "step": 2738 }, { "epoch": 26.556626506024095, "grad_norm": 5.185559272766113, "learning_rate": 3.670388349514563e-05, "loss": 0.1339, "step": 2739 }, { "epoch": 26.566265060240966, "grad_norm": 0.9085701107978821, "learning_rate": 3.6699029126213596e-05, "loss": 0.1288, "step": 2740 }, { "epoch": 26.575903614457832, "grad_norm": 8.040753364562988, "learning_rate": 3.669417475728156e-05, "loss": 0.1983, "step": 2741 }, { "epoch": 26.5855421686747, "grad_norm": 8.424850463867188, "learning_rate": 3.668932038834952e-05, "loss": 0.2819, "step": 2742 }, { "epoch": 26.595180722891566, "grad_norm": 9.846990585327148, "learning_rate": 3.668446601941748e-05, "loss": 0.3036, "step": 2743 }, { "epoch": 26.604819277108433, "grad_norm": 1.4922882318496704, "learning_rate": 3.667961165048544e-05, "loss": 0.1768, "step": 2744 }, { "epoch": 26.6144578313253, "grad_norm": 8.199638366699219, "learning_rate": 3.66747572815534e-05, "loss": 0.5015, "step": 2745 }, { "epoch": 26.62409638554217, "grad_norm": 21.354660034179688, "learning_rate": 3.666990291262136e-05, "loss": 0.334, "step": 2746 }, { "epoch": 26.633734939759037, "grad_norm": 2.588620662689209, "learning_rate": 3.666504854368932e-05, "loss": 0.2711, "step": 2747 }, { "epoch": 26.643373493975904, "grad_norm": 5.086578369140625, "learning_rate": 3.6660194174757285e-05, "loss": 0.33, "step": 2748 }, { "epoch": 26.65301204819277, "grad_norm": 2.9290666580200195, "learning_rate": 3.665533980582524e-05, "loss": 0.1955, "step": 2749 }, { "epoch": 26.662650602409638, "grad_norm": 5.170743465423584, "learning_rate": 3.665048543689321e-05, "loss": 0.3372, "step": 2750 }, { "epoch": 26.672289156626505, "grad_norm": 5.185875415802002, "learning_rate": 3.6645631067961165e-05, "loss": 0.3472, "step": 2751 }, { "epoch": 26.681927710843375, "grad_norm": 10.672532081604004, "learning_rate": 3.664077669902913e-05, "loss": 0.2198, "step": 2752 }, { "epoch": 26.691566265060242, "grad_norm": 2.541860818862915, "learning_rate": 3.663592233009709e-05, "loss": 0.2291, "step": 2753 }, { "epoch": 26.70120481927711, "grad_norm": 15.345331192016602, "learning_rate": 3.663106796116505e-05, "loss": 0.3933, "step": 2754 }, { "epoch": 26.710843373493976, "grad_norm": 11.527290344238281, "learning_rate": 3.662621359223301e-05, "loss": 0.4086, "step": 2755 }, { "epoch": 26.720481927710843, "grad_norm": 15.943427085876465, "learning_rate": 3.6621359223300974e-05, "loss": 0.5659, "step": 2756 }, { "epoch": 26.73012048192771, "grad_norm": 3.2393083572387695, "learning_rate": 3.661650485436893e-05, "loss": 0.2613, "step": 2757 }, { "epoch": 26.739759036144576, "grad_norm": 6.000941753387451, "learning_rate": 3.6611650485436897e-05, "loss": 0.6075, "step": 2758 }, { "epoch": 26.749397590361447, "grad_norm": 15.471744537353516, "learning_rate": 3.6606796116504854e-05, "loss": 0.3452, "step": 2759 }, { "epoch": 26.759036144578314, "grad_norm": 13.838976860046387, "learning_rate": 3.660194174757282e-05, "loss": 0.3462, "step": 2760 }, { "epoch": 26.76867469879518, "grad_norm": 1.4248788356781006, "learning_rate": 3.6597087378640777e-05, "loss": 0.2511, "step": 2761 }, { "epoch": 26.778313253012048, "grad_norm": 10.098450660705566, "learning_rate": 3.6592233009708734e-05, "loss": 0.2553, "step": 2762 }, { "epoch": 26.787951807228914, "grad_norm": 13.849137306213379, "learning_rate": 3.65873786407767e-05, "loss": 0.3554, "step": 2763 }, { "epoch": 26.797590361445785, "grad_norm": 7.370362758636475, "learning_rate": 3.658252427184466e-05, "loss": 0.1509, "step": 2764 }, { "epoch": 26.80722891566265, "grad_norm": 9.126312255859375, "learning_rate": 3.657766990291263e-05, "loss": 0.3642, "step": 2765 }, { "epoch": 26.81686746987952, "grad_norm": 2.07540225982666, "learning_rate": 3.6572815533980586e-05, "loss": 0.3462, "step": 2766 }, { "epoch": 26.826506024096386, "grad_norm": 5.524561882019043, "learning_rate": 3.656796116504855e-05, "loss": 0.3618, "step": 2767 }, { "epoch": 26.836144578313252, "grad_norm": 4.5300374031066895, "learning_rate": 3.656310679611651e-05, "loss": 0.3823, "step": 2768 }, { "epoch": 26.84578313253012, "grad_norm": 1.92404305934906, "learning_rate": 3.655825242718447e-05, "loss": 0.1935, "step": 2769 }, { "epoch": 26.855421686746986, "grad_norm": 9.87675666809082, "learning_rate": 3.655339805825243e-05, "loss": 0.4158, "step": 2770 }, { "epoch": 26.865060240963857, "grad_norm": 3.9949350357055664, "learning_rate": 3.654854368932039e-05, "loss": 0.2491, "step": 2771 }, { "epoch": 26.874698795180723, "grad_norm": 8.495071411132812, "learning_rate": 3.654368932038835e-05, "loss": 0.2476, "step": 2772 }, { "epoch": 26.88433734939759, "grad_norm": 10.605113983154297, "learning_rate": 3.653883495145631e-05, "loss": 0.2721, "step": 2773 }, { "epoch": 26.893975903614457, "grad_norm": 2.3722712993621826, "learning_rate": 3.6533980582524275e-05, "loss": 0.2813, "step": 2774 }, { "epoch": 26.903614457831324, "grad_norm": 7.544497489929199, "learning_rate": 3.652912621359223e-05, "loss": 0.2952, "step": 2775 }, { "epoch": 26.913253012048195, "grad_norm": 17.636686325073242, "learning_rate": 3.65242718446602e-05, "loss": 0.2753, "step": 2776 }, { "epoch": 26.92289156626506, "grad_norm": 3.9578371047973633, "learning_rate": 3.6519417475728155e-05, "loss": 0.172, "step": 2777 }, { "epoch": 26.93253012048193, "grad_norm": 4.612271785736084, "learning_rate": 3.651456310679612e-05, "loss": 0.1508, "step": 2778 }, { "epoch": 26.942168674698795, "grad_norm": 11.256501197814941, "learning_rate": 3.650970873786408e-05, "loss": 0.3777, "step": 2779 }, { "epoch": 26.951807228915662, "grad_norm": 6.314225673675537, "learning_rate": 3.650485436893204e-05, "loss": 0.2956, "step": 2780 }, { "epoch": 26.96144578313253, "grad_norm": 3.4650514125823975, "learning_rate": 3.65e-05, "loss": 0.185, "step": 2781 }, { "epoch": 26.971084337349396, "grad_norm": 14.397181510925293, "learning_rate": 3.6495145631067964e-05, "loss": 0.2015, "step": 2782 }, { "epoch": 26.980722891566266, "grad_norm": 5.238957405090332, "learning_rate": 3.649029126213592e-05, "loss": 0.195, "step": 2783 }, { "epoch": 26.990361445783133, "grad_norm": 5.377532005310059, "learning_rate": 3.6485436893203886e-05, "loss": 0.2864, "step": 2784 }, { "epoch": 27.00602409638554, "grad_norm": 9.267820358276367, "learning_rate": 3.6480582524271844e-05, "loss": 0.4513, "step": 2785 }, { "epoch": 27.01566265060241, "grad_norm": 5.221312999725342, "learning_rate": 3.647572815533981e-05, "loss": 0.2636, "step": 2786 }, { "epoch": 27.02530120481928, "grad_norm": 3.6016626358032227, "learning_rate": 3.6470873786407766e-05, "loss": 0.3043, "step": 2787 }, { "epoch": 27.034939759036146, "grad_norm": 6.098742961883545, "learning_rate": 3.6466019417475724e-05, "loss": 0.1618, "step": 2788 }, { "epoch": 27.044578313253012, "grad_norm": 5.936646938323975, "learning_rate": 3.646116504854369e-05, "loss": 0.2506, "step": 2789 }, { "epoch": 27.05421686746988, "grad_norm": 5.49082612991333, "learning_rate": 3.645631067961165e-05, "loss": 0.3858, "step": 2790 }, { "epoch": 27.063855421686746, "grad_norm": 1.767899513244629, "learning_rate": 3.645145631067962e-05, "loss": 0.1662, "step": 2791 }, { "epoch": 27.073493975903613, "grad_norm": 5.927277565002441, "learning_rate": 3.6446601941747576e-05, "loss": 0.4454, "step": 2792 }, { "epoch": 27.083132530120483, "grad_norm": 2.9833462238311768, "learning_rate": 3.644174757281554e-05, "loss": 0.1624, "step": 2793 }, { "epoch": 27.09277108433735, "grad_norm": 7.724845886230469, "learning_rate": 3.64368932038835e-05, "loss": 0.2092, "step": 2794 }, { "epoch": 27.102409638554217, "grad_norm": 2.402567148208618, "learning_rate": 3.643203883495146e-05, "loss": 0.1323, "step": 2795 }, { "epoch": 27.112048192771084, "grad_norm": 4.759934902191162, "learning_rate": 3.642718446601942e-05, "loss": 0.2616, "step": 2796 }, { "epoch": 27.12168674698795, "grad_norm": 21.16843032836914, "learning_rate": 3.6422330097087385e-05, "loss": 0.3429, "step": 2797 }, { "epoch": 27.131325301204818, "grad_norm": 4.38462495803833, "learning_rate": 3.641747572815534e-05, "loss": 0.3191, "step": 2798 }, { "epoch": 27.14096385542169, "grad_norm": 3.8242218494415283, "learning_rate": 3.64126213592233e-05, "loss": 0.3243, "step": 2799 }, { "epoch": 27.150602409638555, "grad_norm": 5.563554286956787, "learning_rate": 3.6407766990291265e-05, "loss": 0.1942, "step": 2800 }, { "epoch": 27.160240963855422, "grad_norm": 14.245654106140137, "learning_rate": 3.640291262135922e-05, "loss": 0.0952, "step": 2801 }, { "epoch": 27.16987951807229, "grad_norm": 5.802273750305176, "learning_rate": 3.639805825242719e-05, "loss": 0.2425, "step": 2802 }, { "epoch": 27.179518072289156, "grad_norm": 4.186059474945068, "learning_rate": 3.6393203883495145e-05, "loss": 0.3172, "step": 2803 }, { "epoch": 27.189156626506023, "grad_norm": 6.347503662109375, "learning_rate": 3.638834951456311e-05, "loss": 0.1536, "step": 2804 }, { "epoch": 27.198795180722893, "grad_norm": 10.331056594848633, "learning_rate": 3.638349514563107e-05, "loss": 0.2198, "step": 2805 }, { "epoch": 27.20843373493976, "grad_norm": 1.3974140882492065, "learning_rate": 3.637864077669903e-05, "loss": 0.2074, "step": 2806 }, { "epoch": 27.218072289156627, "grad_norm": 2.429835796356201, "learning_rate": 3.637378640776699e-05, "loss": 0.2326, "step": 2807 }, { "epoch": 27.227710843373494, "grad_norm": 13.588373184204102, "learning_rate": 3.6368932038834954e-05, "loss": 0.5259, "step": 2808 }, { "epoch": 27.23734939759036, "grad_norm": 3.127986192703247, "learning_rate": 3.636407766990291e-05, "loss": 0.3009, "step": 2809 }, { "epoch": 27.246987951807228, "grad_norm": 14.413559913635254, "learning_rate": 3.6359223300970876e-05, "loss": 0.3867, "step": 2810 }, { "epoch": 27.256626506024098, "grad_norm": 14.229085922241211, "learning_rate": 3.6354368932038834e-05, "loss": 0.3302, "step": 2811 }, { "epoch": 27.266265060240965, "grad_norm": 6.414801597595215, "learning_rate": 3.63495145631068e-05, "loss": 0.2643, "step": 2812 }, { "epoch": 27.27590361445783, "grad_norm": 6.508942604064941, "learning_rate": 3.6344660194174756e-05, "loss": 0.1879, "step": 2813 }, { "epoch": 27.2855421686747, "grad_norm": 2.8974404335021973, "learning_rate": 3.633980582524272e-05, "loss": 0.1456, "step": 2814 }, { "epoch": 27.295180722891565, "grad_norm": 6.125744819641113, "learning_rate": 3.6334951456310685e-05, "loss": 0.2894, "step": 2815 }, { "epoch": 27.304819277108432, "grad_norm": 6.088405609130859, "learning_rate": 3.633009708737864e-05, "loss": 0.2335, "step": 2816 }, { "epoch": 27.314457831325303, "grad_norm": 8.436656951904297, "learning_rate": 3.632524271844661e-05, "loss": 0.176, "step": 2817 }, { "epoch": 27.32409638554217, "grad_norm": 34.851749420166016, "learning_rate": 3.6320388349514565e-05, "loss": 0.3963, "step": 2818 }, { "epoch": 27.333734939759037, "grad_norm": 3.808814764022827, "learning_rate": 3.631553398058253e-05, "loss": 0.3215, "step": 2819 }, { "epoch": 27.343373493975903, "grad_norm": 2.4090495109558105, "learning_rate": 3.631067961165049e-05, "loss": 0.2429, "step": 2820 }, { "epoch": 27.35301204819277, "grad_norm": 2.5314481258392334, "learning_rate": 3.630582524271845e-05, "loss": 0.1811, "step": 2821 }, { "epoch": 27.362650602409637, "grad_norm": 3.1475470066070557, "learning_rate": 3.630097087378641e-05, "loss": 0.1555, "step": 2822 }, { "epoch": 27.372289156626508, "grad_norm": 1.4730817079544067, "learning_rate": 3.6296116504854374e-05, "loss": 0.1486, "step": 2823 }, { "epoch": 27.381927710843375, "grad_norm": 8.21680736541748, "learning_rate": 3.629126213592233e-05, "loss": 0.0942, "step": 2824 }, { "epoch": 27.39156626506024, "grad_norm": 3.394721269607544, "learning_rate": 3.62864077669903e-05, "loss": 0.2164, "step": 2825 }, { "epoch": 27.40120481927711, "grad_norm": 4.852686882019043, "learning_rate": 3.6281553398058254e-05, "loss": 0.3518, "step": 2826 }, { "epoch": 27.410843373493975, "grad_norm": 22.406131744384766, "learning_rate": 3.627669902912621e-05, "loss": 0.341, "step": 2827 }, { "epoch": 27.420481927710842, "grad_norm": 7.340428829193115, "learning_rate": 3.627184466019418e-05, "loss": 0.3623, "step": 2828 }, { "epoch": 27.430120481927712, "grad_norm": 7.650634765625, "learning_rate": 3.6266990291262135e-05, "loss": 0.3308, "step": 2829 }, { "epoch": 27.43975903614458, "grad_norm": 6.863785743713379, "learning_rate": 3.62621359223301e-05, "loss": 0.5205, "step": 2830 }, { "epoch": 27.449397590361446, "grad_norm": 1.8006038665771484, "learning_rate": 3.625728155339806e-05, "loss": 0.1335, "step": 2831 }, { "epoch": 27.459036144578313, "grad_norm": 4.326522350311279, "learning_rate": 3.625242718446602e-05, "loss": 0.2984, "step": 2832 }, { "epoch": 27.46867469879518, "grad_norm": 16.600194931030273, "learning_rate": 3.624757281553398e-05, "loss": 0.5424, "step": 2833 }, { "epoch": 27.478313253012047, "grad_norm": 5.213439464569092, "learning_rate": 3.6242718446601944e-05, "loss": 0.1089, "step": 2834 }, { "epoch": 27.487951807228917, "grad_norm": 4.585071563720703, "learning_rate": 3.62378640776699e-05, "loss": 0.3238, "step": 2835 }, { "epoch": 27.497590361445784, "grad_norm": 3.164538621902466, "learning_rate": 3.6233009708737866e-05, "loss": 0.2107, "step": 2836 }, { "epoch": 27.50722891566265, "grad_norm": 4.1425604820251465, "learning_rate": 3.6228155339805824e-05, "loss": 0.2564, "step": 2837 }, { "epoch": 27.516867469879518, "grad_norm": 3.0502662658691406, "learning_rate": 3.622330097087379e-05, "loss": 0.2807, "step": 2838 }, { "epoch": 27.526506024096385, "grad_norm": 6.56874942779541, "learning_rate": 3.6218446601941746e-05, "loss": 0.1873, "step": 2839 }, { "epoch": 27.53614457831325, "grad_norm": 15.418595314025879, "learning_rate": 3.621359223300971e-05, "loss": 0.3841, "step": 2840 }, { "epoch": 27.545783132530122, "grad_norm": 3.244178056716919, "learning_rate": 3.6208737864077675e-05, "loss": 0.2804, "step": 2841 }, { "epoch": 27.55542168674699, "grad_norm": 7.866957187652588, "learning_rate": 3.620388349514563e-05, "loss": 0.5267, "step": 2842 }, { "epoch": 27.565060240963856, "grad_norm": 0.9526791572570801, "learning_rate": 3.61990291262136e-05, "loss": 0.1173, "step": 2843 }, { "epoch": 27.574698795180723, "grad_norm": 23.915794372558594, "learning_rate": 3.6194174757281555e-05, "loss": 0.2059, "step": 2844 }, { "epoch": 27.58433734939759, "grad_norm": 7.748142719268799, "learning_rate": 3.618932038834952e-05, "loss": 0.0971, "step": 2845 }, { "epoch": 27.593975903614457, "grad_norm": 5.0839080810546875, "learning_rate": 3.618446601941748e-05, "loss": 0.3864, "step": 2846 }, { "epoch": 27.603614457831327, "grad_norm": 12.830086708068848, "learning_rate": 3.617961165048544e-05, "loss": 0.2094, "step": 2847 }, { "epoch": 27.613253012048194, "grad_norm": 18.639419555664062, "learning_rate": 3.61747572815534e-05, "loss": 0.342, "step": 2848 }, { "epoch": 27.62289156626506, "grad_norm": 7.673815727233887, "learning_rate": 3.6169902912621364e-05, "loss": 0.4839, "step": 2849 }, { "epoch": 27.632530120481928, "grad_norm": 11.19152545928955, "learning_rate": 3.616504854368932e-05, "loss": 0.2394, "step": 2850 }, { "epoch": 27.642168674698794, "grad_norm": 19.54655647277832, "learning_rate": 3.6160194174757286e-05, "loss": 0.3846, "step": 2851 }, { "epoch": 27.65180722891566, "grad_norm": 12.248712539672852, "learning_rate": 3.6155339805825244e-05, "loss": 0.4658, "step": 2852 }, { "epoch": 27.66144578313253, "grad_norm": 2.48825740814209, "learning_rate": 3.615048543689321e-05, "loss": 0.1524, "step": 2853 }, { "epoch": 27.6710843373494, "grad_norm": 14.62050724029541, "learning_rate": 3.6145631067961167e-05, "loss": 0.3748, "step": 2854 }, { "epoch": 27.680722891566266, "grad_norm": 34.208770751953125, "learning_rate": 3.6140776699029124e-05, "loss": 0.3773, "step": 2855 }, { "epoch": 27.690361445783132, "grad_norm": 10.079337120056152, "learning_rate": 3.613592233009709e-05, "loss": 0.209, "step": 2856 }, { "epoch": 27.7, "grad_norm": 4.722128868103027, "learning_rate": 3.6131067961165047e-05, "loss": 0.3421, "step": 2857 }, { "epoch": 27.709638554216866, "grad_norm": 3.791710615158081, "learning_rate": 3.612621359223301e-05, "loss": 0.2911, "step": 2858 }, { "epoch": 27.719277108433737, "grad_norm": 1.2609931230545044, "learning_rate": 3.612135922330097e-05, "loss": 0.2202, "step": 2859 }, { "epoch": 27.728915662650603, "grad_norm": 2.2913267612457275, "learning_rate": 3.6116504854368933e-05, "loss": 0.1855, "step": 2860 }, { "epoch": 27.73855421686747, "grad_norm": 3.8581814765930176, "learning_rate": 3.611165048543689e-05, "loss": 0.2401, "step": 2861 }, { "epoch": 27.748192771084337, "grad_norm": 15.722972869873047, "learning_rate": 3.6106796116504856e-05, "loss": 0.1995, "step": 2862 }, { "epoch": 27.757831325301204, "grad_norm": 6.7273125648498535, "learning_rate": 3.6101941747572813e-05, "loss": 0.1919, "step": 2863 }, { "epoch": 27.76746987951807, "grad_norm": 2.668850898742676, "learning_rate": 3.609708737864078e-05, "loss": 0.198, "step": 2864 }, { "epoch": 27.77710843373494, "grad_norm": 3.3029966354370117, "learning_rate": 3.6092233009708736e-05, "loss": 0.1954, "step": 2865 }, { "epoch": 27.78674698795181, "grad_norm": 13.301776885986328, "learning_rate": 3.60873786407767e-05, "loss": 0.4187, "step": 2866 }, { "epoch": 27.796385542168675, "grad_norm": 12.794827461242676, "learning_rate": 3.6082524271844665e-05, "loss": 0.2979, "step": 2867 }, { "epoch": 27.806024096385542, "grad_norm": 5.96229362487793, "learning_rate": 3.607766990291262e-05, "loss": 0.2309, "step": 2868 }, { "epoch": 27.81566265060241, "grad_norm": 18.604129791259766, "learning_rate": 3.607281553398059e-05, "loss": 0.3566, "step": 2869 }, { "epoch": 27.825301204819276, "grad_norm": 43.17854690551758, "learning_rate": 3.6067961165048545e-05, "loss": 0.3685, "step": 2870 }, { "epoch": 27.834939759036146, "grad_norm": 5.907766819000244, "learning_rate": 3.606310679611651e-05, "loss": 0.3273, "step": 2871 }, { "epoch": 27.844578313253013, "grad_norm": 1.1731877326965332, "learning_rate": 3.605825242718447e-05, "loss": 0.1165, "step": 2872 }, { "epoch": 27.85421686746988, "grad_norm": 8.57100772857666, "learning_rate": 3.605339805825243e-05, "loss": 0.4755, "step": 2873 }, { "epoch": 27.863855421686747, "grad_norm": 28.703706741333008, "learning_rate": 3.604854368932039e-05, "loss": 0.331, "step": 2874 }, { "epoch": 27.873493975903614, "grad_norm": 14.278800964355469, "learning_rate": 3.6043689320388354e-05, "loss": 0.3441, "step": 2875 }, { "epoch": 27.88313253012048, "grad_norm": 8.989706039428711, "learning_rate": 3.603883495145631e-05, "loss": 0.3192, "step": 2876 }, { "epoch": 27.89277108433735, "grad_norm": 3.2547831535339355, "learning_rate": 3.6033980582524276e-05, "loss": 0.2039, "step": 2877 }, { "epoch": 27.902409638554218, "grad_norm": 2.3495497703552246, "learning_rate": 3.6029126213592234e-05, "loss": 0.1561, "step": 2878 }, { "epoch": 27.912048192771085, "grad_norm": 3.4527359008789062, "learning_rate": 3.60242718446602e-05, "loss": 0.1039, "step": 2879 }, { "epoch": 27.92168674698795, "grad_norm": 3.0645430088043213, "learning_rate": 3.6019417475728156e-05, "loss": 0.2464, "step": 2880 }, { "epoch": 27.93132530120482, "grad_norm": 3.645888090133667, "learning_rate": 3.601456310679612e-05, "loss": 0.2692, "step": 2881 }, { "epoch": 27.940963855421685, "grad_norm": 3.311596393585205, "learning_rate": 3.600970873786408e-05, "loss": 0.2474, "step": 2882 }, { "epoch": 27.950602409638556, "grad_norm": 9.519781112670898, "learning_rate": 3.6004854368932036e-05, "loss": 0.3623, "step": 2883 }, { "epoch": 27.960240963855423, "grad_norm": 6.766671180725098, "learning_rate": 3.6e-05, "loss": 0.278, "step": 2884 }, { "epoch": 27.96987951807229, "grad_norm": 10.553816795349121, "learning_rate": 3.599514563106796e-05, "loss": 0.2574, "step": 2885 }, { "epoch": 27.979518072289157, "grad_norm": 6.727451801300049, "learning_rate": 3.599029126213592e-05, "loss": 0.2916, "step": 2886 }, { "epoch": 27.989156626506023, "grad_norm": 2.249431848526001, "learning_rate": 3.598543689320388e-05, "loss": 0.2993, "step": 2887 }, { "epoch": 28.004819277108435, "grad_norm": 5.421168327331543, "learning_rate": 3.5980582524271845e-05, "loss": 0.3039, "step": 2888 }, { "epoch": 28.014457831325302, "grad_norm": 5.157059192657471, "learning_rate": 3.59757281553398e-05, "loss": 0.2177, "step": 2889 }, { "epoch": 28.02409638554217, "grad_norm": 2.7611780166625977, "learning_rate": 3.597087378640777e-05, "loss": 0.1955, "step": 2890 }, { "epoch": 28.033734939759036, "grad_norm": 3.3616411685943604, "learning_rate": 3.596601941747573e-05, "loss": 0.2987, "step": 2891 }, { "epoch": 28.043373493975903, "grad_norm": 7.499139785766602, "learning_rate": 3.59611650485437e-05, "loss": 0.5438, "step": 2892 }, { "epoch": 28.05301204819277, "grad_norm": 2.6098244190216064, "learning_rate": 3.5956310679611655e-05, "loss": 0.2442, "step": 2893 }, { "epoch": 28.06265060240964, "grad_norm": 3.6482160091400146, "learning_rate": 3.595145631067961e-05, "loss": 0.2242, "step": 2894 }, { "epoch": 28.072289156626507, "grad_norm": 3.419644594192505, "learning_rate": 3.594660194174758e-05, "loss": 0.336, "step": 2895 }, { "epoch": 28.081927710843374, "grad_norm": 4.560307502746582, "learning_rate": 3.5941747572815535e-05, "loss": 0.4511, "step": 2896 }, { "epoch": 28.09156626506024, "grad_norm": 5.178317070007324, "learning_rate": 3.59368932038835e-05, "loss": 0.3985, "step": 2897 }, { "epoch": 28.101204819277108, "grad_norm": 3.9089772701263428, "learning_rate": 3.593203883495146e-05, "loss": 0.2829, "step": 2898 }, { "epoch": 28.110843373493974, "grad_norm": 1.4143925905227661, "learning_rate": 3.592718446601942e-05, "loss": 0.1256, "step": 2899 }, { "epoch": 28.120481927710845, "grad_norm": 18.786314010620117, "learning_rate": 3.592233009708738e-05, "loss": 0.2657, "step": 2900 }, { "epoch": 28.13012048192771, "grad_norm": 19.23563003540039, "learning_rate": 3.5917475728155344e-05, "loss": 0.2446, "step": 2901 }, { "epoch": 28.13975903614458, "grad_norm": 2.5589795112609863, "learning_rate": 3.59126213592233e-05, "loss": 0.139, "step": 2902 }, { "epoch": 28.149397590361446, "grad_norm": 11.711976051330566, "learning_rate": 3.5907766990291266e-05, "loss": 0.261, "step": 2903 }, { "epoch": 28.159036144578312, "grad_norm": 5.765239238739014, "learning_rate": 3.5902912621359224e-05, "loss": 0.3528, "step": 2904 }, { "epoch": 28.16867469879518, "grad_norm": 3.000293016433716, "learning_rate": 3.589805825242719e-05, "loss": 0.1197, "step": 2905 }, { "epoch": 28.17831325301205, "grad_norm": 9.745441436767578, "learning_rate": 3.5893203883495146e-05, "loss": 0.415, "step": 2906 }, { "epoch": 28.187951807228917, "grad_norm": 3.577080488204956, "learning_rate": 3.588834951456311e-05, "loss": 0.3517, "step": 2907 }, { "epoch": 28.197590361445783, "grad_norm": 8.768906593322754, "learning_rate": 3.588349514563107e-05, "loss": 0.4467, "step": 2908 }, { "epoch": 28.20722891566265, "grad_norm": 3.957280397415161, "learning_rate": 3.587864077669903e-05, "loss": 0.2867, "step": 2909 }, { "epoch": 28.216867469879517, "grad_norm": 6.8015522956848145, "learning_rate": 3.587378640776699e-05, "loss": 0.273, "step": 2910 }, { "epoch": 28.226506024096384, "grad_norm": 3.591066360473633, "learning_rate": 3.586893203883495e-05, "loss": 0.1252, "step": 2911 }, { "epoch": 28.236144578313255, "grad_norm": 3.27258563041687, "learning_rate": 3.586407766990291e-05, "loss": 0.1199, "step": 2912 }, { "epoch": 28.24578313253012, "grad_norm": 3.183629035949707, "learning_rate": 3.585922330097087e-05, "loss": 0.2931, "step": 2913 }, { "epoch": 28.25542168674699, "grad_norm": 3.8096718788146973, "learning_rate": 3.5854368932038835e-05, "loss": 0.3065, "step": 2914 }, { "epoch": 28.265060240963855, "grad_norm": 11.291254043579102, "learning_rate": 3.584951456310679e-05, "loss": 0.3571, "step": 2915 }, { "epoch": 28.274698795180722, "grad_norm": 6.1936798095703125, "learning_rate": 3.5844660194174764e-05, "loss": 0.3009, "step": 2916 }, { "epoch": 28.28433734939759, "grad_norm": 4.09614896774292, "learning_rate": 3.583980582524272e-05, "loss": 0.2431, "step": 2917 }, { "epoch": 28.29397590361446, "grad_norm": 10.25815486907959, "learning_rate": 3.583495145631069e-05, "loss": 0.1971, "step": 2918 }, { "epoch": 28.303614457831326, "grad_norm": 22.28202247619629, "learning_rate": 3.5830097087378644e-05, "loss": 0.2604, "step": 2919 }, { "epoch": 28.313253012048193, "grad_norm": 7.82061243057251, "learning_rate": 3.58252427184466e-05, "loss": 0.4702, "step": 2920 }, { "epoch": 28.32289156626506, "grad_norm": 8.40011215209961, "learning_rate": 3.582038834951457e-05, "loss": 0.4979, "step": 2921 }, { "epoch": 28.332530120481927, "grad_norm": 13.611349105834961, "learning_rate": 3.5815533980582524e-05, "loss": 0.3278, "step": 2922 }, { "epoch": 28.342168674698794, "grad_norm": 4.318121433258057, "learning_rate": 3.581067961165049e-05, "loss": 0.2877, "step": 2923 }, { "epoch": 28.351807228915664, "grad_norm": 4.913221836090088, "learning_rate": 3.580582524271845e-05, "loss": 0.4359, "step": 2924 }, { "epoch": 28.36144578313253, "grad_norm": 10.841287612915039, "learning_rate": 3.580097087378641e-05, "loss": 0.3005, "step": 2925 }, { "epoch": 28.371084337349398, "grad_norm": 3.333693265914917, "learning_rate": 3.579611650485437e-05, "loss": 0.3221, "step": 2926 }, { "epoch": 28.380722891566265, "grad_norm": 3.9633076190948486, "learning_rate": 3.5791262135922334e-05, "loss": 0.1661, "step": 2927 }, { "epoch": 28.39036144578313, "grad_norm": 6.835230350494385, "learning_rate": 3.578640776699029e-05, "loss": 0.4815, "step": 2928 }, { "epoch": 28.4, "grad_norm": 2.526440143585205, "learning_rate": 3.5781553398058256e-05, "loss": 0.3112, "step": 2929 }, { "epoch": 28.40963855421687, "grad_norm": 4.071704387664795, "learning_rate": 3.5776699029126214e-05, "loss": 0.2659, "step": 2930 }, { "epoch": 28.419277108433736, "grad_norm": 18.49576187133789, "learning_rate": 3.577184466019418e-05, "loss": 0.2045, "step": 2931 }, { "epoch": 28.428915662650603, "grad_norm": 8.524687767028809, "learning_rate": 3.5766990291262136e-05, "loss": 0.38, "step": 2932 }, { "epoch": 28.43855421686747, "grad_norm": 4.203794002532959, "learning_rate": 3.57621359223301e-05, "loss": 0.1754, "step": 2933 }, { "epoch": 28.448192771084337, "grad_norm": 11.44940185546875, "learning_rate": 3.575728155339806e-05, "loss": 0.3486, "step": 2934 }, { "epoch": 28.457831325301203, "grad_norm": 8.950465202331543, "learning_rate": 3.575242718446602e-05, "loss": 0.2926, "step": 2935 }, { "epoch": 28.467469879518074, "grad_norm": 4.826960563659668, "learning_rate": 3.574757281553398e-05, "loss": 0.159, "step": 2936 }, { "epoch": 28.47710843373494, "grad_norm": 4.930605888366699, "learning_rate": 3.574271844660194e-05, "loss": 0.3597, "step": 2937 }, { "epoch": 28.486746987951808, "grad_norm": 9.29212760925293, "learning_rate": 3.57378640776699e-05, "loss": 0.3882, "step": 2938 }, { "epoch": 28.496385542168674, "grad_norm": 1.8610907793045044, "learning_rate": 3.573300970873786e-05, "loss": 0.1914, "step": 2939 }, { "epoch": 28.50602409638554, "grad_norm": 4.266864776611328, "learning_rate": 3.5728155339805825e-05, "loss": 0.2309, "step": 2940 }, { "epoch": 28.51566265060241, "grad_norm": 5.330141067504883, "learning_rate": 3.572330097087378e-05, "loss": 0.2329, "step": 2941 }, { "epoch": 28.52530120481928, "grad_norm": 2.3213367462158203, "learning_rate": 3.5718446601941754e-05, "loss": 0.1787, "step": 2942 }, { "epoch": 28.534939759036146, "grad_norm": 4.5696210861206055, "learning_rate": 3.571359223300971e-05, "loss": 0.297, "step": 2943 }, { "epoch": 28.544578313253012, "grad_norm": 8.060906410217285, "learning_rate": 3.5708737864077676e-05, "loss": 0.2928, "step": 2944 }, { "epoch": 28.55421686746988, "grad_norm": 5.119874000549316, "learning_rate": 3.5703883495145634e-05, "loss": 0.3048, "step": 2945 }, { "epoch": 28.563855421686746, "grad_norm": 15.77598762512207, "learning_rate": 3.56990291262136e-05, "loss": 0.2443, "step": 2946 }, { "epoch": 28.573493975903613, "grad_norm": 2.9449708461761475, "learning_rate": 3.5694174757281556e-05, "loss": 0.196, "step": 2947 }, { "epoch": 28.583132530120483, "grad_norm": 5.454138278961182, "learning_rate": 3.5689320388349514e-05, "loss": 0.2399, "step": 2948 }, { "epoch": 28.59277108433735, "grad_norm": 12.485671043395996, "learning_rate": 3.568446601941748e-05, "loss": 0.3409, "step": 2949 }, { "epoch": 28.602409638554217, "grad_norm": 9.378785133361816, "learning_rate": 3.5679611650485437e-05, "loss": 0.3045, "step": 2950 }, { "epoch": 28.612048192771084, "grad_norm": 3.33420467376709, "learning_rate": 3.56747572815534e-05, "loss": 0.23, "step": 2951 }, { "epoch": 28.62168674698795, "grad_norm": 9.19075870513916, "learning_rate": 3.566990291262136e-05, "loss": 0.3324, "step": 2952 }, { "epoch": 28.631325301204818, "grad_norm": 3.412560224533081, "learning_rate": 3.566504854368932e-05, "loss": 0.2018, "step": 2953 }, { "epoch": 28.64096385542169, "grad_norm": 5.537172317504883, "learning_rate": 3.566019417475728e-05, "loss": 0.2492, "step": 2954 }, { "epoch": 28.650602409638555, "grad_norm": 5.23703145980835, "learning_rate": 3.5655339805825246e-05, "loss": 0.2556, "step": 2955 }, { "epoch": 28.660240963855422, "grad_norm": 9.696022987365723, "learning_rate": 3.5650485436893203e-05, "loss": 0.3671, "step": 2956 }, { "epoch": 28.66987951807229, "grad_norm": 3.6470234394073486, "learning_rate": 3.564563106796117e-05, "loss": 0.2013, "step": 2957 }, { "epoch": 28.679518072289156, "grad_norm": 1.9999370574951172, "learning_rate": 3.5640776699029126e-05, "loss": 0.1804, "step": 2958 }, { "epoch": 28.689156626506023, "grad_norm": 17.906099319458008, "learning_rate": 3.563592233009709e-05, "loss": 0.3711, "step": 2959 }, { "epoch": 28.698795180722893, "grad_norm": 5.130929470062256, "learning_rate": 3.563106796116505e-05, "loss": 0.2344, "step": 2960 }, { "epoch": 28.70843373493976, "grad_norm": 4.270246505737305, "learning_rate": 3.562621359223301e-05, "loss": 0.1219, "step": 2961 }, { "epoch": 28.718072289156627, "grad_norm": 8.338406562805176, "learning_rate": 3.562135922330097e-05, "loss": 0.2968, "step": 2962 }, { "epoch": 28.727710843373494, "grad_norm": 24.529335021972656, "learning_rate": 3.5616504854368935e-05, "loss": 0.3841, "step": 2963 }, { "epoch": 28.73734939759036, "grad_norm": 10.058650970458984, "learning_rate": 3.561165048543689e-05, "loss": 0.4289, "step": 2964 }, { "epoch": 28.746987951807228, "grad_norm": 2.353403329849243, "learning_rate": 3.560679611650485e-05, "loss": 0.1604, "step": 2965 }, { "epoch": 28.756626506024098, "grad_norm": 8.167855262756348, "learning_rate": 3.5601941747572815e-05, "loss": 0.2517, "step": 2966 }, { "epoch": 28.766265060240965, "grad_norm": 6.2384161949157715, "learning_rate": 3.559708737864078e-05, "loss": 0.2358, "step": 2967 }, { "epoch": 28.77590361445783, "grad_norm": 15.06254768371582, "learning_rate": 3.5592233009708744e-05, "loss": 0.207, "step": 2968 }, { "epoch": 28.7855421686747, "grad_norm": 4.3059210777282715, "learning_rate": 3.55873786407767e-05, "loss": 0.2271, "step": 2969 }, { "epoch": 28.795180722891565, "grad_norm": 4.634490013122559, "learning_rate": 3.5582524271844666e-05, "loss": 0.1751, "step": 2970 }, { "epoch": 28.804819277108432, "grad_norm": 1.974816918373108, "learning_rate": 3.5577669902912624e-05, "loss": 0.158, "step": 2971 }, { "epoch": 28.814457831325303, "grad_norm": 4.24223518371582, "learning_rate": 3.557281553398059e-05, "loss": 0.1944, "step": 2972 }, { "epoch": 28.82409638554217, "grad_norm": 11.534941673278809, "learning_rate": 3.5567961165048546e-05, "loss": 0.3621, "step": 2973 }, { "epoch": 28.833734939759037, "grad_norm": 12.735404968261719, "learning_rate": 3.556310679611651e-05, "loss": 0.3134, "step": 2974 }, { "epoch": 28.843373493975903, "grad_norm": 10.541128158569336, "learning_rate": 3.555825242718447e-05, "loss": 0.5475, "step": 2975 }, { "epoch": 28.85301204819277, "grad_norm": 1.8390334844589233, "learning_rate": 3.5553398058252426e-05, "loss": 0.1902, "step": 2976 }, { "epoch": 28.862650602409637, "grad_norm": 16.732402801513672, "learning_rate": 3.554854368932039e-05, "loss": 0.2643, "step": 2977 }, { "epoch": 28.872289156626508, "grad_norm": 6.609226703643799, "learning_rate": 3.554368932038835e-05, "loss": 0.3089, "step": 2978 }, { "epoch": 28.881927710843375, "grad_norm": 4.790772914886475, "learning_rate": 3.553883495145631e-05, "loss": 0.3715, "step": 2979 }, { "epoch": 28.89156626506024, "grad_norm": 6.241481781005859, "learning_rate": 3.553398058252427e-05, "loss": 0.2915, "step": 2980 }, { "epoch": 28.90120481927711, "grad_norm": 8.962647438049316, "learning_rate": 3.5529126213592235e-05, "loss": 0.222, "step": 2981 }, { "epoch": 28.910843373493975, "grad_norm": 15.168928146362305, "learning_rate": 3.552427184466019e-05, "loss": 0.3553, "step": 2982 }, { "epoch": 28.920481927710842, "grad_norm": 3.9862709045410156, "learning_rate": 3.551941747572816e-05, "loss": 0.2346, "step": 2983 }, { "epoch": 28.930120481927712, "grad_norm": 5.183629989624023, "learning_rate": 3.5514563106796115e-05, "loss": 0.2203, "step": 2984 }, { "epoch": 28.93975903614458, "grad_norm": 1.9316134452819824, "learning_rate": 3.550970873786408e-05, "loss": 0.2173, "step": 2985 }, { "epoch": 28.949397590361446, "grad_norm": 10.962873458862305, "learning_rate": 3.550485436893204e-05, "loss": 0.2533, "step": 2986 }, { "epoch": 28.959036144578313, "grad_norm": 4.3826189041137695, "learning_rate": 3.55e-05, "loss": 0.1996, "step": 2987 }, { "epoch": 28.96867469879518, "grad_norm": 4.683048248291016, "learning_rate": 3.549514563106796e-05, "loss": 0.1706, "step": 2988 }, { "epoch": 28.978313253012047, "grad_norm": 15.920515060424805, "learning_rate": 3.5490291262135925e-05, "loss": 0.1449, "step": 2989 }, { "epoch": 28.987951807228917, "grad_norm": 2.2842953205108643, "learning_rate": 3.548543689320388e-05, "loss": 0.1737, "step": 2990 }, { "epoch": 29.003614457831326, "grad_norm": 1.7632348537445068, "learning_rate": 3.548058252427185e-05, "loss": 0.1301, "step": 2991 }, { "epoch": 29.013253012048192, "grad_norm": 8.400160789489746, "learning_rate": 3.547572815533981e-05, "loss": 0.2983, "step": 2992 }, { "epoch": 29.02289156626506, "grad_norm": 6.87416410446167, "learning_rate": 3.547087378640777e-05, "loss": 0.3479, "step": 2993 }, { "epoch": 29.032530120481926, "grad_norm": 3.2781548500061035, "learning_rate": 3.5466019417475734e-05, "loss": 0.2526, "step": 2994 }, { "epoch": 29.042168674698797, "grad_norm": 4.916610240936279, "learning_rate": 3.546116504854369e-05, "loss": 0.2309, "step": 2995 }, { "epoch": 29.051807228915663, "grad_norm": 5.101386070251465, "learning_rate": 3.5456310679611656e-05, "loss": 0.3439, "step": 2996 }, { "epoch": 29.06144578313253, "grad_norm": 3.162917137145996, "learning_rate": 3.5451456310679614e-05, "loss": 0.2542, "step": 2997 }, { "epoch": 29.071084337349397, "grad_norm": 5.036881923675537, "learning_rate": 3.544660194174758e-05, "loss": 0.2848, "step": 2998 }, { "epoch": 29.080722891566264, "grad_norm": 4.772645950317383, "learning_rate": 3.5441747572815536e-05, "loss": 0.3707, "step": 2999 }, { "epoch": 29.09036144578313, "grad_norm": 8.954848289489746, "learning_rate": 3.54368932038835e-05, "loss": 0.3347, "step": 3000 }, { "epoch": 29.1, "grad_norm": 7.45590353012085, "learning_rate": 3.543203883495146e-05, "loss": 0.1444, "step": 3001 }, { "epoch": 29.10963855421687, "grad_norm": 3.542755365371704, "learning_rate": 3.542718446601942e-05, "loss": 0.2376, "step": 3002 }, { "epoch": 29.119277108433735, "grad_norm": 4.100888252258301, "learning_rate": 3.542233009708738e-05, "loss": 0.1722, "step": 3003 }, { "epoch": 29.128915662650602, "grad_norm": 5.491598606109619, "learning_rate": 3.541747572815534e-05, "loss": 0.1953, "step": 3004 }, { "epoch": 29.13855421686747, "grad_norm": 14.238850593566895, "learning_rate": 3.54126213592233e-05, "loss": 0.4984, "step": 3005 }, { "epoch": 29.148192771084336, "grad_norm": 4.115592956542969, "learning_rate": 3.540776699029126e-05, "loss": 0.3621, "step": 3006 }, { "epoch": 29.157831325301206, "grad_norm": 4.584468364715576, "learning_rate": 3.5402912621359225e-05, "loss": 0.2456, "step": 3007 }, { "epoch": 29.167469879518073, "grad_norm": 3.619417428970337, "learning_rate": 3.539805825242718e-05, "loss": 0.3946, "step": 3008 }, { "epoch": 29.17710843373494, "grad_norm": 4.063743591308594, "learning_rate": 3.539320388349515e-05, "loss": 0.1225, "step": 3009 }, { "epoch": 29.186746987951807, "grad_norm": 6.704817771911621, "learning_rate": 3.5388349514563105e-05, "loss": 0.2217, "step": 3010 }, { "epoch": 29.196385542168674, "grad_norm": 12.290092468261719, "learning_rate": 3.538349514563107e-05, "loss": 0.2268, "step": 3011 }, { "epoch": 29.20602409638554, "grad_norm": 7.006570339202881, "learning_rate": 3.537864077669903e-05, "loss": 0.2059, "step": 3012 }, { "epoch": 29.21566265060241, "grad_norm": 4.296692848205566, "learning_rate": 3.537378640776699e-05, "loss": 0.4086, "step": 3013 }, { "epoch": 29.225301204819278, "grad_norm": 22.615943908691406, "learning_rate": 3.536893203883495e-05, "loss": 0.232, "step": 3014 }, { "epoch": 29.234939759036145, "grad_norm": 6.1023993492126465, "learning_rate": 3.5364077669902914e-05, "loss": 0.29, "step": 3015 }, { "epoch": 29.24457831325301, "grad_norm": 6.584347724914551, "learning_rate": 3.535922330097087e-05, "loss": 0.3729, "step": 3016 }, { "epoch": 29.25421686746988, "grad_norm": 2.889639139175415, "learning_rate": 3.535436893203884e-05, "loss": 0.2567, "step": 3017 }, { "epoch": 29.263855421686745, "grad_norm": 4.358686923980713, "learning_rate": 3.53495145631068e-05, "loss": 0.3401, "step": 3018 }, { "epoch": 29.273493975903616, "grad_norm": 11.37821102142334, "learning_rate": 3.534466019417476e-05, "loss": 0.2443, "step": 3019 }, { "epoch": 29.283132530120483, "grad_norm": 4.940424919128418, "learning_rate": 3.5339805825242724e-05, "loss": 0.2369, "step": 3020 }, { "epoch": 29.29277108433735, "grad_norm": 15.250666618347168, "learning_rate": 3.533495145631068e-05, "loss": 0.3281, "step": 3021 }, { "epoch": 29.302409638554217, "grad_norm": 2.795090675354004, "learning_rate": 3.5330097087378646e-05, "loss": 0.146, "step": 3022 }, { "epoch": 29.312048192771083, "grad_norm": 4.934690952301025, "learning_rate": 3.5325242718446604e-05, "loss": 0.307, "step": 3023 }, { "epoch": 29.32168674698795, "grad_norm": 1.5515434741973877, "learning_rate": 3.532038834951457e-05, "loss": 0.173, "step": 3024 }, { "epoch": 29.33132530120482, "grad_norm": 4.690242767333984, "learning_rate": 3.5315533980582526e-05, "loss": 0.3309, "step": 3025 }, { "epoch": 29.340963855421688, "grad_norm": 2.9438538551330566, "learning_rate": 3.531067961165049e-05, "loss": 0.167, "step": 3026 }, { "epoch": 29.350602409638554, "grad_norm": 4.254283905029297, "learning_rate": 3.530582524271845e-05, "loss": 0.1646, "step": 3027 }, { "epoch": 29.36024096385542, "grad_norm": 3.2137794494628906, "learning_rate": 3.530097087378641e-05, "loss": 0.2203, "step": 3028 }, { "epoch": 29.36987951807229, "grad_norm": 9.91950798034668, "learning_rate": 3.529611650485437e-05, "loss": 0.2956, "step": 3029 }, { "epoch": 29.379518072289155, "grad_norm": 15.305045127868652, "learning_rate": 3.5291262135922335e-05, "loss": 0.1556, "step": 3030 }, { "epoch": 29.389156626506026, "grad_norm": 9.586871147155762, "learning_rate": 3.528640776699029e-05, "loss": 0.2724, "step": 3031 }, { "epoch": 29.398795180722892, "grad_norm": 3.3915164470672607, "learning_rate": 3.528155339805825e-05, "loss": 0.2074, "step": 3032 }, { "epoch": 29.40843373493976, "grad_norm": 4.248534202575684, "learning_rate": 3.5276699029126215e-05, "loss": 0.2814, "step": 3033 }, { "epoch": 29.418072289156626, "grad_norm": 10.17581844329834, "learning_rate": 3.527184466019417e-05, "loss": 0.2668, "step": 3034 }, { "epoch": 29.427710843373493, "grad_norm": 6.1103410720825195, "learning_rate": 3.526699029126214e-05, "loss": 0.3617, "step": 3035 }, { "epoch": 29.43734939759036, "grad_norm": 4.086521148681641, "learning_rate": 3.5262135922330095e-05, "loss": 0.3196, "step": 3036 }, { "epoch": 29.44698795180723, "grad_norm": 15.514613151550293, "learning_rate": 3.525728155339806e-05, "loss": 0.256, "step": 3037 }, { "epoch": 29.456626506024097, "grad_norm": 7.210671901702881, "learning_rate": 3.525242718446602e-05, "loss": 0.3176, "step": 3038 }, { "epoch": 29.466265060240964, "grad_norm": 2.214967727661133, "learning_rate": 3.524757281553398e-05, "loss": 0.152, "step": 3039 }, { "epoch": 29.47590361445783, "grad_norm": 6.965200424194336, "learning_rate": 3.524271844660194e-05, "loss": 0.2524, "step": 3040 }, { "epoch": 29.485542168674698, "grad_norm": 8.578865051269531, "learning_rate": 3.5237864077669904e-05, "loss": 0.2716, "step": 3041 }, { "epoch": 29.495180722891565, "grad_norm": 5.56239128112793, "learning_rate": 3.523300970873786e-05, "loss": 0.4611, "step": 3042 }, { "epoch": 29.504819277108435, "grad_norm": 7.618365287780762, "learning_rate": 3.5228155339805826e-05, "loss": 0.1386, "step": 3043 }, { "epoch": 29.514457831325302, "grad_norm": 14.688817024230957, "learning_rate": 3.522330097087379e-05, "loss": 0.2204, "step": 3044 }, { "epoch": 29.52409638554217, "grad_norm": 21.812515258789062, "learning_rate": 3.521844660194175e-05, "loss": 0.2999, "step": 3045 }, { "epoch": 29.533734939759036, "grad_norm": 5.355910301208496, "learning_rate": 3.521359223300971e-05, "loss": 0.3695, "step": 3046 }, { "epoch": 29.543373493975903, "grad_norm": 4.296477794647217, "learning_rate": 3.520873786407767e-05, "loss": 0.2925, "step": 3047 }, { "epoch": 29.55301204819277, "grad_norm": 4.322268009185791, "learning_rate": 3.5203883495145636e-05, "loss": 0.2331, "step": 3048 }, { "epoch": 29.56265060240964, "grad_norm": 3.910918712615967, "learning_rate": 3.519902912621359e-05, "loss": 0.2416, "step": 3049 }, { "epoch": 29.572289156626507, "grad_norm": 12.590864181518555, "learning_rate": 3.519417475728156e-05, "loss": 0.1706, "step": 3050 }, { "epoch": 29.581927710843374, "grad_norm": 5.239188194274902, "learning_rate": 3.5189320388349516e-05, "loss": 0.3355, "step": 3051 }, { "epoch": 29.59156626506024, "grad_norm": 5.770255088806152, "learning_rate": 3.518446601941748e-05, "loss": 0.2059, "step": 3052 }, { "epoch": 29.601204819277108, "grad_norm": 6.226093292236328, "learning_rate": 3.517961165048544e-05, "loss": 0.3041, "step": 3053 }, { "epoch": 29.610843373493974, "grad_norm": 4.185781002044678, "learning_rate": 3.51747572815534e-05, "loss": 0.2018, "step": 3054 }, { "epoch": 29.620481927710845, "grad_norm": 15.787603378295898, "learning_rate": 3.516990291262136e-05, "loss": 0.2959, "step": 3055 }, { "epoch": 29.63012048192771, "grad_norm": 2.913583278656006, "learning_rate": 3.5165048543689325e-05, "loss": 0.2227, "step": 3056 }, { "epoch": 29.63975903614458, "grad_norm": 4.654484748840332, "learning_rate": 3.516019417475728e-05, "loss": 0.3236, "step": 3057 }, { "epoch": 29.649397590361446, "grad_norm": 13.831178665161133, "learning_rate": 3.515533980582525e-05, "loss": 0.284, "step": 3058 }, { "epoch": 29.659036144578312, "grad_norm": 4.4015889167785645, "learning_rate": 3.5150485436893205e-05, "loss": 0.4174, "step": 3059 }, { "epoch": 29.66867469879518, "grad_norm": 4.193428039550781, "learning_rate": 3.514563106796116e-05, "loss": 0.2588, "step": 3060 }, { "epoch": 29.67831325301205, "grad_norm": 3.0262949466705322, "learning_rate": 3.514077669902913e-05, "loss": 0.233, "step": 3061 }, { "epoch": 29.687951807228917, "grad_norm": 6.498055934906006, "learning_rate": 3.5135922330097085e-05, "loss": 0.1606, "step": 3062 }, { "epoch": 29.697590361445783, "grad_norm": 2.619117021560669, "learning_rate": 3.513106796116505e-05, "loss": 0.2862, "step": 3063 }, { "epoch": 29.70722891566265, "grad_norm": 4.259716987609863, "learning_rate": 3.512621359223301e-05, "loss": 0.1874, "step": 3064 }, { "epoch": 29.716867469879517, "grad_norm": 4.363185882568359, "learning_rate": 3.512135922330097e-05, "loss": 0.3763, "step": 3065 }, { "epoch": 29.726506024096384, "grad_norm": 2.9745981693267822, "learning_rate": 3.511650485436893e-05, "loss": 0.1933, "step": 3066 }, { "epoch": 29.736144578313255, "grad_norm": 30.2531795501709, "learning_rate": 3.5111650485436894e-05, "loss": 0.2009, "step": 3067 }, { "epoch": 29.74578313253012, "grad_norm": 2.3939709663391113, "learning_rate": 3.510679611650486e-05, "loss": 0.1184, "step": 3068 }, { "epoch": 29.75542168674699, "grad_norm": 3.1329424381256104, "learning_rate": 3.5101941747572816e-05, "loss": 0.2514, "step": 3069 }, { "epoch": 29.765060240963855, "grad_norm": 2.2652578353881836, "learning_rate": 3.509708737864078e-05, "loss": 0.2649, "step": 3070 }, { "epoch": 29.774698795180722, "grad_norm": 4.5164055824279785, "learning_rate": 3.509223300970874e-05, "loss": 0.2243, "step": 3071 }, { "epoch": 29.78433734939759, "grad_norm": 11.183974266052246, "learning_rate": 3.50873786407767e-05, "loss": 0.423, "step": 3072 }, { "epoch": 29.79397590361446, "grad_norm": 4.061408519744873, "learning_rate": 3.508252427184466e-05, "loss": 0.325, "step": 3073 }, { "epoch": 29.803614457831326, "grad_norm": 3.8974554538726807, "learning_rate": 3.5077669902912625e-05, "loss": 0.1689, "step": 3074 }, { "epoch": 29.813253012048193, "grad_norm": 4.0061187744140625, "learning_rate": 3.507281553398058e-05, "loss": 0.3895, "step": 3075 }, { "epoch": 29.82289156626506, "grad_norm": 7.638926029205322, "learning_rate": 3.506796116504855e-05, "loss": 0.2152, "step": 3076 }, { "epoch": 29.832530120481927, "grad_norm": 2.392723560333252, "learning_rate": 3.5063106796116505e-05, "loss": 0.1495, "step": 3077 }, { "epoch": 29.842168674698794, "grad_norm": 5.323252201080322, "learning_rate": 3.505825242718447e-05, "loss": 0.4188, "step": 3078 }, { "epoch": 29.851807228915664, "grad_norm": 4.885797500610352, "learning_rate": 3.505339805825243e-05, "loss": 0.2933, "step": 3079 }, { "epoch": 29.86144578313253, "grad_norm": 1.5112780332565308, "learning_rate": 3.504854368932039e-05, "loss": 0.084, "step": 3080 }, { "epoch": 29.871084337349398, "grad_norm": 3.3351399898529053, "learning_rate": 3.504368932038835e-05, "loss": 0.2523, "step": 3081 }, { "epoch": 29.880722891566265, "grad_norm": 6.343727111816406, "learning_rate": 3.5038834951456315e-05, "loss": 0.2436, "step": 3082 }, { "epoch": 29.89036144578313, "grad_norm": 13.389361381530762, "learning_rate": 3.503398058252427e-05, "loss": 0.4317, "step": 3083 }, { "epoch": 29.9, "grad_norm": 4.041967391967773, "learning_rate": 3.502912621359224e-05, "loss": 0.3819, "step": 3084 }, { "epoch": 29.90963855421687, "grad_norm": 2.9163765907287598, "learning_rate": 3.5024271844660195e-05, "loss": 0.3382, "step": 3085 }, { "epoch": 29.919277108433736, "grad_norm": 5.151396751403809, "learning_rate": 3.501941747572815e-05, "loss": 0.3808, "step": 3086 }, { "epoch": 29.928915662650603, "grad_norm": 24.337705612182617, "learning_rate": 3.501456310679612e-05, "loss": 0.3565, "step": 3087 }, { "epoch": 29.93855421686747, "grad_norm": 16.91181182861328, "learning_rate": 3.5009708737864075e-05, "loss": 0.5018, "step": 3088 }, { "epoch": 29.948192771084337, "grad_norm": 7.189394950866699, "learning_rate": 3.500485436893204e-05, "loss": 0.3671, "step": 3089 }, { "epoch": 29.957831325301203, "grad_norm": 14.820808410644531, "learning_rate": 3.5e-05, "loss": 0.2929, "step": 3090 }, { "epoch": 29.967469879518074, "grad_norm": 3.5662806034088135, "learning_rate": 3.499514563106796e-05, "loss": 0.1302, "step": 3091 }, { "epoch": 29.97710843373494, "grad_norm": 35.59196853637695, "learning_rate": 3.499029126213592e-05, "loss": 0.1964, "step": 3092 }, { "epoch": 29.986746987951808, "grad_norm": 5.880045413970947, "learning_rate": 3.498543689320389e-05, "loss": 0.276, "step": 3093 }, { "epoch": 30.002409638554216, "grad_norm": 2.9678826332092285, "learning_rate": 3.498058252427185e-05, "loss": 0.179, "step": 3094 }, { "epoch": 30.012048192771083, "grad_norm": 7.6417317390441895, "learning_rate": 3.497572815533981e-05, "loss": 0.2062, "step": 3095 }, { "epoch": 30.021686746987953, "grad_norm": 9.328006744384766, "learning_rate": 3.497087378640777e-05, "loss": 0.2786, "step": 3096 }, { "epoch": 30.03132530120482, "grad_norm": 5.362834930419922, "learning_rate": 3.496601941747573e-05, "loss": 0.2711, "step": 3097 }, { "epoch": 30.040963855421687, "grad_norm": 5.2214460372924805, "learning_rate": 3.496116504854369e-05, "loss": 0.2919, "step": 3098 }, { "epoch": 30.050602409638554, "grad_norm": 4.698786735534668, "learning_rate": 3.495631067961165e-05, "loss": 0.3538, "step": 3099 }, { "epoch": 30.06024096385542, "grad_norm": 4.866965293884277, "learning_rate": 3.4951456310679615e-05, "loss": 0.3353, "step": 3100 }, { "epoch": 30.069879518072288, "grad_norm": 5.164997577667236, "learning_rate": 3.494660194174757e-05, "loss": 0.3041, "step": 3101 }, { "epoch": 30.079518072289158, "grad_norm": 5.434213161468506, "learning_rate": 3.494174757281554e-05, "loss": 0.3292, "step": 3102 }, { "epoch": 30.089156626506025, "grad_norm": 2.44954252243042, "learning_rate": 3.4936893203883495e-05, "loss": 0.1316, "step": 3103 }, { "epoch": 30.09879518072289, "grad_norm": 5.38813591003418, "learning_rate": 3.493203883495146e-05, "loss": 0.1907, "step": 3104 }, { "epoch": 30.10843373493976, "grad_norm": 4.351000785827637, "learning_rate": 3.492718446601942e-05, "loss": 0.2854, "step": 3105 }, { "epoch": 30.118072289156625, "grad_norm": 2.9929404258728027, "learning_rate": 3.492233009708738e-05, "loss": 0.3565, "step": 3106 }, { "epoch": 30.127710843373492, "grad_norm": 2.9110331535339355, "learning_rate": 3.491747572815534e-05, "loss": 0.3475, "step": 3107 }, { "epoch": 30.137349397590363, "grad_norm": 3.0739808082580566, "learning_rate": 3.4912621359223304e-05, "loss": 0.1573, "step": 3108 }, { "epoch": 30.14698795180723, "grad_norm": 7.70318603515625, "learning_rate": 3.490776699029126e-05, "loss": 0.326, "step": 3109 }, { "epoch": 30.156626506024097, "grad_norm": 5.487557888031006, "learning_rate": 3.490291262135923e-05, "loss": 0.3104, "step": 3110 }, { "epoch": 30.166265060240963, "grad_norm": 2.1086127758026123, "learning_rate": 3.4898058252427184e-05, "loss": 0.2072, "step": 3111 }, { "epoch": 30.17590361445783, "grad_norm": 4.235467910766602, "learning_rate": 3.489320388349515e-05, "loss": 0.2804, "step": 3112 }, { "epoch": 30.185542168674697, "grad_norm": 5.096614837646484, "learning_rate": 3.488834951456311e-05, "loss": 0.2218, "step": 3113 }, { "epoch": 30.195180722891568, "grad_norm": 6.4741339683532715, "learning_rate": 3.4883495145631064e-05, "loss": 0.2357, "step": 3114 }, { "epoch": 30.204819277108435, "grad_norm": 2.3332369327545166, "learning_rate": 3.487864077669903e-05, "loss": 0.2316, "step": 3115 }, { "epoch": 30.2144578313253, "grad_norm": 3.480605363845825, "learning_rate": 3.487378640776699e-05, "loss": 0.285, "step": 3116 }, { "epoch": 30.22409638554217, "grad_norm": 2.4661872386932373, "learning_rate": 3.486893203883495e-05, "loss": 0.1061, "step": 3117 }, { "epoch": 30.233734939759035, "grad_norm": 3.3928182125091553, "learning_rate": 3.4864077669902916e-05, "loss": 0.3335, "step": 3118 }, { "epoch": 30.243373493975902, "grad_norm": 2.6796321868896484, "learning_rate": 3.485922330097088e-05, "loss": 0.2913, "step": 3119 }, { "epoch": 30.253012048192772, "grad_norm": 2.5184264183044434, "learning_rate": 3.485436893203884e-05, "loss": 0.2718, "step": 3120 }, { "epoch": 30.26265060240964, "grad_norm": 3.1085927486419678, "learning_rate": 3.48495145631068e-05, "loss": 0.2597, "step": 3121 }, { "epoch": 30.272289156626506, "grad_norm": 4.855727672576904, "learning_rate": 3.484466019417476e-05, "loss": 0.285, "step": 3122 }, { "epoch": 30.281927710843373, "grad_norm": 7.0907087326049805, "learning_rate": 3.4839805825242725e-05, "loss": 0.2109, "step": 3123 }, { "epoch": 30.29156626506024, "grad_norm": 3.913987636566162, "learning_rate": 3.483495145631068e-05, "loss": 0.2565, "step": 3124 }, { "epoch": 30.301204819277107, "grad_norm": 7.60823917388916, "learning_rate": 3.483009708737864e-05, "loss": 0.3318, "step": 3125 }, { "epoch": 30.310843373493977, "grad_norm": 7.2436089515686035, "learning_rate": 3.4825242718446605e-05, "loss": 0.1983, "step": 3126 }, { "epoch": 30.320481927710844, "grad_norm": 13.34964656829834, "learning_rate": 3.482038834951456e-05, "loss": 0.4063, "step": 3127 }, { "epoch": 30.33012048192771, "grad_norm": 24.94346809387207, "learning_rate": 3.481553398058253e-05, "loss": 0.252, "step": 3128 }, { "epoch": 30.339759036144578, "grad_norm": 6.8111572265625, "learning_rate": 3.4810679611650485e-05, "loss": 0.3319, "step": 3129 }, { "epoch": 30.349397590361445, "grad_norm": 4.26215934753418, "learning_rate": 3.480582524271845e-05, "loss": 0.3202, "step": 3130 }, { "epoch": 30.35903614457831, "grad_norm": 13.417375564575195, "learning_rate": 3.480097087378641e-05, "loss": 0.2164, "step": 3131 }, { "epoch": 30.368674698795182, "grad_norm": 5.892012119293213, "learning_rate": 3.479611650485437e-05, "loss": 0.3419, "step": 3132 }, { "epoch": 30.37831325301205, "grad_norm": 2.3068854808807373, "learning_rate": 3.479126213592233e-05, "loss": 0.2662, "step": 3133 }, { "epoch": 30.387951807228916, "grad_norm": 5.854962348937988, "learning_rate": 3.4786407766990294e-05, "loss": 0.2081, "step": 3134 }, { "epoch": 30.397590361445783, "grad_norm": 2.001908302307129, "learning_rate": 3.478155339805825e-05, "loss": 0.1575, "step": 3135 }, { "epoch": 30.40722891566265, "grad_norm": 2.643446207046509, "learning_rate": 3.4776699029126216e-05, "loss": 0.1498, "step": 3136 }, { "epoch": 30.416867469879517, "grad_norm": 49.34981918334961, "learning_rate": 3.4771844660194174e-05, "loss": 0.312, "step": 3137 }, { "epoch": 30.426506024096387, "grad_norm": 7.756667137145996, "learning_rate": 3.476699029126214e-05, "loss": 0.1948, "step": 3138 }, { "epoch": 30.436144578313254, "grad_norm": 5.4747185707092285, "learning_rate": 3.4762135922330096e-05, "loss": 0.3387, "step": 3139 }, { "epoch": 30.44578313253012, "grad_norm": 5.157493591308594, "learning_rate": 3.475728155339806e-05, "loss": 0.2809, "step": 3140 }, { "epoch": 30.455421686746988, "grad_norm": 2.631053924560547, "learning_rate": 3.475242718446602e-05, "loss": 0.168, "step": 3141 }, { "epoch": 30.465060240963854, "grad_norm": 4.23048210144043, "learning_rate": 3.4747572815533977e-05, "loss": 0.296, "step": 3142 }, { "epoch": 30.47469879518072, "grad_norm": 2.4879226684570312, "learning_rate": 3.474271844660194e-05, "loss": 0.2799, "step": 3143 }, { "epoch": 30.48433734939759, "grad_norm": 2.5930721759796143, "learning_rate": 3.4737864077669906e-05, "loss": 0.2466, "step": 3144 }, { "epoch": 30.49397590361446, "grad_norm": 3.7813074588775635, "learning_rate": 3.473300970873787e-05, "loss": 0.3075, "step": 3145 }, { "epoch": 30.503614457831326, "grad_norm": 4.6919097900390625, "learning_rate": 3.472815533980583e-05, "loss": 0.2981, "step": 3146 }, { "epoch": 30.513253012048192, "grad_norm": 15.185790061950684, "learning_rate": 3.472330097087379e-05, "loss": 0.3912, "step": 3147 }, { "epoch": 30.52289156626506, "grad_norm": 5.319023132324219, "learning_rate": 3.471844660194175e-05, "loss": 0.1771, "step": 3148 }, { "epoch": 30.532530120481926, "grad_norm": 6.499231815338135, "learning_rate": 3.4713592233009715e-05, "loss": 0.3202, "step": 3149 }, { "epoch": 30.542168674698797, "grad_norm": 3.474330186843872, "learning_rate": 3.470873786407767e-05, "loss": 0.2019, "step": 3150 }, { "epoch": 30.551807228915663, "grad_norm": 8.925653457641602, "learning_rate": 3.470388349514564e-05, "loss": 0.2197, "step": 3151 }, { "epoch": 30.56144578313253, "grad_norm": 7.053655624389648, "learning_rate": 3.4699029126213595e-05, "loss": 0.2314, "step": 3152 }, { "epoch": 30.571084337349397, "grad_norm": 3.9207441806793213, "learning_rate": 3.469417475728155e-05, "loss": 0.2781, "step": 3153 }, { "epoch": 30.580722891566264, "grad_norm": 3.0672106742858887, "learning_rate": 3.468932038834952e-05, "loss": 0.2286, "step": 3154 }, { "epoch": 30.59036144578313, "grad_norm": 3.8442697525024414, "learning_rate": 3.4684466019417475e-05, "loss": 0.2029, "step": 3155 }, { "epoch": 30.6, "grad_norm": 4.269445896148682, "learning_rate": 3.467961165048544e-05, "loss": 0.1145, "step": 3156 }, { "epoch": 30.60963855421687, "grad_norm": 5.578938007354736, "learning_rate": 3.46747572815534e-05, "loss": 0.2534, "step": 3157 }, { "epoch": 30.619277108433735, "grad_norm": 6.253801345825195, "learning_rate": 3.466990291262136e-05, "loss": 0.4023, "step": 3158 }, { "epoch": 30.628915662650602, "grad_norm": 3.105879068374634, "learning_rate": 3.466504854368932e-05, "loss": 0.3842, "step": 3159 }, { "epoch": 30.63855421686747, "grad_norm": 5.124316692352295, "learning_rate": 3.4660194174757284e-05, "loss": 0.5578, "step": 3160 }, { "epoch": 30.648192771084336, "grad_norm": 10.084878921508789, "learning_rate": 3.465533980582524e-05, "loss": 0.4597, "step": 3161 }, { "epoch": 30.657831325301206, "grad_norm": 9.32447338104248, "learning_rate": 3.4650485436893206e-05, "loss": 0.3527, "step": 3162 }, { "epoch": 30.667469879518073, "grad_norm": 4.1917619705200195, "learning_rate": 3.4645631067961164e-05, "loss": 0.3848, "step": 3163 }, { "epoch": 30.67710843373494, "grad_norm": 3.411648750305176, "learning_rate": 3.464077669902913e-05, "loss": 0.2879, "step": 3164 }, { "epoch": 30.686746987951807, "grad_norm": 2.3433420658111572, "learning_rate": 3.4635922330097086e-05, "loss": 0.2303, "step": 3165 }, { "epoch": 30.696385542168674, "grad_norm": 22.82244110107422, "learning_rate": 3.463106796116505e-05, "loss": 0.6131, "step": 3166 }, { "epoch": 30.70602409638554, "grad_norm": 1.1248559951782227, "learning_rate": 3.462621359223301e-05, "loss": 0.0598, "step": 3167 }, { "epoch": 30.71566265060241, "grad_norm": 4.8537092208862305, "learning_rate": 3.462135922330097e-05, "loss": 0.3958, "step": 3168 }, { "epoch": 30.725301204819278, "grad_norm": 6.844879627227783, "learning_rate": 3.461650485436894e-05, "loss": 0.3023, "step": 3169 }, { "epoch": 30.734939759036145, "grad_norm": 5.21240758895874, "learning_rate": 3.4611650485436895e-05, "loss": 0.2402, "step": 3170 }, { "epoch": 30.74457831325301, "grad_norm": 4.154244899749756, "learning_rate": 3.460679611650486e-05, "loss": 0.2193, "step": 3171 }, { "epoch": 30.75421686746988, "grad_norm": 7.423879146575928, "learning_rate": 3.460194174757282e-05, "loss": 0.3486, "step": 3172 }, { "epoch": 30.763855421686745, "grad_norm": 6.559839725494385, "learning_rate": 3.459708737864078e-05, "loss": 0.3027, "step": 3173 }, { "epoch": 30.773493975903616, "grad_norm": 2.1546459197998047, "learning_rate": 3.459223300970874e-05, "loss": 0.2116, "step": 3174 }, { "epoch": 30.783132530120483, "grad_norm": 3.6451170444488525, "learning_rate": 3.4587378640776704e-05, "loss": 0.2406, "step": 3175 }, { "epoch": 30.79277108433735, "grad_norm": 1.936198115348816, "learning_rate": 3.458252427184466e-05, "loss": 0.0754, "step": 3176 }, { "epoch": 30.802409638554217, "grad_norm": 1.5832247734069824, "learning_rate": 3.457766990291263e-05, "loss": 0.1007, "step": 3177 }, { "epoch": 30.812048192771083, "grad_norm": 15.301730155944824, "learning_rate": 3.4572815533980585e-05, "loss": 0.1382, "step": 3178 }, { "epoch": 30.82168674698795, "grad_norm": 18.551916122436523, "learning_rate": 3.456796116504855e-05, "loss": 0.2337, "step": 3179 }, { "epoch": 30.83132530120482, "grad_norm": 2.5231025218963623, "learning_rate": 3.456310679611651e-05, "loss": 0.1923, "step": 3180 }, { "epoch": 30.840963855421688, "grad_norm": 3.854816198348999, "learning_rate": 3.4558252427184465e-05, "loss": 0.3646, "step": 3181 }, { "epoch": 30.850602409638554, "grad_norm": 5.003353118896484, "learning_rate": 3.455339805825243e-05, "loss": 0.4958, "step": 3182 }, { "epoch": 30.86024096385542, "grad_norm": 5.326602935791016, "learning_rate": 3.454854368932039e-05, "loss": 0.2218, "step": 3183 }, { "epoch": 30.86987951807229, "grad_norm": 3.0940024852752686, "learning_rate": 3.454368932038835e-05, "loss": 0.2064, "step": 3184 }, { "epoch": 30.879518072289155, "grad_norm": 11.293947219848633, "learning_rate": 3.453883495145631e-05, "loss": 0.2597, "step": 3185 }, { "epoch": 30.889156626506026, "grad_norm": 6.241562366485596, "learning_rate": 3.4533980582524274e-05, "loss": 0.3278, "step": 3186 }, { "epoch": 30.898795180722892, "grad_norm": 2.9462504386901855, "learning_rate": 3.452912621359223e-05, "loss": 0.1945, "step": 3187 }, { "epoch": 30.90843373493976, "grad_norm": 4.3767266273498535, "learning_rate": 3.4524271844660196e-05, "loss": 0.3627, "step": 3188 }, { "epoch": 30.918072289156626, "grad_norm": 4.177987575531006, "learning_rate": 3.4519417475728154e-05, "loss": 0.254, "step": 3189 }, { "epoch": 30.927710843373493, "grad_norm": 3.5814690589904785, "learning_rate": 3.451456310679612e-05, "loss": 0.3455, "step": 3190 }, { "epoch": 30.93734939759036, "grad_norm": 3.0170156955718994, "learning_rate": 3.4509708737864076e-05, "loss": 0.2848, "step": 3191 }, { "epoch": 30.94698795180723, "grad_norm": 1.6720775365829468, "learning_rate": 3.450485436893204e-05, "loss": 0.1983, "step": 3192 }, { "epoch": 30.956626506024097, "grad_norm": 6.458762168884277, "learning_rate": 3.45e-05, "loss": 0.3501, "step": 3193 }, { "epoch": 30.966265060240964, "grad_norm": 4.084224224090576, "learning_rate": 3.449514563106796e-05, "loss": 0.1611, "step": 3194 }, { "epoch": 30.97590361445783, "grad_norm": 5.171408653259277, "learning_rate": 3.449029126213593e-05, "loss": 0.336, "step": 3195 }, { "epoch": 30.985542168674698, "grad_norm": 2.875931739807129, "learning_rate": 3.4485436893203885e-05, "loss": 0.1336, "step": 3196 }, { "epoch": 31.00120481927711, "grad_norm": 7.21079158782959, "learning_rate": 3.448058252427185e-05, "loss": 0.2833, "step": 3197 }, { "epoch": 31.010843373493977, "grad_norm": 4.8602614402771, "learning_rate": 3.447572815533981e-05, "loss": 0.3166, "step": 3198 }, { "epoch": 31.020481927710843, "grad_norm": 5.553462028503418, "learning_rate": 3.447087378640777e-05, "loss": 0.2599, "step": 3199 }, { "epoch": 31.03012048192771, "grad_norm": 4.448892116546631, "learning_rate": 3.446601941747573e-05, "loss": 0.325, "step": 3200 }, { "epoch": 31.039759036144577, "grad_norm": 4.577796459197998, "learning_rate": 3.4461165048543694e-05, "loss": 0.3047, "step": 3201 }, { "epoch": 31.049397590361444, "grad_norm": 6.014092922210693, "learning_rate": 3.445631067961165e-05, "loss": 0.2206, "step": 3202 }, { "epoch": 31.059036144578315, "grad_norm": 5.780372619628906, "learning_rate": 3.4451456310679617e-05, "loss": 0.3432, "step": 3203 }, { "epoch": 31.06867469879518, "grad_norm": 5.005234718322754, "learning_rate": 3.4446601941747574e-05, "loss": 0.2051, "step": 3204 }, { "epoch": 31.07831325301205, "grad_norm": 1.3403631448745728, "learning_rate": 3.444174757281554e-05, "loss": 0.0772, "step": 3205 }, { "epoch": 31.087951807228915, "grad_norm": 11.002201080322266, "learning_rate": 3.44368932038835e-05, "loss": 0.2401, "step": 3206 }, { "epoch": 31.097590361445782, "grad_norm": 4.824349403381348, "learning_rate": 3.443203883495146e-05, "loss": 0.3182, "step": 3207 }, { "epoch": 31.10722891566265, "grad_norm": 6.713106155395508, "learning_rate": 3.442718446601942e-05, "loss": 0.2498, "step": 3208 }, { "epoch": 31.11686746987952, "grad_norm": 6.803249359130859, "learning_rate": 3.442233009708738e-05, "loss": 0.2374, "step": 3209 }, { "epoch": 31.126506024096386, "grad_norm": 2.4888100624084473, "learning_rate": 3.441747572815534e-05, "loss": 0.2442, "step": 3210 }, { "epoch": 31.136144578313253, "grad_norm": 15.127033233642578, "learning_rate": 3.44126213592233e-05, "loss": 0.3077, "step": 3211 }, { "epoch": 31.14578313253012, "grad_norm": 9.16222095489502, "learning_rate": 3.4407766990291263e-05, "loss": 0.2911, "step": 3212 }, { "epoch": 31.155421686746987, "grad_norm": 2.512866258621216, "learning_rate": 3.440291262135922e-05, "loss": 0.2462, "step": 3213 }, { "epoch": 31.165060240963854, "grad_norm": 2.7915189266204834, "learning_rate": 3.4398058252427186e-05, "loss": 0.2695, "step": 3214 }, { "epoch": 31.174698795180724, "grad_norm": 4.772029399871826, "learning_rate": 3.4393203883495144e-05, "loss": 0.2331, "step": 3215 }, { "epoch": 31.18433734939759, "grad_norm": 1.617180585861206, "learning_rate": 3.438834951456311e-05, "loss": 0.1421, "step": 3216 }, { "epoch": 31.193975903614458, "grad_norm": 6.060153007507324, "learning_rate": 3.4383495145631066e-05, "loss": 0.3134, "step": 3217 }, { "epoch": 31.203614457831325, "grad_norm": 6.73675012588501, "learning_rate": 3.437864077669903e-05, "loss": 0.222, "step": 3218 }, { "epoch": 31.21325301204819, "grad_norm": 6.970015048980713, "learning_rate": 3.4373786407766995e-05, "loss": 0.1755, "step": 3219 }, { "epoch": 31.22289156626506, "grad_norm": 3.0402259826660156, "learning_rate": 3.436893203883495e-05, "loss": 0.2622, "step": 3220 }, { "epoch": 31.23253012048193, "grad_norm": 6.159809112548828, "learning_rate": 3.436407766990292e-05, "loss": 0.3652, "step": 3221 }, { "epoch": 31.242168674698796, "grad_norm": 6.886103630065918, "learning_rate": 3.4359223300970875e-05, "loss": 0.3129, "step": 3222 }, { "epoch": 31.251807228915663, "grad_norm": 12.678312301635742, "learning_rate": 3.435436893203884e-05, "loss": 0.1623, "step": 3223 }, { "epoch": 31.26144578313253, "grad_norm": 4.917787075042725, "learning_rate": 3.43495145631068e-05, "loss": 0.2654, "step": 3224 }, { "epoch": 31.271084337349397, "grad_norm": 10.436903953552246, "learning_rate": 3.434466019417476e-05, "loss": 0.2481, "step": 3225 }, { "epoch": 31.280722891566263, "grad_norm": 3.3223400115966797, "learning_rate": 3.433980582524272e-05, "loss": 0.2086, "step": 3226 }, { "epoch": 31.290361445783134, "grad_norm": 2.4139859676361084, "learning_rate": 3.4334951456310684e-05, "loss": 0.2816, "step": 3227 }, { "epoch": 31.3, "grad_norm": 1.9977940320968628, "learning_rate": 3.433009708737864e-05, "loss": 0.1704, "step": 3228 }, { "epoch": 31.309638554216868, "grad_norm": 9.0844144821167, "learning_rate": 3.4325242718446606e-05, "loss": 0.1681, "step": 3229 }, { "epoch": 31.319277108433734, "grad_norm": 3.679206132888794, "learning_rate": 3.4320388349514564e-05, "loss": 0.2678, "step": 3230 }, { "epoch": 31.3289156626506, "grad_norm": 8.522587776184082, "learning_rate": 3.431553398058253e-05, "loss": 0.5678, "step": 3231 }, { "epoch": 31.33855421686747, "grad_norm": 5.23179817199707, "learning_rate": 3.4310679611650486e-05, "loss": 0.1455, "step": 3232 }, { "epoch": 31.34819277108434, "grad_norm": 10.858872413635254, "learning_rate": 3.430582524271845e-05, "loss": 0.2779, "step": 3233 }, { "epoch": 31.357831325301206, "grad_norm": 2.6265087127685547, "learning_rate": 3.430097087378641e-05, "loss": 0.2043, "step": 3234 }, { "epoch": 31.367469879518072, "grad_norm": 7.686380863189697, "learning_rate": 3.4296116504854366e-05, "loss": 0.7394, "step": 3235 }, { "epoch": 31.37710843373494, "grad_norm": 2.8018364906311035, "learning_rate": 3.429126213592233e-05, "loss": 0.1873, "step": 3236 }, { "epoch": 31.386746987951806, "grad_norm": 8.149333953857422, "learning_rate": 3.428640776699029e-05, "loss": 0.3629, "step": 3237 }, { "epoch": 31.396385542168673, "grad_norm": 7.92181396484375, "learning_rate": 3.428155339805825e-05, "loss": 0.3893, "step": 3238 }, { "epoch": 31.406024096385543, "grad_norm": 3.6965420246124268, "learning_rate": 3.427669902912621e-05, "loss": 0.2135, "step": 3239 }, { "epoch": 31.41566265060241, "grad_norm": 5.679554462432861, "learning_rate": 3.4271844660194176e-05, "loss": 0.1593, "step": 3240 }, { "epoch": 31.425301204819277, "grad_norm": 2.7603816986083984, "learning_rate": 3.426699029126213e-05, "loss": 0.1597, "step": 3241 }, { "epoch": 31.434939759036144, "grad_norm": 3.9177768230438232, "learning_rate": 3.42621359223301e-05, "loss": 0.1747, "step": 3242 }, { "epoch": 31.44457831325301, "grad_norm": 3.7225890159606934, "learning_rate": 3.4257281553398056e-05, "loss": 0.2409, "step": 3243 }, { "epoch": 31.454216867469878, "grad_norm": 6.647243022918701, "learning_rate": 3.425242718446603e-05, "loss": 0.346, "step": 3244 }, { "epoch": 31.46385542168675, "grad_norm": 2.6682193279266357, "learning_rate": 3.4247572815533985e-05, "loss": 0.1459, "step": 3245 }, { "epoch": 31.473493975903615, "grad_norm": 6.538060665130615, "learning_rate": 3.424271844660194e-05, "loss": 0.4476, "step": 3246 }, { "epoch": 31.483132530120482, "grad_norm": 5.892810821533203, "learning_rate": 3.423786407766991e-05, "loss": 0.3431, "step": 3247 }, { "epoch": 31.49277108433735, "grad_norm": 9.080096244812012, "learning_rate": 3.4233009708737865e-05, "loss": 0.401, "step": 3248 }, { "epoch": 31.502409638554216, "grad_norm": 4.235206127166748, "learning_rate": 3.422815533980583e-05, "loss": 0.2171, "step": 3249 }, { "epoch": 31.512048192771083, "grad_norm": 2.282806634902954, "learning_rate": 3.422330097087379e-05, "loss": 0.1822, "step": 3250 }, { "epoch": 31.521686746987953, "grad_norm": 4.866219997406006, "learning_rate": 3.421844660194175e-05, "loss": 0.2875, "step": 3251 }, { "epoch": 31.53132530120482, "grad_norm": 3.027822494506836, "learning_rate": 3.421359223300971e-05, "loss": 0.3277, "step": 3252 }, { "epoch": 31.540963855421687, "grad_norm": 4.315542221069336, "learning_rate": 3.4208737864077674e-05, "loss": 0.1715, "step": 3253 }, { "epoch": 31.550602409638554, "grad_norm": 5.7876200675964355, "learning_rate": 3.420388349514563e-05, "loss": 0.4059, "step": 3254 }, { "epoch": 31.56024096385542, "grad_norm": 4.522871017456055, "learning_rate": 3.4199029126213596e-05, "loss": 0.2121, "step": 3255 }, { "epoch": 31.569879518072288, "grad_norm": 5.837906837463379, "learning_rate": 3.4194174757281554e-05, "loss": 0.2897, "step": 3256 }, { "epoch": 31.579518072289158, "grad_norm": 3.8675382137298584, "learning_rate": 3.418932038834952e-05, "loss": 0.2637, "step": 3257 }, { "epoch": 31.589156626506025, "grad_norm": 4.585886001586914, "learning_rate": 3.4184466019417476e-05, "loss": 0.238, "step": 3258 }, { "epoch": 31.59879518072289, "grad_norm": 2.552978277206421, "learning_rate": 3.417961165048544e-05, "loss": 0.2483, "step": 3259 }, { "epoch": 31.60843373493976, "grad_norm": 3.703484535217285, "learning_rate": 3.41747572815534e-05, "loss": 0.2463, "step": 3260 }, { "epoch": 31.618072289156625, "grad_norm": 2.244563102722168, "learning_rate": 3.416990291262136e-05, "loss": 0.0888, "step": 3261 }, { "epoch": 31.627710843373492, "grad_norm": 8.383594512939453, "learning_rate": 3.416504854368932e-05, "loss": 0.4853, "step": 3262 }, { "epoch": 31.637349397590363, "grad_norm": 4.7502336502075195, "learning_rate": 3.416019417475728e-05, "loss": 0.1489, "step": 3263 }, { "epoch": 31.64698795180723, "grad_norm": 6.622154712677002, "learning_rate": 3.415533980582524e-05, "loss": 0.2636, "step": 3264 }, { "epoch": 31.656626506024097, "grad_norm": 5.332326889038086, "learning_rate": 3.41504854368932e-05, "loss": 0.4006, "step": 3265 }, { "epoch": 31.666265060240963, "grad_norm": 3.727595806121826, "learning_rate": 3.4145631067961165e-05, "loss": 0.4229, "step": 3266 }, { "epoch": 31.67590361445783, "grad_norm": 3.6350417137145996, "learning_rate": 3.414077669902912e-05, "loss": 0.24, "step": 3267 }, { "epoch": 31.685542168674697, "grad_norm": 3.834932327270508, "learning_rate": 3.413592233009709e-05, "loss": 0.3203, "step": 3268 }, { "epoch": 31.695180722891568, "grad_norm": 4.728969573974609, "learning_rate": 3.4131067961165045e-05, "loss": 0.376, "step": 3269 }, { "epoch": 31.704819277108435, "grad_norm": 3.6188879013061523, "learning_rate": 3.412621359223302e-05, "loss": 0.1627, "step": 3270 }, { "epoch": 31.7144578313253, "grad_norm": 10.705889701843262, "learning_rate": 3.4121359223300974e-05, "loss": 0.4088, "step": 3271 }, { "epoch": 31.72409638554217, "grad_norm": 3.225133180618286, "learning_rate": 3.411650485436894e-05, "loss": 0.3305, "step": 3272 }, { "epoch": 31.733734939759035, "grad_norm": 3.766146659851074, "learning_rate": 3.41116504854369e-05, "loss": 0.3017, "step": 3273 }, { "epoch": 31.743373493975902, "grad_norm": 3.6336371898651123, "learning_rate": 3.4106796116504855e-05, "loss": 0.3819, "step": 3274 }, { "epoch": 31.753012048192772, "grad_norm": 7.945107460021973, "learning_rate": 3.410194174757282e-05, "loss": 0.2494, "step": 3275 }, { "epoch": 31.76265060240964, "grad_norm": 4.741551399230957, "learning_rate": 3.409708737864078e-05, "loss": 0.2511, "step": 3276 }, { "epoch": 31.772289156626506, "grad_norm": 13.18803882598877, "learning_rate": 3.409223300970874e-05, "loss": 0.2481, "step": 3277 }, { "epoch": 31.781927710843373, "grad_norm": 5.990051746368408, "learning_rate": 3.40873786407767e-05, "loss": 0.249, "step": 3278 }, { "epoch": 31.79156626506024, "grad_norm": 3.69091796875, "learning_rate": 3.4082524271844664e-05, "loss": 0.2062, "step": 3279 }, { "epoch": 31.801204819277107, "grad_norm": 7.106801509857178, "learning_rate": 3.407766990291262e-05, "loss": 0.3155, "step": 3280 }, { "epoch": 31.810843373493977, "grad_norm": 9.909941673278809, "learning_rate": 3.4072815533980586e-05, "loss": 0.2367, "step": 3281 }, { "epoch": 31.820481927710844, "grad_norm": 20.699804306030273, "learning_rate": 3.4067961165048544e-05, "loss": 0.1599, "step": 3282 }, { "epoch": 31.83012048192771, "grad_norm": 5.54306697845459, "learning_rate": 3.406310679611651e-05, "loss": 0.3568, "step": 3283 }, { "epoch": 31.839759036144578, "grad_norm": 8.235846519470215, "learning_rate": 3.4058252427184466e-05, "loss": 0.4811, "step": 3284 }, { "epoch": 31.849397590361445, "grad_norm": 3.0733206272125244, "learning_rate": 3.405339805825243e-05, "loss": 0.1742, "step": 3285 }, { "epoch": 31.85903614457831, "grad_norm": 5.369982719421387, "learning_rate": 3.404854368932039e-05, "loss": 0.3752, "step": 3286 }, { "epoch": 31.868674698795182, "grad_norm": 3.3957431316375732, "learning_rate": 3.404368932038835e-05, "loss": 0.1304, "step": 3287 }, { "epoch": 31.87831325301205, "grad_norm": 2.657822370529175, "learning_rate": 3.403883495145631e-05, "loss": 0.1498, "step": 3288 }, { "epoch": 31.887951807228916, "grad_norm": 2.42486834526062, "learning_rate": 3.4033980582524275e-05, "loss": 0.2375, "step": 3289 }, { "epoch": 31.897590361445783, "grad_norm": 6.996711730957031, "learning_rate": 3.402912621359223e-05, "loss": 0.4586, "step": 3290 }, { "epoch": 31.90722891566265, "grad_norm": 20.163663864135742, "learning_rate": 3.402427184466019e-05, "loss": 0.2264, "step": 3291 }, { "epoch": 31.916867469879517, "grad_norm": 15.009639739990234, "learning_rate": 3.4019417475728155e-05, "loss": 0.4814, "step": 3292 }, { "epoch": 31.926506024096387, "grad_norm": 3.550222873687744, "learning_rate": 3.401456310679611e-05, "loss": 0.1256, "step": 3293 }, { "epoch": 31.936144578313254, "grad_norm": 4.591677665710449, "learning_rate": 3.400970873786408e-05, "loss": 0.3227, "step": 3294 }, { "epoch": 31.94578313253012, "grad_norm": 5.649020671844482, "learning_rate": 3.400485436893204e-05, "loss": 0.2148, "step": 3295 }, { "epoch": 31.955421686746988, "grad_norm": 6.785184383392334, "learning_rate": 3.4000000000000007e-05, "loss": 0.1297, "step": 3296 }, { "epoch": 31.965060240963854, "grad_norm": 6.691347122192383, "learning_rate": 3.3995145631067964e-05, "loss": 0.3093, "step": 3297 }, { "epoch": 31.97469879518072, "grad_norm": 5.50929069519043, "learning_rate": 3.399029126213593e-05, "loss": 0.3502, "step": 3298 }, { "epoch": 31.98433734939759, "grad_norm": 2.8613998889923096, "learning_rate": 3.3985436893203887e-05, "loss": 0.3509, "step": 3299 }, { "epoch": 31.99397590361446, "grad_norm": 4.433318138122559, "learning_rate": 3.398058252427185e-05, "loss": 0.1798, "step": 3300 }, { "epoch": 32.00963855421687, "grad_norm": 4.118446350097656, "learning_rate": 3.397572815533981e-05, "loss": 0.1714, "step": 3301 }, { "epoch": 32.019277108433734, "grad_norm": 3.1939427852630615, "learning_rate": 3.3970873786407767e-05, "loss": 0.1142, "step": 3302 }, { "epoch": 32.028915662650604, "grad_norm": 7.744816780090332, "learning_rate": 3.396601941747573e-05, "loss": 0.3749, "step": 3303 }, { "epoch": 32.03855421686747, "grad_norm": 7.168838977813721, "learning_rate": 3.396116504854369e-05, "loss": 0.2536, "step": 3304 }, { "epoch": 32.04819277108434, "grad_norm": 2.267075777053833, "learning_rate": 3.3956310679611653e-05, "loss": 0.2129, "step": 3305 }, { "epoch": 32.0578313253012, "grad_norm": 5.282995223999023, "learning_rate": 3.395145631067961e-05, "loss": 0.2471, "step": 3306 }, { "epoch": 32.06746987951807, "grad_norm": 3.5782086849212646, "learning_rate": 3.3946601941747576e-05, "loss": 0.2452, "step": 3307 }, { "epoch": 32.07710843373494, "grad_norm": 2.7037062644958496, "learning_rate": 3.3941747572815533e-05, "loss": 0.1701, "step": 3308 }, { "epoch": 32.086746987951805, "grad_norm": 2.968053102493286, "learning_rate": 3.39368932038835e-05, "loss": 0.2582, "step": 3309 }, { "epoch": 32.096385542168676, "grad_norm": 5.823120594024658, "learning_rate": 3.3932038834951456e-05, "loss": 0.3151, "step": 3310 }, { "epoch": 32.10602409638554, "grad_norm": 5.191715717315674, "learning_rate": 3.392718446601942e-05, "loss": 0.1977, "step": 3311 }, { "epoch": 32.11566265060241, "grad_norm": 5.194666385650635, "learning_rate": 3.392233009708738e-05, "loss": 0.2336, "step": 3312 }, { "epoch": 32.12530120481928, "grad_norm": 8.38235855102539, "learning_rate": 3.391747572815534e-05, "loss": 0.4448, "step": 3313 }, { "epoch": 32.13493975903614, "grad_norm": 4.020598888397217, "learning_rate": 3.39126213592233e-05, "loss": 0.1954, "step": 3314 }, { "epoch": 32.144578313253014, "grad_norm": 5.4902024269104, "learning_rate": 3.3907766990291265e-05, "loss": 0.245, "step": 3315 }, { "epoch": 32.15421686746988, "grad_norm": 8.463093757629395, "learning_rate": 3.390291262135922e-05, "loss": 0.2193, "step": 3316 }, { "epoch": 32.16385542168675, "grad_norm": 4.008128643035889, "learning_rate": 3.389805825242719e-05, "loss": 0.2121, "step": 3317 }, { "epoch": 32.17349397590361, "grad_norm": 6.163240909576416, "learning_rate": 3.3893203883495145e-05, "loss": 0.2576, "step": 3318 }, { "epoch": 32.18313253012048, "grad_norm": 4.105266571044922, "learning_rate": 3.38883495145631e-05, "loss": 0.1993, "step": 3319 }, { "epoch": 32.19277108433735, "grad_norm": 4.399973392486572, "learning_rate": 3.3883495145631074e-05, "loss": 0.3429, "step": 3320 }, { "epoch": 32.202409638554215, "grad_norm": 3.8668212890625, "learning_rate": 3.387864077669903e-05, "loss": 0.2504, "step": 3321 }, { "epoch": 32.212048192771086, "grad_norm": 4.486428737640381, "learning_rate": 3.3873786407766996e-05, "loss": 0.3293, "step": 3322 }, { "epoch": 32.22168674698795, "grad_norm": 3.877149820327759, "learning_rate": 3.3868932038834954e-05, "loss": 0.1658, "step": 3323 }, { "epoch": 32.23132530120482, "grad_norm": 4.438838481903076, "learning_rate": 3.386407766990292e-05, "loss": 0.2989, "step": 3324 }, { "epoch": 32.24096385542169, "grad_norm": 11.445074081420898, "learning_rate": 3.3859223300970876e-05, "loss": 0.231, "step": 3325 }, { "epoch": 32.25060240963855, "grad_norm": 7.77293586730957, "learning_rate": 3.385436893203884e-05, "loss": 0.3623, "step": 3326 }, { "epoch": 32.26024096385542, "grad_norm": 12.9007568359375, "learning_rate": 3.38495145631068e-05, "loss": 0.4055, "step": 3327 }, { "epoch": 32.26987951807229, "grad_norm": 9.43838882446289, "learning_rate": 3.384466019417476e-05, "loss": 0.4321, "step": 3328 }, { "epoch": 32.27951807228916, "grad_norm": 2.541106700897217, "learning_rate": 3.383980582524272e-05, "loss": 0.1396, "step": 3329 }, { "epoch": 32.28915662650602, "grad_norm": 4.881346702575684, "learning_rate": 3.383495145631068e-05, "loss": 0.1501, "step": 3330 }, { "epoch": 32.29879518072289, "grad_norm": 1.8614845275878906, "learning_rate": 3.383009708737864e-05, "loss": 0.2144, "step": 3331 }, { "epoch": 32.30843373493976, "grad_norm": 4.168946743011475, "learning_rate": 3.38252427184466e-05, "loss": 0.2573, "step": 3332 }, { "epoch": 32.318072289156625, "grad_norm": 2.6870932579040527, "learning_rate": 3.3820388349514566e-05, "loss": 0.1549, "step": 3333 }, { "epoch": 32.327710843373495, "grad_norm": 13.04495906829834, "learning_rate": 3.381553398058252e-05, "loss": 0.2954, "step": 3334 }, { "epoch": 32.33734939759036, "grad_norm": 6.171628952026367, "learning_rate": 3.381067961165049e-05, "loss": 0.197, "step": 3335 }, { "epoch": 32.34698795180723, "grad_norm": 2.9086203575134277, "learning_rate": 3.3805825242718446e-05, "loss": 0.2118, "step": 3336 }, { "epoch": 32.3566265060241, "grad_norm": 2.92903208732605, "learning_rate": 3.380097087378641e-05, "loss": 0.2903, "step": 3337 }, { "epoch": 32.36626506024096, "grad_norm": 4.080216407775879, "learning_rate": 3.379611650485437e-05, "loss": 0.32, "step": 3338 }, { "epoch": 32.37590361445783, "grad_norm": 7.072318077087402, "learning_rate": 3.379126213592233e-05, "loss": 0.4073, "step": 3339 }, { "epoch": 32.3855421686747, "grad_norm": 4.436951160430908, "learning_rate": 3.378640776699029e-05, "loss": 0.2313, "step": 3340 }, { "epoch": 32.39518072289157, "grad_norm": 4.853715419769287, "learning_rate": 3.3781553398058255e-05, "loss": 0.1763, "step": 3341 }, { "epoch": 32.40481927710843, "grad_norm": 6.963066577911377, "learning_rate": 3.377669902912621e-05, "loss": 0.3592, "step": 3342 }, { "epoch": 32.4144578313253, "grad_norm": 6.888186454772949, "learning_rate": 3.377184466019418e-05, "loss": 0.2755, "step": 3343 }, { "epoch": 32.42409638554217, "grad_norm": 1.1834393739700317, "learning_rate": 3.3766990291262135e-05, "loss": 0.0826, "step": 3344 }, { "epoch": 32.433734939759034, "grad_norm": 4.7010416984558105, "learning_rate": 3.37621359223301e-05, "loss": 0.1428, "step": 3345 }, { "epoch": 32.443373493975905, "grad_norm": 3.336900234222412, "learning_rate": 3.3757281553398064e-05, "loss": 0.2925, "step": 3346 }, { "epoch": 32.45301204819277, "grad_norm": 6.353626251220703, "learning_rate": 3.375242718446602e-05, "loss": 0.3866, "step": 3347 }, { "epoch": 32.46265060240964, "grad_norm": 6.9259934425354, "learning_rate": 3.3747572815533986e-05, "loss": 0.66, "step": 3348 }, { "epoch": 32.47228915662651, "grad_norm": 5.700876235961914, "learning_rate": 3.3742718446601944e-05, "loss": 0.2819, "step": 3349 }, { "epoch": 32.48192771084337, "grad_norm": 5.5847930908203125, "learning_rate": 3.373786407766991e-05, "loss": 0.2921, "step": 3350 }, { "epoch": 32.49156626506024, "grad_norm": 7.318751811981201, "learning_rate": 3.3733009708737866e-05, "loss": 0.2894, "step": 3351 }, { "epoch": 32.501204819277106, "grad_norm": 4.49857759475708, "learning_rate": 3.372815533980583e-05, "loss": 0.1686, "step": 3352 }, { "epoch": 32.51084337349398, "grad_norm": 1.9531809091567993, "learning_rate": 3.372330097087379e-05, "loss": 0.2125, "step": 3353 }, { "epoch": 32.52048192771084, "grad_norm": 5.929538249969482, "learning_rate": 3.371844660194175e-05, "loss": 0.2979, "step": 3354 }, { "epoch": 32.53012048192771, "grad_norm": 3.163273811340332, "learning_rate": 3.371359223300971e-05, "loss": 0.1533, "step": 3355 }, { "epoch": 32.53975903614458, "grad_norm": 6.7033514976501465, "learning_rate": 3.3708737864077675e-05, "loss": 0.2899, "step": 3356 }, { "epoch": 32.549397590361444, "grad_norm": 5.580545425415039, "learning_rate": 3.370388349514563e-05, "loss": 0.1737, "step": 3357 }, { "epoch": 32.559036144578315, "grad_norm": 3.745936393737793, "learning_rate": 3.369902912621359e-05, "loss": 0.3061, "step": 3358 }, { "epoch": 32.56867469879518, "grad_norm": 7.14634895324707, "learning_rate": 3.3694174757281555e-05, "loss": 0.2277, "step": 3359 }, { "epoch": 32.57831325301205, "grad_norm": 5.619051933288574, "learning_rate": 3.368932038834951e-05, "loss": 0.2915, "step": 3360 }, { "epoch": 32.58795180722892, "grad_norm": 7.406181335449219, "learning_rate": 3.368446601941748e-05, "loss": 0.3112, "step": 3361 }, { "epoch": 32.59759036144578, "grad_norm": 6.699826240539551, "learning_rate": 3.3679611650485435e-05, "loss": 0.1799, "step": 3362 }, { "epoch": 32.60722891566265, "grad_norm": 4.125115871429443, "learning_rate": 3.36747572815534e-05, "loss": 0.1485, "step": 3363 }, { "epoch": 32.616867469879516, "grad_norm": 5.66654634475708, "learning_rate": 3.366990291262136e-05, "loss": 0.1887, "step": 3364 }, { "epoch": 32.626506024096386, "grad_norm": 11.751638412475586, "learning_rate": 3.366504854368932e-05, "loss": 0.3274, "step": 3365 }, { "epoch": 32.63614457831325, "grad_norm": 3.8811628818511963, "learning_rate": 3.366019417475728e-05, "loss": 0.1708, "step": 3366 }, { "epoch": 32.64578313253012, "grad_norm": 7.424455165863037, "learning_rate": 3.3655339805825244e-05, "loss": 0.495, "step": 3367 }, { "epoch": 32.65542168674699, "grad_norm": 6.117343902587891, "learning_rate": 3.36504854368932e-05, "loss": 0.1327, "step": 3368 }, { "epoch": 32.665060240963854, "grad_norm": 4.284964561462402, "learning_rate": 3.364563106796117e-05, "loss": 0.3587, "step": 3369 }, { "epoch": 32.674698795180724, "grad_norm": 1.9533618688583374, "learning_rate": 3.3640776699029125e-05, "loss": 0.0866, "step": 3370 }, { "epoch": 32.68433734939759, "grad_norm": 7.438155651092529, "learning_rate": 3.363592233009709e-05, "loss": 0.4077, "step": 3371 }, { "epoch": 32.69397590361446, "grad_norm": 5.360297679901123, "learning_rate": 3.3631067961165054e-05, "loss": 0.3494, "step": 3372 }, { "epoch": 32.70361445783133, "grad_norm": 4.435668468475342, "learning_rate": 3.362621359223301e-05, "loss": 0.4451, "step": 3373 }, { "epoch": 32.71325301204819, "grad_norm": 5.566134452819824, "learning_rate": 3.3621359223300976e-05, "loss": 0.4668, "step": 3374 }, { "epoch": 32.72289156626506, "grad_norm": 3.0661537647247314, "learning_rate": 3.3616504854368934e-05, "loss": 0.1709, "step": 3375 }, { "epoch": 32.732530120481925, "grad_norm": 2.5472373962402344, "learning_rate": 3.36116504854369e-05, "loss": 0.1082, "step": 3376 }, { "epoch": 32.742168674698796, "grad_norm": 1.5460624694824219, "learning_rate": 3.3606796116504856e-05, "loss": 0.0988, "step": 3377 }, { "epoch": 32.75180722891566, "grad_norm": 3.4894280433654785, "learning_rate": 3.360194174757282e-05, "loss": 0.1807, "step": 3378 }, { "epoch": 32.76144578313253, "grad_norm": 4.335073947906494, "learning_rate": 3.359708737864078e-05, "loss": 0.321, "step": 3379 }, { "epoch": 32.7710843373494, "grad_norm": 4.915331840515137, "learning_rate": 3.359223300970874e-05, "loss": 0.2428, "step": 3380 }, { "epoch": 32.78072289156626, "grad_norm": 3.4896719455718994, "learning_rate": 3.35873786407767e-05, "loss": 0.1218, "step": 3381 }, { "epoch": 32.790361445783134, "grad_norm": 6.740962505340576, "learning_rate": 3.3582524271844665e-05, "loss": 0.2318, "step": 3382 }, { "epoch": 32.8, "grad_norm": 4.16269588470459, "learning_rate": 3.357766990291262e-05, "loss": 0.1192, "step": 3383 }, { "epoch": 32.80963855421687, "grad_norm": 3.7304115295410156, "learning_rate": 3.357281553398058e-05, "loss": 0.1975, "step": 3384 }, { "epoch": 32.81927710843374, "grad_norm": 2.605294704437256, "learning_rate": 3.3567961165048545e-05, "loss": 0.1774, "step": 3385 }, { "epoch": 32.8289156626506, "grad_norm": 3.989806652069092, "learning_rate": 3.35631067961165e-05, "loss": 0.2782, "step": 3386 }, { "epoch": 32.83855421686747, "grad_norm": 3.596440315246582, "learning_rate": 3.355825242718447e-05, "loss": 0.1554, "step": 3387 }, { "epoch": 32.848192771084335, "grad_norm": 1.767595887184143, "learning_rate": 3.3553398058252425e-05, "loss": 0.1759, "step": 3388 }, { "epoch": 32.857831325301206, "grad_norm": 9.867527961730957, "learning_rate": 3.354854368932039e-05, "loss": 0.3354, "step": 3389 }, { "epoch": 32.86746987951807, "grad_norm": 2.7767860889434814, "learning_rate": 3.354368932038835e-05, "loss": 0.2691, "step": 3390 }, { "epoch": 32.87710843373494, "grad_norm": 4.924851417541504, "learning_rate": 3.353883495145631e-05, "loss": 0.3605, "step": 3391 }, { "epoch": 32.88674698795181, "grad_norm": 2.6628708839416504, "learning_rate": 3.353398058252427e-05, "loss": 0.2145, "step": 3392 }, { "epoch": 32.89638554216867, "grad_norm": 8.393813133239746, "learning_rate": 3.3529126213592234e-05, "loss": 0.2281, "step": 3393 }, { "epoch": 32.90602409638554, "grad_norm": 4.675219535827637, "learning_rate": 3.352427184466019e-05, "loss": 0.4529, "step": 3394 }, { "epoch": 32.91566265060241, "grad_norm": 3.670679807662964, "learning_rate": 3.3519417475728157e-05, "loss": 0.3564, "step": 3395 }, { "epoch": 32.92530120481928, "grad_norm": 4.415266513824463, "learning_rate": 3.351456310679612e-05, "loss": 0.2465, "step": 3396 }, { "epoch": 32.93493975903615, "grad_norm": 3.0392229557037354, "learning_rate": 3.350970873786408e-05, "loss": 0.2967, "step": 3397 }, { "epoch": 32.94457831325301, "grad_norm": 5.890130043029785, "learning_rate": 3.350485436893204e-05, "loss": 0.4469, "step": 3398 }, { "epoch": 32.95421686746988, "grad_norm": 2.4414303302764893, "learning_rate": 3.35e-05, "loss": 0.226, "step": 3399 }, { "epoch": 32.963855421686745, "grad_norm": 2.492427349090576, "learning_rate": 3.3495145631067966e-05, "loss": 0.1229, "step": 3400 }, { "epoch": 32.973493975903615, "grad_norm": 4.236627578735352, "learning_rate": 3.3490291262135923e-05, "loss": 0.2033, "step": 3401 }, { "epoch": 32.98313253012048, "grad_norm": 2.8976638317108154, "learning_rate": 3.348543689320389e-05, "loss": 0.2377, "step": 3402 }, { "epoch": 32.99277108433735, "grad_norm": 3.768838882446289, "learning_rate": 3.3480582524271846e-05, "loss": 0.3656, "step": 3403 }, { "epoch": 33.00843373493976, "grad_norm": 14.691397666931152, "learning_rate": 3.347572815533981e-05, "loss": 0.1682, "step": 3404 }, { "epoch": 33.01807228915663, "grad_norm": 2.465489149093628, "learning_rate": 3.347087378640777e-05, "loss": 0.3548, "step": 3405 }, { "epoch": 33.02771084337349, "grad_norm": 16.669240951538086, "learning_rate": 3.346601941747573e-05, "loss": 0.2724, "step": 3406 }, { "epoch": 33.03734939759036, "grad_norm": 1.9180883169174194, "learning_rate": 3.346116504854369e-05, "loss": 0.2485, "step": 3407 }, { "epoch": 33.04698795180723, "grad_norm": 3.819139242172241, "learning_rate": 3.3456310679611655e-05, "loss": 0.3957, "step": 3408 }, { "epoch": 33.056626506024095, "grad_norm": 24.162641525268555, "learning_rate": 3.345145631067961e-05, "loss": 0.2775, "step": 3409 }, { "epoch": 33.066265060240966, "grad_norm": 10.241257667541504, "learning_rate": 3.344660194174758e-05, "loss": 0.1847, "step": 3410 }, { "epoch": 33.07590361445783, "grad_norm": 4.3638176918029785, "learning_rate": 3.3441747572815535e-05, "loss": 0.3558, "step": 3411 }, { "epoch": 33.0855421686747, "grad_norm": 2.334998607635498, "learning_rate": 3.343689320388349e-05, "loss": 0.1816, "step": 3412 }, { "epoch": 33.09518072289157, "grad_norm": 12.426309585571289, "learning_rate": 3.343203883495146e-05, "loss": 0.2907, "step": 3413 }, { "epoch": 33.10481927710843, "grad_norm": 3.043905019760132, "learning_rate": 3.3427184466019415e-05, "loss": 0.2744, "step": 3414 }, { "epoch": 33.1144578313253, "grad_norm": 3.2992639541625977, "learning_rate": 3.342233009708738e-05, "loss": 0.2222, "step": 3415 }, { "epoch": 33.12409638554217, "grad_norm": 1.244226336479187, "learning_rate": 3.341747572815534e-05, "loss": 0.299, "step": 3416 }, { "epoch": 33.13373493975904, "grad_norm": 7.075989246368408, "learning_rate": 3.34126213592233e-05, "loss": 0.2042, "step": 3417 }, { "epoch": 33.1433734939759, "grad_norm": 6.947476387023926, "learning_rate": 3.340776699029126e-05, "loss": 0.2841, "step": 3418 }, { "epoch": 33.15301204819277, "grad_norm": 9.447361946105957, "learning_rate": 3.3402912621359224e-05, "loss": 0.34, "step": 3419 }, { "epoch": 33.16265060240964, "grad_norm": 20.77237319946289, "learning_rate": 3.339805825242718e-05, "loss": 0.3201, "step": 3420 }, { "epoch": 33.172289156626505, "grad_norm": 1.6279988288879395, "learning_rate": 3.339320388349515e-05, "loss": 0.1864, "step": 3421 }, { "epoch": 33.181927710843375, "grad_norm": 8.541754722595215, "learning_rate": 3.338834951456311e-05, "loss": 0.2188, "step": 3422 }, { "epoch": 33.19156626506024, "grad_norm": 2.4182090759277344, "learning_rate": 3.338349514563107e-05, "loss": 0.4179, "step": 3423 }, { "epoch": 33.20120481927711, "grad_norm": 32.05785369873047, "learning_rate": 3.337864077669903e-05, "loss": 0.4156, "step": 3424 }, { "epoch": 33.21084337349398, "grad_norm": 23.6837215423584, "learning_rate": 3.337378640776699e-05, "loss": 0.2297, "step": 3425 }, { "epoch": 33.22048192771084, "grad_norm": 2.655033826828003, "learning_rate": 3.3368932038834955e-05, "loss": 0.1128, "step": 3426 }, { "epoch": 33.23012048192771, "grad_norm": 5.193277835845947, "learning_rate": 3.336407766990291e-05, "loss": 0.2474, "step": 3427 }, { "epoch": 33.23975903614458, "grad_norm": 14.666878700256348, "learning_rate": 3.335922330097088e-05, "loss": 0.4563, "step": 3428 }, { "epoch": 33.24939759036145, "grad_norm": 18.632078170776367, "learning_rate": 3.3354368932038836e-05, "loss": 0.2532, "step": 3429 }, { "epoch": 33.25903614457831, "grad_norm": 19.805156707763672, "learning_rate": 3.33495145631068e-05, "loss": 0.2492, "step": 3430 }, { "epoch": 33.26867469879518, "grad_norm": 1.752922773361206, "learning_rate": 3.334466019417476e-05, "loss": 0.0939, "step": 3431 }, { "epoch": 33.27831325301205, "grad_norm": 20.81438636779785, "learning_rate": 3.333980582524272e-05, "loss": 0.1966, "step": 3432 }, { "epoch": 33.287951807228914, "grad_norm": 1.8223901987075806, "learning_rate": 3.333495145631068e-05, "loss": 0.2274, "step": 3433 }, { "epoch": 33.297590361445785, "grad_norm": 1.1632524728775024, "learning_rate": 3.3330097087378645e-05, "loss": 0.284, "step": 3434 }, { "epoch": 33.30722891566265, "grad_norm": 2.510695219039917, "learning_rate": 3.33252427184466e-05, "loss": 0.3103, "step": 3435 }, { "epoch": 33.31686746987952, "grad_norm": 1.4946500062942505, "learning_rate": 3.332038834951457e-05, "loss": 0.2533, "step": 3436 }, { "epoch": 33.32650602409639, "grad_norm": 2.8361072540283203, "learning_rate": 3.3315533980582525e-05, "loss": 0.1454, "step": 3437 }, { "epoch": 33.33614457831325, "grad_norm": 14.023506164550781, "learning_rate": 3.331067961165049e-05, "loss": 0.5156, "step": 3438 }, { "epoch": 33.34578313253012, "grad_norm": 6.301320552825928, "learning_rate": 3.330582524271845e-05, "loss": 0.2489, "step": 3439 }, { "epoch": 33.355421686746986, "grad_norm": 10.289031028747559, "learning_rate": 3.3300970873786405e-05, "loss": 0.2375, "step": 3440 }, { "epoch": 33.36506024096386, "grad_norm": 2.7230563163757324, "learning_rate": 3.329611650485437e-05, "loss": 0.1192, "step": 3441 }, { "epoch": 33.37469879518072, "grad_norm": 3.5718908309936523, "learning_rate": 3.329126213592233e-05, "loss": 0.153, "step": 3442 }, { "epoch": 33.38433734939759, "grad_norm": 17.299644470214844, "learning_rate": 3.328640776699029e-05, "loss": 0.1945, "step": 3443 }, { "epoch": 33.39397590361446, "grad_norm": 3.1758151054382324, "learning_rate": 3.328155339805825e-05, "loss": 0.4139, "step": 3444 }, { "epoch": 33.403614457831324, "grad_norm": 4.729641914367676, "learning_rate": 3.3276699029126214e-05, "loss": 0.412, "step": 3445 }, { "epoch": 33.413253012048195, "grad_norm": 5.146904945373535, "learning_rate": 3.327184466019418e-05, "loss": 0.2166, "step": 3446 }, { "epoch": 33.42289156626506, "grad_norm": 40.883243560791016, "learning_rate": 3.326699029126214e-05, "loss": 0.3102, "step": 3447 }, { "epoch": 33.43253012048193, "grad_norm": 23.205312728881836, "learning_rate": 3.32621359223301e-05, "loss": 0.4157, "step": 3448 }, { "epoch": 33.44216867469879, "grad_norm": 29.114105224609375, "learning_rate": 3.3257281553398065e-05, "loss": 0.182, "step": 3449 }, { "epoch": 33.45180722891566, "grad_norm": 5.478951454162598, "learning_rate": 3.325242718446602e-05, "loss": 0.2967, "step": 3450 }, { "epoch": 33.46144578313253, "grad_norm": 15.39492130279541, "learning_rate": 3.324757281553398e-05, "loss": 0.16, "step": 3451 }, { "epoch": 33.471084337349396, "grad_norm": 10.558228492736816, "learning_rate": 3.3242718446601945e-05, "loss": 0.2743, "step": 3452 }, { "epoch": 33.480722891566266, "grad_norm": 1.7091954946517944, "learning_rate": 3.32378640776699e-05, "loss": 0.275, "step": 3453 }, { "epoch": 33.49036144578313, "grad_norm": 5.472003936767578, "learning_rate": 3.323300970873787e-05, "loss": 0.2117, "step": 3454 }, { "epoch": 33.5, "grad_norm": 7.052769184112549, "learning_rate": 3.3228155339805825e-05, "loss": 0.2601, "step": 3455 }, { "epoch": 33.50963855421687, "grad_norm": 19.840543746948242, "learning_rate": 3.322330097087379e-05, "loss": 0.3807, "step": 3456 }, { "epoch": 33.519277108433734, "grad_norm": 1.7582685947418213, "learning_rate": 3.321844660194175e-05, "loss": 0.2532, "step": 3457 }, { "epoch": 33.528915662650604, "grad_norm": 6.800018787384033, "learning_rate": 3.321359223300971e-05, "loss": 0.4052, "step": 3458 }, { "epoch": 33.53855421686747, "grad_norm": 11.425762176513672, "learning_rate": 3.320873786407767e-05, "loss": 0.1917, "step": 3459 }, { "epoch": 33.54819277108434, "grad_norm": 30.37370491027832, "learning_rate": 3.3203883495145634e-05, "loss": 0.528, "step": 3460 }, { "epoch": 33.55783132530121, "grad_norm": 5.192528247833252, "learning_rate": 3.319902912621359e-05, "loss": 0.1578, "step": 3461 }, { "epoch": 33.56746987951807, "grad_norm": 10.533684730529785, "learning_rate": 3.319417475728156e-05, "loss": 0.1596, "step": 3462 }, { "epoch": 33.57710843373494, "grad_norm": 15.82174015045166, "learning_rate": 3.3189320388349514e-05, "loss": 0.2519, "step": 3463 }, { "epoch": 33.586746987951805, "grad_norm": 4.028344631195068, "learning_rate": 3.318446601941748e-05, "loss": 0.2659, "step": 3464 }, { "epoch": 33.596385542168676, "grad_norm": 2.609952688217163, "learning_rate": 3.317961165048544e-05, "loss": 0.2698, "step": 3465 }, { "epoch": 33.60602409638554, "grad_norm": 5.508668422698975, "learning_rate": 3.31747572815534e-05, "loss": 0.1376, "step": 3466 }, { "epoch": 33.61566265060241, "grad_norm": 17.888952255249023, "learning_rate": 3.316990291262136e-05, "loss": 0.3143, "step": 3467 }, { "epoch": 33.62530120481928, "grad_norm": 18.03384017944336, "learning_rate": 3.316504854368932e-05, "loss": 0.3008, "step": 3468 }, { "epoch": 33.63493975903614, "grad_norm": 13.531493186950684, "learning_rate": 3.316019417475728e-05, "loss": 0.2409, "step": 3469 }, { "epoch": 33.644578313253014, "grad_norm": 10.965315818786621, "learning_rate": 3.315533980582524e-05, "loss": 0.19, "step": 3470 }, { "epoch": 33.65421686746988, "grad_norm": 8.760797500610352, "learning_rate": 3.3150485436893204e-05, "loss": 0.2869, "step": 3471 }, { "epoch": 33.66385542168675, "grad_norm": 4.890308856964111, "learning_rate": 3.314563106796117e-05, "loss": 0.1985, "step": 3472 }, { "epoch": 33.67349397590361, "grad_norm": 15.946928977966309, "learning_rate": 3.314077669902913e-05, "loss": 0.2276, "step": 3473 }, { "epoch": 33.68313253012048, "grad_norm": 18.02727699279785, "learning_rate": 3.313592233009709e-05, "loss": 0.3514, "step": 3474 }, { "epoch": 33.69277108433735, "grad_norm": 10.521312713623047, "learning_rate": 3.3131067961165055e-05, "loss": 0.1881, "step": 3475 }, { "epoch": 33.702409638554215, "grad_norm": 11.230083465576172, "learning_rate": 3.312621359223301e-05, "loss": 0.3246, "step": 3476 }, { "epoch": 33.712048192771086, "grad_norm": 8.175868034362793, "learning_rate": 3.312135922330098e-05, "loss": 0.2726, "step": 3477 }, { "epoch": 33.72168674698795, "grad_norm": 14.577780723571777, "learning_rate": 3.3116504854368935e-05, "loss": 0.2681, "step": 3478 }, { "epoch": 33.73132530120482, "grad_norm": 3.8501293659210205, "learning_rate": 3.311165048543689e-05, "loss": 0.2387, "step": 3479 }, { "epoch": 33.74096385542169, "grad_norm": 7.719604969024658, "learning_rate": 3.310679611650486e-05, "loss": 0.4391, "step": 3480 }, { "epoch": 33.75060240963855, "grad_norm": 19.38561248779297, "learning_rate": 3.3101941747572815e-05, "loss": 0.1654, "step": 3481 }, { "epoch": 33.76024096385542, "grad_norm": 3.316598415374756, "learning_rate": 3.309708737864078e-05, "loss": 0.2707, "step": 3482 }, { "epoch": 33.76987951807229, "grad_norm": 17.2416934967041, "learning_rate": 3.309223300970874e-05, "loss": 0.3223, "step": 3483 }, { "epoch": 33.77951807228916, "grad_norm": 1.1010401248931885, "learning_rate": 3.30873786407767e-05, "loss": 0.133, "step": 3484 }, { "epoch": 33.78915662650603, "grad_norm": 23.064485549926758, "learning_rate": 3.308252427184466e-05, "loss": 0.337, "step": 3485 }, { "epoch": 33.79879518072289, "grad_norm": 26.16805076599121, "learning_rate": 3.3077669902912624e-05, "loss": 0.5861, "step": 3486 }, { "epoch": 33.80843373493976, "grad_norm": 3.5488178730010986, "learning_rate": 3.307281553398058e-05, "loss": 0.2122, "step": 3487 }, { "epoch": 33.818072289156625, "grad_norm": 7.135069370269775, "learning_rate": 3.3067961165048546e-05, "loss": 0.2209, "step": 3488 }, { "epoch": 33.827710843373495, "grad_norm": 8.009391784667969, "learning_rate": 3.3063106796116504e-05, "loss": 0.3159, "step": 3489 }, { "epoch": 33.83734939759036, "grad_norm": 11.802508354187012, "learning_rate": 3.305825242718447e-05, "loss": 0.2817, "step": 3490 }, { "epoch": 33.84698795180723, "grad_norm": 7.009512901306152, "learning_rate": 3.3053398058252427e-05, "loss": 0.3531, "step": 3491 }, { "epoch": 33.8566265060241, "grad_norm": 9.105693817138672, "learning_rate": 3.304854368932039e-05, "loss": 0.2075, "step": 3492 }, { "epoch": 33.86626506024096, "grad_norm": 58.90906524658203, "learning_rate": 3.304368932038835e-05, "loss": 0.2634, "step": 3493 }, { "epoch": 33.87590361445783, "grad_norm": 6.668754577636719, "learning_rate": 3.303883495145631e-05, "loss": 0.2269, "step": 3494 }, { "epoch": 33.8855421686747, "grad_norm": 4.387411117553711, "learning_rate": 3.303398058252427e-05, "loss": 0.2857, "step": 3495 }, { "epoch": 33.89518072289157, "grad_norm": 4.2215118408203125, "learning_rate": 3.302912621359223e-05, "loss": 0.2487, "step": 3496 }, { "epoch": 33.90481927710843, "grad_norm": 1.531717300415039, "learning_rate": 3.30242718446602e-05, "loss": 0.1913, "step": 3497 }, { "epoch": 33.9144578313253, "grad_norm": 6.301485538482666, "learning_rate": 3.301941747572816e-05, "loss": 0.2416, "step": 3498 }, { "epoch": 33.92409638554217, "grad_norm": 12.07851791381836, "learning_rate": 3.301456310679612e-05, "loss": 0.236, "step": 3499 }, { "epoch": 33.933734939759034, "grad_norm": 4.3974289894104, "learning_rate": 3.300970873786408e-05, "loss": 0.1194, "step": 3500 }, { "epoch": 33.943373493975905, "grad_norm": 6.221624374389648, "learning_rate": 3.3004854368932045e-05, "loss": 0.1935, "step": 3501 }, { "epoch": 33.95301204819277, "grad_norm": 14.482217788696289, "learning_rate": 3.3e-05, "loss": 0.2801, "step": 3502 }, { "epoch": 33.96265060240964, "grad_norm": 3.0100133419036865, "learning_rate": 3.299514563106797e-05, "loss": 0.1807, "step": 3503 }, { "epoch": 33.97228915662651, "grad_norm": 10.064688682556152, "learning_rate": 3.2990291262135925e-05, "loss": 0.1362, "step": 3504 }, { "epoch": 33.98192771084337, "grad_norm": 4.8150811195373535, "learning_rate": 3.298543689320389e-05, "loss": 0.4001, "step": 3505 }, { "epoch": 33.99156626506024, "grad_norm": 10.984674453735352, "learning_rate": 3.298058252427185e-05, "loss": 0.2106, "step": 3506 }, { "epoch": 34.00722891566265, "grad_norm": 13.351523399353027, "learning_rate": 3.2975728155339805e-05, "loss": 0.3022, "step": 3507 }, { "epoch": 34.01686746987952, "grad_norm": 8.142618179321289, "learning_rate": 3.297087378640777e-05, "loss": 0.3271, "step": 3508 }, { "epoch": 34.026506024096385, "grad_norm": 4.554386138916016, "learning_rate": 3.296601941747573e-05, "loss": 0.2384, "step": 3509 }, { "epoch": 34.036144578313255, "grad_norm": 2.118495225906372, "learning_rate": 3.296116504854369e-05, "loss": 0.186, "step": 3510 }, { "epoch": 34.04578313253012, "grad_norm": 2.927530527114868, "learning_rate": 3.295631067961165e-05, "loss": 0.2358, "step": 3511 }, { "epoch": 34.05542168674699, "grad_norm": 1.5089186429977417, "learning_rate": 3.2951456310679614e-05, "loss": 0.201, "step": 3512 }, { "epoch": 34.06506024096385, "grad_norm": 8.734161376953125, "learning_rate": 3.294660194174757e-05, "loss": 0.38, "step": 3513 }, { "epoch": 34.07469879518072, "grad_norm": 5.497509479522705, "learning_rate": 3.2941747572815536e-05, "loss": 0.1631, "step": 3514 }, { "epoch": 34.08433734939759, "grad_norm": 1.9267503023147583, "learning_rate": 3.2936893203883494e-05, "loss": 0.1631, "step": 3515 }, { "epoch": 34.09397590361446, "grad_norm": 10.491135597229004, "learning_rate": 3.293203883495146e-05, "loss": 0.1641, "step": 3516 }, { "epoch": 34.10361445783133, "grad_norm": 16.477832794189453, "learning_rate": 3.2927184466019416e-05, "loss": 0.2173, "step": 3517 }, { "epoch": 34.11325301204819, "grad_norm": 1.5153292417526245, "learning_rate": 3.292233009708738e-05, "loss": 0.2264, "step": 3518 }, { "epoch": 34.12289156626506, "grad_norm": 5.238208293914795, "learning_rate": 3.291747572815534e-05, "loss": 0.0942, "step": 3519 }, { "epoch": 34.13253012048193, "grad_norm": 4.22210693359375, "learning_rate": 3.29126213592233e-05, "loss": 0.2991, "step": 3520 }, { "epoch": 34.142168674698794, "grad_norm": 13.453289031982422, "learning_rate": 3.290776699029126e-05, "loss": 0.2899, "step": 3521 }, { "epoch": 34.151807228915665, "grad_norm": 7.292661666870117, "learning_rate": 3.2902912621359225e-05, "loss": 0.3237, "step": 3522 }, { "epoch": 34.16144578313253, "grad_norm": 10.02364444732666, "learning_rate": 3.289805825242719e-05, "loss": 0.341, "step": 3523 }, { "epoch": 34.1710843373494, "grad_norm": 9.404743194580078, "learning_rate": 3.289320388349515e-05, "loss": 0.279, "step": 3524 }, { "epoch": 34.18072289156626, "grad_norm": 2.0281546115875244, "learning_rate": 3.288834951456311e-05, "loss": 0.3099, "step": 3525 }, { "epoch": 34.19036144578313, "grad_norm": 7.4737348556518555, "learning_rate": 3.288349514563107e-05, "loss": 0.2401, "step": 3526 }, { "epoch": 34.2, "grad_norm": 4.553856372833252, "learning_rate": 3.2878640776699035e-05, "loss": 0.2511, "step": 3527 }, { "epoch": 34.209638554216866, "grad_norm": 4.276704788208008, "learning_rate": 3.287378640776699e-05, "loss": 0.2488, "step": 3528 }, { "epoch": 34.21927710843374, "grad_norm": 1.466089129447937, "learning_rate": 3.286893203883496e-05, "loss": 0.2847, "step": 3529 }, { "epoch": 34.2289156626506, "grad_norm": 6.1018195152282715, "learning_rate": 3.2864077669902915e-05, "loss": 0.165, "step": 3530 }, { "epoch": 34.23855421686747, "grad_norm": 2.8432867527008057, "learning_rate": 3.285922330097088e-05, "loss": 0.1097, "step": 3531 }, { "epoch": 34.24819277108434, "grad_norm": 14.484807014465332, "learning_rate": 3.285436893203884e-05, "loss": 0.2819, "step": 3532 }, { "epoch": 34.257831325301204, "grad_norm": 20.87466812133789, "learning_rate": 3.2849514563106795e-05, "loss": 0.4038, "step": 3533 }, { "epoch": 34.267469879518075, "grad_norm": 11.338109016418457, "learning_rate": 3.284466019417476e-05, "loss": 0.0965, "step": 3534 }, { "epoch": 34.27710843373494, "grad_norm": 3.159616470336914, "learning_rate": 3.283980582524272e-05, "loss": 0.2499, "step": 3535 }, { "epoch": 34.28674698795181, "grad_norm": 3.3051953315734863, "learning_rate": 3.283495145631068e-05, "loss": 0.3007, "step": 3536 }, { "epoch": 34.29638554216867, "grad_norm": 15.0205717086792, "learning_rate": 3.283009708737864e-05, "loss": 0.308, "step": 3537 }, { "epoch": 34.30602409638554, "grad_norm": 8.114134788513184, "learning_rate": 3.2825242718446604e-05, "loss": 0.3299, "step": 3538 }, { "epoch": 34.31566265060241, "grad_norm": 3.128953456878662, "learning_rate": 3.282038834951456e-05, "loss": 0.1441, "step": 3539 }, { "epoch": 34.325301204819276, "grad_norm": 11.641457557678223, "learning_rate": 3.2815533980582526e-05, "loss": 0.3661, "step": 3540 }, { "epoch": 34.334939759036146, "grad_norm": 3.699099063873291, "learning_rate": 3.2810679611650484e-05, "loss": 0.1719, "step": 3541 }, { "epoch": 34.34457831325301, "grad_norm": 5.5693769454956055, "learning_rate": 3.280582524271845e-05, "loss": 0.4036, "step": 3542 }, { "epoch": 34.35421686746988, "grad_norm": 18.86063003540039, "learning_rate": 3.2800970873786406e-05, "loss": 0.2166, "step": 3543 }, { "epoch": 34.36385542168675, "grad_norm": 2.699877977371216, "learning_rate": 3.279611650485437e-05, "loss": 0.2452, "step": 3544 }, { "epoch": 34.373493975903614, "grad_norm": 2.5372562408447266, "learning_rate": 3.279126213592233e-05, "loss": 0.1211, "step": 3545 }, { "epoch": 34.383132530120484, "grad_norm": 11.94820499420166, "learning_rate": 3.278640776699029e-05, "loss": 0.3303, "step": 3546 }, { "epoch": 34.39277108433735, "grad_norm": 6.063055038452148, "learning_rate": 3.278155339805826e-05, "loss": 0.2319, "step": 3547 }, { "epoch": 34.40240963855422, "grad_norm": 10.014002799987793, "learning_rate": 3.2776699029126215e-05, "loss": 0.3142, "step": 3548 }, { "epoch": 34.41204819277108, "grad_norm": 7.963150978088379, "learning_rate": 3.277184466019418e-05, "loss": 0.2005, "step": 3549 }, { "epoch": 34.42168674698795, "grad_norm": 4.519878387451172, "learning_rate": 3.276699029126214e-05, "loss": 0.1831, "step": 3550 }, { "epoch": 34.43132530120482, "grad_norm": 5.135452747344971, "learning_rate": 3.27621359223301e-05, "loss": 0.2059, "step": 3551 }, { "epoch": 34.440963855421685, "grad_norm": 1.2033802270889282, "learning_rate": 3.275728155339806e-05, "loss": 0.2117, "step": 3552 }, { "epoch": 34.450602409638556, "grad_norm": 11.4978666305542, "learning_rate": 3.2752427184466024e-05, "loss": 0.3591, "step": 3553 }, { "epoch": 34.46024096385542, "grad_norm": 15.843090057373047, "learning_rate": 3.274757281553398e-05, "loss": 0.3224, "step": 3554 }, { "epoch": 34.46987951807229, "grad_norm": 1.2851552963256836, "learning_rate": 3.274271844660195e-05, "loss": 0.1916, "step": 3555 }, { "epoch": 34.47951807228916, "grad_norm": 11.249265670776367, "learning_rate": 3.2737864077669904e-05, "loss": 0.4774, "step": 3556 }, { "epoch": 34.48915662650602, "grad_norm": 3.1788713932037354, "learning_rate": 3.273300970873787e-05, "loss": 0.5093, "step": 3557 }, { "epoch": 34.498795180722894, "grad_norm": 3.0239298343658447, "learning_rate": 3.272815533980583e-05, "loss": 0.2442, "step": 3558 }, { "epoch": 34.50843373493976, "grad_norm": 16.48545265197754, "learning_rate": 3.272330097087379e-05, "loss": 0.2345, "step": 3559 }, { "epoch": 34.51807228915663, "grad_norm": 11.37020492553711, "learning_rate": 3.271844660194175e-05, "loss": 0.2741, "step": 3560 }, { "epoch": 34.52771084337349, "grad_norm": 3.9934682846069336, "learning_rate": 3.271359223300971e-05, "loss": 0.2734, "step": 3561 }, { "epoch": 34.53734939759036, "grad_norm": 11.972067832946777, "learning_rate": 3.270873786407767e-05, "loss": 0.1976, "step": 3562 }, { "epoch": 34.54698795180723, "grad_norm": 8.42177963256836, "learning_rate": 3.270388349514563e-05, "loss": 0.3024, "step": 3563 }, { "epoch": 34.556626506024095, "grad_norm": 9.46635627746582, "learning_rate": 3.2699029126213594e-05, "loss": 0.3749, "step": 3564 }, { "epoch": 34.566265060240966, "grad_norm": 4.266604900360107, "learning_rate": 3.269417475728155e-05, "loss": 0.2399, "step": 3565 }, { "epoch": 34.57590361445783, "grad_norm": 7.56987190246582, "learning_rate": 3.2689320388349516e-05, "loss": 0.3344, "step": 3566 }, { "epoch": 34.5855421686747, "grad_norm": 10.312796592712402, "learning_rate": 3.2684466019417474e-05, "loss": 0.1648, "step": 3567 }, { "epoch": 34.59518072289157, "grad_norm": 18.823383331298828, "learning_rate": 3.267961165048544e-05, "loss": 0.3162, "step": 3568 }, { "epoch": 34.60481927710843, "grad_norm": 6.709329605102539, "learning_rate": 3.2674757281553396e-05, "loss": 0.3035, "step": 3569 }, { "epoch": 34.6144578313253, "grad_norm": 10.91307544708252, "learning_rate": 3.266990291262136e-05, "loss": 0.1964, "step": 3570 }, { "epoch": 34.62409638554217, "grad_norm": 6.610882759094238, "learning_rate": 3.266504854368932e-05, "loss": 0.3085, "step": 3571 }, { "epoch": 34.63373493975904, "grad_norm": 15.037514686584473, "learning_rate": 3.266019417475728e-05, "loss": 0.2485, "step": 3572 }, { "epoch": 34.6433734939759, "grad_norm": 5.453941345214844, "learning_rate": 3.265533980582525e-05, "loss": 0.3505, "step": 3573 }, { "epoch": 34.65301204819277, "grad_norm": 3.4796969890594482, "learning_rate": 3.2650485436893205e-05, "loss": 0.2304, "step": 3574 }, { "epoch": 34.66265060240964, "grad_norm": 18.02227783203125, "learning_rate": 3.264563106796117e-05, "loss": 0.1896, "step": 3575 }, { "epoch": 34.672289156626505, "grad_norm": 9.728682518005371, "learning_rate": 3.264077669902913e-05, "loss": 0.1888, "step": 3576 }, { "epoch": 34.681927710843375, "grad_norm": 14.66132926940918, "learning_rate": 3.263592233009709e-05, "loss": 0.1409, "step": 3577 }, { "epoch": 34.69156626506024, "grad_norm": 4.12381649017334, "learning_rate": 3.263106796116505e-05, "loss": 0.1596, "step": 3578 }, { "epoch": 34.70120481927711, "grad_norm": 0.7779523134231567, "learning_rate": 3.2626213592233014e-05, "loss": 0.2115, "step": 3579 }, { "epoch": 34.71084337349397, "grad_norm": 2.587639093399048, "learning_rate": 3.262135922330097e-05, "loss": 0.2147, "step": 3580 }, { "epoch": 34.72048192771084, "grad_norm": 23.944869995117188, "learning_rate": 3.2616504854368936e-05, "loss": 0.5051, "step": 3581 }, { "epoch": 34.73012048192771, "grad_norm": 11.583670616149902, "learning_rate": 3.2611650485436894e-05, "loss": 0.2434, "step": 3582 }, { "epoch": 34.73975903614458, "grad_norm": 2.629016637802124, "learning_rate": 3.260679611650486e-05, "loss": 0.1581, "step": 3583 }, { "epoch": 34.74939759036145, "grad_norm": 10.632452964782715, "learning_rate": 3.2601941747572816e-05, "loss": 0.4457, "step": 3584 }, { "epoch": 34.75903614457831, "grad_norm": 5.537632942199707, "learning_rate": 3.259708737864078e-05, "loss": 0.1897, "step": 3585 }, { "epoch": 34.76867469879518, "grad_norm": 9.547481536865234, "learning_rate": 3.259223300970874e-05, "loss": 0.484, "step": 3586 }, { "epoch": 34.77831325301205, "grad_norm": 10.800271034240723, "learning_rate": 3.25873786407767e-05, "loss": 0.3053, "step": 3587 }, { "epoch": 34.787951807228914, "grad_norm": 6.696488380432129, "learning_rate": 3.258252427184466e-05, "loss": 0.2953, "step": 3588 }, { "epoch": 34.797590361445785, "grad_norm": 2.1872739791870117, "learning_rate": 3.257766990291262e-05, "loss": 0.2565, "step": 3589 }, { "epoch": 34.80722891566265, "grad_norm": 6.779180526733398, "learning_rate": 3.257281553398058e-05, "loss": 0.2292, "step": 3590 }, { "epoch": 34.81686746987952, "grad_norm": 11.749774932861328, "learning_rate": 3.256796116504854e-05, "loss": 0.2069, "step": 3591 }, { "epoch": 34.82650602409639, "grad_norm": 4.258011341094971, "learning_rate": 3.2563106796116506e-05, "loss": 0.2212, "step": 3592 }, { "epoch": 34.83614457831325, "grad_norm": 3.0936155319213867, "learning_rate": 3.2558252427184463e-05, "loss": 0.1493, "step": 3593 }, { "epoch": 34.84578313253012, "grad_norm": 6.13456392288208, "learning_rate": 3.255339805825243e-05, "loss": 0.3425, "step": 3594 }, { "epoch": 34.855421686746986, "grad_norm": 15.123964309692383, "learning_rate": 3.2548543689320386e-05, "loss": 0.277, "step": 3595 }, { "epoch": 34.86506024096386, "grad_norm": 3.0119407176971436, "learning_rate": 3.254368932038835e-05, "loss": 0.1453, "step": 3596 }, { "epoch": 34.87469879518072, "grad_norm": 21.64486312866211, "learning_rate": 3.253883495145631e-05, "loss": 0.3916, "step": 3597 }, { "epoch": 34.88433734939759, "grad_norm": 15.425394058227539, "learning_rate": 3.253398058252428e-05, "loss": 0.2126, "step": 3598 }, { "epoch": 34.89397590361446, "grad_norm": 14.07368278503418, "learning_rate": 3.252912621359224e-05, "loss": 0.3503, "step": 3599 }, { "epoch": 34.903614457831324, "grad_norm": 39.85719680786133, "learning_rate": 3.2524271844660195e-05, "loss": 0.2249, "step": 3600 }, { "epoch": 34.913253012048195, "grad_norm": 4.507078170776367, "learning_rate": 3.251941747572816e-05, "loss": 0.3467, "step": 3601 }, { "epoch": 34.92289156626506, "grad_norm": 3.4133245944976807, "learning_rate": 3.251456310679612e-05, "loss": 0.1574, "step": 3602 }, { "epoch": 34.93253012048193, "grad_norm": 41.73252868652344, "learning_rate": 3.250970873786408e-05, "loss": 0.3229, "step": 3603 }, { "epoch": 34.94216867469879, "grad_norm": 1.723146915435791, "learning_rate": 3.250485436893204e-05, "loss": 0.1461, "step": 3604 }, { "epoch": 34.95180722891566, "grad_norm": 9.419759750366211, "learning_rate": 3.2500000000000004e-05, "loss": 0.428, "step": 3605 }, { "epoch": 34.96144578313253, "grad_norm": 9.839902877807617, "learning_rate": 3.249514563106796e-05, "loss": 0.2673, "step": 3606 }, { "epoch": 34.971084337349396, "grad_norm": 14.24150562286377, "learning_rate": 3.2490291262135926e-05, "loss": 0.3559, "step": 3607 }, { "epoch": 34.980722891566266, "grad_norm": 3.4080252647399902, "learning_rate": 3.2485436893203884e-05, "loss": 0.1686, "step": 3608 }, { "epoch": 34.99036144578313, "grad_norm": 6.785669326782227, "learning_rate": 3.248058252427185e-05, "loss": 0.1514, "step": 3609 }, { "epoch": 35.006024096385545, "grad_norm": 7.881360054016113, "learning_rate": 3.2475728155339806e-05, "loss": 0.2263, "step": 3610 }, { "epoch": 35.01566265060241, "grad_norm": 3.6292037963867188, "learning_rate": 3.247087378640777e-05, "loss": 0.3946, "step": 3611 }, { "epoch": 35.02530120481928, "grad_norm": 12.142203330993652, "learning_rate": 3.246601941747573e-05, "loss": 0.1007, "step": 3612 }, { "epoch": 35.03493975903614, "grad_norm": 2.123485803604126, "learning_rate": 3.246116504854369e-05, "loss": 0.2154, "step": 3613 }, { "epoch": 35.04457831325301, "grad_norm": 3.6354970932006836, "learning_rate": 3.245631067961165e-05, "loss": 0.2745, "step": 3614 }, { "epoch": 35.05421686746988, "grad_norm": 4.14511775970459, "learning_rate": 3.2451456310679615e-05, "loss": 0.4047, "step": 3615 }, { "epoch": 35.063855421686746, "grad_norm": 7.623467445373535, "learning_rate": 3.244660194174757e-05, "loss": 0.2972, "step": 3616 }, { "epoch": 35.07349397590362, "grad_norm": 3.804047107696533, "learning_rate": 3.244174757281553e-05, "loss": 0.2425, "step": 3617 }, { "epoch": 35.08313253012048, "grad_norm": 1.8926769495010376, "learning_rate": 3.2436893203883495e-05, "loss": 0.0896, "step": 3618 }, { "epoch": 35.09277108433735, "grad_norm": 12.020818710327148, "learning_rate": 3.243203883495145e-05, "loss": 0.1938, "step": 3619 }, { "epoch": 35.102409638554214, "grad_norm": 3.003613233566284, "learning_rate": 3.242718446601942e-05, "loss": 0.34, "step": 3620 }, { "epoch": 35.112048192771084, "grad_norm": 6.652443885803223, "learning_rate": 3.2422330097087375e-05, "loss": 0.2549, "step": 3621 }, { "epoch": 35.121686746987955, "grad_norm": 33.12248229980469, "learning_rate": 3.241747572815534e-05, "loss": 0.2932, "step": 3622 }, { "epoch": 35.13132530120482, "grad_norm": 3.0109574794769287, "learning_rate": 3.2412621359223305e-05, "loss": 0.2605, "step": 3623 }, { "epoch": 35.14096385542169, "grad_norm": 46.38442611694336, "learning_rate": 3.240776699029127e-05, "loss": 0.1837, "step": 3624 }, { "epoch": 35.15060240963855, "grad_norm": 3.11746883392334, "learning_rate": 3.240291262135923e-05, "loss": 0.2334, "step": 3625 }, { "epoch": 35.16024096385542, "grad_norm": 3.491021156311035, "learning_rate": 3.239805825242719e-05, "loss": 0.2864, "step": 3626 }, { "epoch": 35.16987951807229, "grad_norm": 21.17428207397461, "learning_rate": 3.239320388349515e-05, "loss": 0.1175, "step": 3627 }, { "epoch": 35.179518072289156, "grad_norm": 5.177911758422852, "learning_rate": 3.238834951456311e-05, "loss": 0.3593, "step": 3628 }, { "epoch": 35.189156626506026, "grad_norm": 3.6669857501983643, "learning_rate": 3.238349514563107e-05, "loss": 0.3727, "step": 3629 }, { "epoch": 35.19879518072289, "grad_norm": 4.054187774658203, "learning_rate": 3.237864077669903e-05, "loss": 0.4166, "step": 3630 }, { "epoch": 35.20843373493976, "grad_norm": 5.069972991943359, "learning_rate": 3.2373786407766994e-05, "loss": 0.2723, "step": 3631 }, { "epoch": 35.21807228915662, "grad_norm": 2.797213315963745, "learning_rate": 3.236893203883495e-05, "loss": 0.1128, "step": 3632 }, { "epoch": 35.227710843373494, "grad_norm": 26.570232391357422, "learning_rate": 3.2364077669902916e-05, "loss": 0.1735, "step": 3633 }, { "epoch": 35.237349397590364, "grad_norm": 5.989343166351318, "learning_rate": 3.2359223300970874e-05, "loss": 0.1429, "step": 3634 }, { "epoch": 35.24698795180723, "grad_norm": 10.438506126403809, "learning_rate": 3.235436893203884e-05, "loss": 0.3196, "step": 3635 }, { "epoch": 35.2566265060241, "grad_norm": 6.918240070343018, "learning_rate": 3.2349514563106796e-05, "loss": 0.3696, "step": 3636 }, { "epoch": 35.26626506024096, "grad_norm": 20.321796417236328, "learning_rate": 3.234466019417476e-05, "loss": 0.3859, "step": 3637 }, { "epoch": 35.27590361445783, "grad_norm": 17.645526885986328, "learning_rate": 3.233980582524272e-05, "loss": 0.1353, "step": 3638 }, { "epoch": 35.2855421686747, "grad_norm": 4.485650539398193, "learning_rate": 3.233495145631068e-05, "loss": 0.1691, "step": 3639 }, { "epoch": 35.295180722891565, "grad_norm": 4.276099681854248, "learning_rate": 3.233009708737864e-05, "loss": 0.2727, "step": 3640 }, { "epoch": 35.304819277108436, "grad_norm": 4.1684041023254395, "learning_rate": 3.2325242718446605e-05, "loss": 0.1465, "step": 3641 }, { "epoch": 35.3144578313253, "grad_norm": 4.127691268920898, "learning_rate": 3.232038834951456e-05, "loss": 0.3453, "step": 3642 }, { "epoch": 35.32409638554217, "grad_norm": 27.763601303100586, "learning_rate": 3.231553398058253e-05, "loss": 0.5268, "step": 3643 }, { "epoch": 35.33373493975903, "grad_norm": 15.315250396728516, "learning_rate": 3.2310679611650485e-05, "loss": 0.506, "step": 3644 }, { "epoch": 35.3433734939759, "grad_norm": 6.226547718048096, "learning_rate": 3.230582524271844e-05, "loss": 0.209, "step": 3645 }, { "epoch": 35.353012048192774, "grad_norm": 1.941263198852539, "learning_rate": 3.230097087378641e-05, "loss": 0.1264, "step": 3646 }, { "epoch": 35.36265060240964, "grad_norm": 4.5898213386535645, "learning_rate": 3.2296116504854365e-05, "loss": 0.5053, "step": 3647 }, { "epoch": 35.37228915662651, "grad_norm": 5.077364921569824, "learning_rate": 3.2291262135922337e-05, "loss": 0.095, "step": 3648 }, { "epoch": 35.38192771084337, "grad_norm": 23.517518997192383, "learning_rate": 3.2286407766990294e-05, "loss": 0.3922, "step": 3649 }, { "epoch": 35.39156626506024, "grad_norm": 3.974107503890991, "learning_rate": 3.228155339805826e-05, "loss": 0.2832, "step": 3650 }, { "epoch": 35.40120481927711, "grad_norm": 11.576991081237793, "learning_rate": 3.227669902912622e-05, "loss": 0.3315, "step": 3651 }, { "epoch": 35.410843373493975, "grad_norm": 1.4233671426773071, "learning_rate": 3.227184466019418e-05, "loss": 0.2348, "step": 3652 }, { "epoch": 35.420481927710846, "grad_norm": 1.7445217370986938, "learning_rate": 3.226699029126214e-05, "loss": 0.2451, "step": 3653 }, { "epoch": 35.43012048192771, "grad_norm": 2.5256097316741943, "learning_rate": 3.2262135922330103e-05, "loss": 0.2996, "step": 3654 }, { "epoch": 35.43975903614458, "grad_norm": 6.133781909942627, "learning_rate": 3.225728155339806e-05, "loss": 0.2726, "step": 3655 }, { "epoch": 35.44939759036144, "grad_norm": 1.8580065965652466, "learning_rate": 3.225242718446602e-05, "loss": 0.159, "step": 3656 }, { "epoch": 35.45903614457831, "grad_norm": 6.5638861656188965, "learning_rate": 3.2247572815533984e-05, "loss": 0.109, "step": 3657 }, { "epoch": 35.46867469879518, "grad_norm": 21.280216217041016, "learning_rate": 3.224271844660194e-05, "loss": 0.2912, "step": 3658 }, { "epoch": 35.47831325301205, "grad_norm": 11.336487770080566, "learning_rate": 3.2237864077669906e-05, "loss": 0.3358, "step": 3659 }, { "epoch": 35.48795180722892, "grad_norm": 4.52598762512207, "learning_rate": 3.2233009708737864e-05, "loss": 0.3574, "step": 3660 }, { "epoch": 35.49759036144578, "grad_norm": 11.237055778503418, "learning_rate": 3.222815533980583e-05, "loss": 0.2471, "step": 3661 }, { "epoch": 35.50722891566265, "grad_norm": 15.181192398071289, "learning_rate": 3.2223300970873786e-05, "loss": 0.5367, "step": 3662 }, { "epoch": 35.51686746987952, "grad_norm": 5.634272575378418, "learning_rate": 3.221844660194175e-05, "loss": 0.175, "step": 3663 }, { "epoch": 35.526506024096385, "grad_norm": 17.312824249267578, "learning_rate": 3.221359223300971e-05, "loss": 0.4715, "step": 3664 }, { "epoch": 35.536144578313255, "grad_norm": 1.513866901397705, "learning_rate": 3.220873786407767e-05, "loss": 0.2768, "step": 3665 }, { "epoch": 35.54578313253012, "grad_norm": 5.323310852050781, "learning_rate": 3.220388349514563e-05, "loss": 0.1232, "step": 3666 }, { "epoch": 35.55542168674699, "grad_norm": 9.889528274536133, "learning_rate": 3.2199029126213595e-05, "loss": 0.2439, "step": 3667 }, { "epoch": 35.56506024096385, "grad_norm": 12.701312065124512, "learning_rate": 3.219417475728155e-05, "loss": 0.2028, "step": 3668 }, { "epoch": 35.57469879518072, "grad_norm": 6.0996809005737305, "learning_rate": 3.218932038834952e-05, "loss": 0.4081, "step": 3669 }, { "epoch": 35.58433734939759, "grad_norm": 7.840542316436768, "learning_rate": 3.2184466019417475e-05, "loss": 0.4497, "step": 3670 }, { "epoch": 35.59397590361446, "grad_norm": 24.788002014160156, "learning_rate": 3.217961165048543e-05, "loss": 0.3668, "step": 3671 }, { "epoch": 35.60361445783133, "grad_norm": 2.9619405269622803, "learning_rate": 3.21747572815534e-05, "loss": 0.1657, "step": 3672 }, { "epoch": 35.61325301204819, "grad_norm": 4.634776592254639, "learning_rate": 3.2169902912621355e-05, "loss": 0.2271, "step": 3673 }, { "epoch": 35.62289156626506, "grad_norm": 3.895322322845459, "learning_rate": 3.2165048543689326e-05, "loss": 0.2226, "step": 3674 }, { "epoch": 35.63253012048193, "grad_norm": 2.0316998958587646, "learning_rate": 3.2160194174757284e-05, "loss": 0.0606, "step": 3675 }, { "epoch": 35.642168674698794, "grad_norm": 4.363604545593262, "learning_rate": 3.215533980582525e-05, "loss": 0.3275, "step": 3676 }, { "epoch": 35.651807228915665, "grad_norm": 13.41970157623291, "learning_rate": 3.2150485436893206e-05, "loss": 0.4111, "step": 3677 }, { "epoch": 35.66144578313253, "grad_norm": 12.138230323791504, "learning_rate": 3.214563106796117e-05, "loss": 0.3242, "step": 3678 }, { "epoch": 35.6710843373494, "grad_norm": 4.59397554397583, "learning_rate": 3.214077669902913e-05, "loss": 0.2933, "step": 3679 }, { "epoch": 35.68072289156626, "grad_norm": 3.1449904441833496, "learning_rate": 3.213592233009709e-05, "loss": 0.0577, "step": 3680 }, { "epoch": 35.69036144578313, "grad_norm": 14.14247989654541, "learning_rate": 3.213106796116505e-05, "loss": 0.2885, "step": 3681 }, { "epoch": 35.7, "grad_norm": 3.4201459884643555, "learning_rate": 3.212621359223301e-05, "loss": 0.1843, "step": 3682 }, { "epoch": 35.709638554216866, "grad_norm": 8.915380477905273, "learning_rate": 3.212135922330097e-05, "loss": 0.2588, "step": 3683 }, { "epoch": 35.71927710843374, "grad_norm": 13.312655448913574, "learning_rate": 3.211650485436893e-05, "loss": 0.2608, "step": 3684 }, { "epoch": 35.7289156626506, "grad_norm": 5.1515631675720215, "learning_rate": 3.2111650485436896e-05, "loss": 0.2663, "step": 3685 }, { "epoch": 35.73855421686747, "grad_norm": 18.746885299682617, "learning_rate": 3.210679611650485e-05, "loss": 0.3527, "step": 3686 }, { "epoch": 35.74819277108434, "grad_norm": 6.18277645111084, "learning_rate": 3.210194174757282e-05, "loss": 0.3119, "step": 3687 }, { "epoch": 35.757831325301204, "grad_norm": 19.767730712890625, "learning_rate": 3.2097087378640776e-05, "loss": 0.3577, "step": 3688 }, { "epoch": 35.767469879518075, "grad_norm": 5.120597839355469, "learning_rate": 3.209223300970874e-05, "loss": 0.1885, "step": 3689 }, { "epoch": 35.77710843373494, "grad_norm": 2.3532087802886963, "learning_rate": 3.20873786407767e-05, "loss": 0.1342, "step": 3690 }, { "epoch": 35.78674698795181, "grad_norm": 15.949131965637207, "learning_rate": 3.208252427184466e-05, "loss": 0.3464, "step": 3691 }, { "epoch": 35.79638554216867, "grad_norm": 11.014365196228027, "learning_rate": 3.207766990291262e-05, "loss": 0.2816, "step": 3692 }, { "epoch": 35.80602409638554, "grad_norm": 5.756566047668457, "learning_rate": 3.2072815533980585e-05, "loss": 0.4389, "step": 3693 }, { "epoch": 35.81566265060241, "grad_norm": 20.337533950805664, "learning_rate": 3.206796116504854e-05, "loss": 0.1309, "step": 3694 }, { "epoch": 35.825301204819276, "grad_norm": 11.152670860290527, "learning_rate": 3.206310679611651e-05, "loss": 0.27, "step": 3695 }, { "epoch": 35.834939759036146, "grad_norm": 4.181135177612305, "learning_rate": 3.2058252427184465e-05, "loss": 0.2274, "step": 3696 }, { "epoch": 35.84457831325301, "grad_norm": 7.747771263122559, "learning_rate": 3.205339805825243e-05, "loss": 0.2394, "step": 3697 }, { "epoch": 35.85421686746988, "grad_norm": 15.583064079284668, "learning_rate": 3.204854368932039e-05, "loss": 0.3356, "step": 3698 }, { "epoch": 35.86385542168675, "grad_norm": 5.8975396156311035, "learning_rate": 3.204368932038835e-05, "loss": 0.283, "step": 3699 }, { "epoch": 35.873493975903614, "grad_norm": 9.496764183044434, "learning_rate": 3.2038834951456316e-05, "loss": 0.1886, "step": 3700 }, { "epoch": 35.883132530120484, "grad_norm": 2.5378589630126953, "learning_rate": 3.2033980582524274e-05, "loss": 0.2043, "step": 3701 }, { "epoch": 35.89277108433735, "grad_norm": 1.828116536140442, "learning_rate": 3.202912621359224e-05, "loss": 0.1737, "step": 3702 }, { "epoch": 35.90240963855422, "grad_norm": 14.666244506835938, "learning_rate": 3.2024271844660196e-05, "loss": 0.249, "step": 3703 }, { "epoch": 35.91204819277108, "grad_norm": 4.750869274139404, "learning_rate": 3.201941747572816e-05, "loss": 0.2938, "step": 3704 }, { "epoch": 35.92168674698795, "grad_norm": 27.285484313964844, "learning_rate": 3.201456310679612e-05, "loss": 0.1945, "step": 3705 }, { "epoch": 35.93132530120482, "grad_norm": 18.168725967407227, "learning_rate": 3.200970873786408e-05, "loss": 0.2135, "step": 3706 }, { "epoch": 35.940963855421685, "grad_norm": 5.027111530303955, "learning_rate": 3.200485436893204e-05, "loss": 0.2516, "step": 3707 }, { "epoch": 35.950602409638556, "grad_norm": 8.203144073486328, "learning_rate": 3.2000000000000005e-05, "loss": 0.1493, "step": 3708 }, { "epoch": 35.96024096385542, "grad_norm": 4.530794620513916, "learning_rate": 3.199514563106796e-05, "loss": 0.1745, "step": 3709 }, { "epoch": 35.96987951807229, "grad_norm": 3.447352409362793, "learning_rate": 3.199029126213592e-05, "loss": 0.1155, "step": 3710 }, { "epoch": 35.97951807228916, "grad_norm": 12.726161003112793, "learning_rate": 3.1985436893203885e-05, "loss": 0.2544, "step": 3711 }, { "epoch": 35.98915662650602, "grad_norm": 8.83637809753418, "learning_rate": 3.198058252427184e-05, "loss": 0.3537, "step": 3712 }, { "epoch": 36.00481927710843, "grad_norm": 35.85462188720703, "learning_rate": 3.197572815533981e-05, "loss": 0.3667, "step": 3713 }, { "epoch": 36.0144578313253, "grad_norm": 4.676975250244141, "learning_rate": 3.1970873786407765e-05, "loss": 0.1972, "step": 3714 }, { "epoch": 36.024096385542165, "grad_norm": 3.525930881500244, "learning_rate": 3.196601941747573e-05, "loss": 0.1731, "step": 3715 }, { "epoch": 36.033734939759036, "grad_norm": 11.19109058380127, "learning_rate": 3.196116504854369e-05, "loss": 0.3742, "step": 3716 }, { "epoch": 36.043373493975906, "grad_norm": 12.026206016540527, "learning_rate": 3.195631067961165e-05, "loss": 0.2851, "step": 3717 }, { "epoch": 36.05301204819277, "grad_norm": 4.1349077224731445, "learning_rate": 3.195145631067961e-05, "loss": 0.1602, "step": 3718 }, { "epoch": 36.06265060240964, "grad_norm": 8.437797546386719, "learning_rate": 3.1946601941747575e-05, "loss": 0.1551, "step": 3719 }, { "epoch": 36.0722891566265, "grad_norm": 2.202714443206787, "learning_rate": 3.194174757281553e-05, "loss": 0.1534, "step": 3720 }, { "epoch": 36.081927710843374, "grad_norm": 6.089652061462402, "learning_rate": 3.19368932038835e-05, "loss": 0.3984, "step": 3721 }, { "epoch": 36.091566265060244, "grad_norm": 1.4001100063323975, "learning_rate": 3.1932038834951455e-05, "loss": 0.2129, "step": 3722 }, { "epoch": 36.10120481927711, "grad_norm": 16.434917449951172, "learning_rate": 3.192718446601942e-05, "loss": 0.2666, "step": 3723 }, { "epoch": 36.11084337349398, "grad_norm": 4.518642902374268, "learning_rate": 3.1922330097087384e-05, "loss": 0.2231, "step": 3724 }, { "epoch": 36.12048192771084, "grad_norm": 22.614028930664062, "learning_rate": 3.191747572815534e-05, "loss": 0.2558, "step": 3725 }, { "epoch": 36.13012048192771, "grad_norm": 12.586929321289062, "learning_rate": 3.1912621359223306e-05, "loss": 0.1998, "step": 3726 }, { "epoch": 36.139759036144575, "grad_norm": 3.612849473953247, "learning_rate": 3.1907766990291264e-05, "loss": 0.1525, "step": 3727 }, { "epoch": 36.149397590361446, "grad_norm": 10.464812278747559, "learning_rate": 3.190291262135923e-05, "loss": 0.3724, "step": 3728 }, { "epoch": 36.159036144578316, "grad_norm": 15.314769744873047, "learning_rate": 3.1898058252427186e-05, "loss": 0.3575, "step": 3729 }, { "epoch": 36.16867469879518, "grad_norm": 3.8517489433288574, "learning_rate": 3.189320388349515e-05, "loss": 0.1759, "step": 3730 }, { "epoch": 36.17831325301205, "grad_norm": 5.173516750335693, "learning_rate": 3.188834951456311e-05, "loss": 0.5923, "step": 3731 }, { "epoch": 36.18795180722891, "grad_norm": 4.246673583984375, "learning_rate": 3.188349514563107e-05, "loss": 0.3699, "step": 3732 }, { "epoch": 36.19759036144578, "grad_norm": 2.16977858543396, "learning_rate": 3.187864077669903e-05, "loss": 0.0991, "step": 3733 }, { "epoch": 36.207228915662654, "grad_norm": 14.567904472351074, "learning_rate": 3.1873786407766995e-05, "loss": 0.2754, "step": 3734 }, { "epoch": 36.21686746987952, "grad_norm": 10.769712448120117, "learning_rate": 3.186893203883495e-05, "loss": 0.2249, "step": 3735 }, { "epoch": 36.22650602409639, "grad_norm": 3.9768898487091064, "learning_rate": 3.186407766990292e-05, "loss": 0.1518, "step": 3736 }, { "epoch": 36.23614457831325, "grad_norm": 3.8788866996765137, "learning_rate": 3.1859223300970875e-05, "loss": 0.1583, "step": 3737 }, { "epoch": 36.24578313253012, "grad_norm": 3.8397064208984375, "learning_rate": 3.185436893203883e-05, "loss": 0.4974, "step": 3738 }, { "epoch": 36.255421686746985, "grad_norm": 8.4107027053833, "learning_rate": 3.18495145631068e-05, "loss": 0.3237, "step": 3739 }, { "epoch": 36.265060240963855, "grad_norm": 10.172269821166992, "learning_rate": 3.1844660194174755e-05, "loss": 0.1406, "step": 3740 }, { "epoch": 36.274698795180726, "grad_norm": 9.459904670715332, "learning_rate": 3.183980582524272e-05, "loss": 0.3947, "step": 3741 }, { "epoch": 36.28433734939759, "grad_norm": 5.009860038757324, "learning_rate": 3.183495145631068e-05, "loss": 0.2579, "step": 3742 }, { "epoch": 36.29397590361446, "grad_norm": 3.997122049331665, "learning_rate": 3.183009708737864e-05, "loss": 0.1424, "step": 3743 }, { "epoch": 36.30361445783132, "grad_norm": 4.212774276733398, "learning_rate": 3.18252427184466e-05, "loss": 0.5173, "step": 3744 }, { "epoch": 36.31325301204819, "grad_norm": 4.1593732833862305, "learning_rate": 3.1820388349514564e-05, "loss": 0.2611, "step": 3745 }, { "epoch": 36.32289156626506, "grad_norm": 3.0739567279815674, "learning_rate": 3.181553398058252e-05, "loss": 0.2444, "step": 3746 }, { "epoch": 36.33253012048193, "grad_norm": 15.879332542419434, "learning_rate": 3.181067961165049e-05, "loss": 0.2298, "step": 3747 }, { "epoch": 36.3421686746988, "grad_norm": 6.573129177093506, "learning_rate": 3.1805825242718444e-05, "loss": 0.2001, "step": 3748 }, { "epoch": 36.35180722891566, "grad_norm": 13.983720779418945, "learning_rate": 3.180097087378641e-05, "loss": 0.3088, "step": 3749 }, { "epoch": 36.36144578313253, "grad_norm": 12.194174766540527, "learning_rate": 3.1796116504854373e-05, "loss": 0.2776, "step": 3750 }, { "epoch": 36.371084337349394, "grad_norm": 2.747573137283325, "learning_rate": 3.179126213592233e-05, "loss": 0.2029, "step": 3751 }, { "epoch": 36.380722891566265, "grad_norm": 19.438112258911133, "learning_rate": 3.1786407766990296e-05, "loss": 0.2477, "step": 3752 }, { "epoch": 36.390361445783135, "grad_norm": 4.263461589813232, "learning_rate": 3.1781553398058253e-05, "loss": 0.3021, "step": 3753 }, { "epoch": 36.4, "grad_norm": 6.443726539611816, "learning_rate": 3.177669902912622e-05, "loss": 0.2183, "step": 3754 }, { "epoch": 36.40963855421687, "grad_norm": 11.17184829711914, "learning_rate": 3.1771844660194176e-05, "loss": 0.3287, "step": 3755 }, { "epoch": 36.41927710843373, "grad_norm": 13.234028816223145, "learning_rate": 3.176699029126214e-05, "loss": 0.2185, "step": 3756 }, { "epoch": 36.4289156626506, "grad_norm": 2.8170814514160156, "learning_rate": 3.17621359223301e-05, "loss": 0.2376, "step": 3757 }, { "epoch": 36.43855421686747, "grad_norm": 2.9969470500946045, "learning_rate": 3.175728155339806e-05, "loss": 0.3392, "step": 3758 }, { "epoch": 36.44819277108434, "grad_norm": 8.265358924865723, "learning_rate": 3.175242718446602e-05, "loss": 0.1489, "step": 3759 }, { "epoch": 36.45783132530121, "grad_norm": 11.252638816833496, "learning_rate": 3.1747572815533985e-05, "loss": 0.2361, "step": 3760 }, { "epoch": 36.46746987951807, "grad_norm": 7.054643154144287, "learning_rate": 3.174271844660194e-05, "loss": 0.3736, "step": 3761 }, { "epoch": 36.47710843373494, "grad_norm": 1.4251213073730469, "learning_rate": 3.173786407766991e-05, "loss": 0.1107, "step": 3762 }, { "epoch": 36.486746987951804, "grad_norm": 4.22744083404541, "learning_rate": 3.1733009708737865e-05, "loss": 0.2209, "step": 3763 }, { "epoch": 36.496385542168674, "grad_norm": 13.711938858032227, "learning_rate": 3.172815533980583e-05, "loss": 0.21, "step": 3764 }, { "epoch": 36.506024096385545, "grad_norm": 3.050912857055664, "learning_rate": 3.172330097087379e-05, "loss": 0.0709, "step": 3765 }, { "epoch": 36.51566265060241, "grad_norm": 5.52048921585083, "learning_rate": 3.1718446601941745e-05, "loss": 0.3704, "step": 3766 }, { "epoch": 36.52530120481928, "grad_norm": 3.866006374359131, "learning_rate": 3.171359223300971e-05, "loss": 0.2175, "step": 3767 }, { "epoch": 36.53493975903614, "grad_norm": 9.322967529296875, "learning_rate": 3.170873786407767e-05, "loss": 0.1715, "step": 3768 }, { "epoch": 36.54457831325301, "grad_norm": 2.4305741786956787, "learning_rate": 3.170388349514563e-05, "loss": 0.2666, "step": 3769 }, { "epoch": 36.55421686746988, "grad_norm": 3.569823980331421, "learning_rate": 3.169902912621359e-05, "loss": 0.3179, "step": 3770 }, { "epoch": 36.563855421686746, "grad_norm": 2.164686918258667, "learning_rate": 3.1694174757281554e-05, "loss": 0.1494, "step": 3771 }, { "epoch": 36.57349397590362, "grad_norm": 16.118152618408203, "learning_rate": 3.168932038834951e-05, "loss": 0.5709, "step": 3772 }, { "epoch": 36.58313253012048, "grad_norm": 5.8048906326293945, "learning_rate": 3.1684466019417476e-05, "loss": 0.2793, "step": 3773 }, { "epoch": 36.59277108433735, "grad_norm": 7.845499515533447, "learning_rate": 3.1679611650485434e-05, "loss": 0.2669, "step": 3774 }, { "epoch": 36.602409638554214, "grad_norm": 9.30047607421875, "learning_rate": 3.1674757281553405e-05, "loss": 0.4015, "step": 3775 }, { "epoch": 36.612048192771084, "grad_norm": 7.768826007843018, "learning_rate": 3.166990291262136e-05, "loss": 0.521, "step": 3776 }, { "epoch": 36.621686746987955, "grad_norm": 9.230484008789062, "learning_rate": 3.166504854368932e-05, "loss": 0.2629, "step": 3777 }, { "epoch": 36.63132530120482, "grad_norm": 4.751834869384766, "learning_rate": 3.1660194174757286e-05, "loss": 0.2073, "step": 3778 }, { "epoch": 36.64096385542169, "grad_norm": 13.256014823913574, "learning_rate": 3.165533980582524e-05, "loss": 0.3856, "step": 3779 }, { "epoch": 36.65060240963855, "grad_norm": 4.395449161529541, "learning_rate": 3.165048543689321e-05, "loss": 0.1288, "step": 3780 }, { "epoch": 36.66024096385542, "grad_norm": 16.726058959960938, "learning_rate": 3.1645631067961166e-05, "loss": 0.1718, "step": 3781 }, { "epoch": 36.66987951807229, "grad_norm": 3.9865121841430664, "learning_rate": 3.164077669902913e-05, "loss": 0.1323, "step": 3782 }, { "epoch": 36.679518072289156, "grad_norm": 3.687438488006592, "learning_rate": 3.163592233009709e-05, "loss": 0.217, "step": 3783 }, { "epoch": 36.689156626506026, "grad_norm": 3.0163025856018066, "learning_rate": 3.163106796116505e-05, "loss": 0.3478, "step": 3784 }, { "epoch": 36.69879518072289, "grad_norm": 37.7343864440918, "learning_rate": 3.162621359223301e-05, "loss": 0.392, "step": 3785 }, { "epoch": 36.70843373493976, "grad_norm": 3.0956907272338867, "learning_rate": 3.1621359223300975e-05, "loss": 0.2157, "step": 3786 }, { "epoch": 36.71807228915662, "grad_norm": 16.06728744506836, "learning_rate": 3.161650485436893e-05, "loss": 0.3052, "step": 3787 }, { "epoch": 36.727710843373494, "grad_norm": 1.6391276121139526, "learning_rate": 3.16116504854369e-05, "loss": 0.2576, "step": 3788 }, { "epoch": 36.737349397590364, "grad_norm": 9.701706886291504, "learning_rate": 3.1606796116504855e-05, "loss": 0.3522, "step": 3789 }, { "epoch": 36.74698795180723, "grad_norm": 6.645168304443359, "learning_rate": 3.160194174757282e-05, "loss": 0.3066, "step": 3790 }, { "epoch": 36.7566265060241, "grad_norm": 9.953474998474121, "learning_rate": 3.159708737864078e-05, "loss": 0.2079, "step": 3791 }, { "epoch": 36.76626506024096, "grad_norm": 20.40345573425293, "learning_rate": 3.159223300970874e-05, "loss": 0.2219, "step": 3792 }, { "epoch": 36.77590361445783, "grad_norm": 4.172938823699951, "learning_rate": 3.15873786407767e-05, "loss": 0.1692, "step": 3793 }, { "epoch": 36.7855421686747, "grad_norm": 8.586750984191895, "learning_rate": 3.158252427184466e-05, "loss": 0.1029, "step": 3794 }, { "epoch": 36.795180722891565, "grad_norm": 1.9913654327392578, "learning_rate": 3.157766990291262e-05, "loss": 0.1373, "step": 3795 }, { "epoch": 36.804819277108436, "grad_norm": 6.135474681854248, "learning_rate": 3.157281553398058e-05, "loss": 0.1465, "step": 3796 }, { "epoch": 36.8144578313253, "grad_norm": 2.9748709201812744, "learning_rate": 3.1567961165048544e-05, "loss": 0.2485, "step": 3797 }, { "epoch": 36.82409638554217, "grad_norm": 1.9424171447753906, "learning_rate": 3.15631067961165e-05, "loss": 0.1939, "step": 3798 }, { "epoch": 36.83373493975903, "grad_norm": 7.775018215179443, "learning_rate": 3.1558252427184466e-05, "loss": 0.3517, "step": 3799 }, { "epoch": 36.8433734939759, "grad_norm": 15.68675708770752, "learning_rate": 3.155339805825243e-05, "loss": 0.2905, "step": 3800 }, { "epoch": 36.853012048192774, "grad_norm": 31.048860549926758, "learning_rate": 3.1548543689320395e-05, "loss": 0.1882, "step": 3801 }, { "epoch": 36.86265060240964, "grad_norm": 4.049633502960205, "learning_rate": 3.154368932038835e-05, "loss": 0.2356, "step": 3802 }, { "epoch": 36.87228915662651, "grad_norm": 1.7718031406402588, "learning_rate": 3.153883495145631e-05, "loss": 0.2838, "step": 3803 }, { "epoch": 36.88192771084337, "grad_norm": 6.98295783996582, "learning_rate": 3.1533980582524275e-05, "loss": 0.4375, "step": 3804 }, { "epoch": 36.89156626506024, "grad_norm": 14.414008140563965, "learning_rate": 3.152912621359223e-05, "loss": 0.3756, "step": 3805 }, { "epoch": 36.90120481927711, "grad_norm": 14.505987167358398, "learning_rate": 3.15242718446602e-05, "loss": 0.2691, "step": 3806 }, { "epoch": 36.910843373493975, "grad_norm": 21.50482940673828, "learning_rate": 3.1519417475728155e-05, "loss": 0.3742, "step": 3807 }, { "epoch": 36.920481927710846, "grad_norm": 23.78220558166504, "learning_rate": 3.151456310679612e-05, "loss": 0.1933, "step": 3808 }, { "epoch": 36.93012048192771, "grad_norm": 5.527628421783447, "learning_rate": 3.150970873786408e-05, "loss": 0.4275, "step": 3809 }, { "epoch": 36.93975903614458, "grad_norm": 2.696761131286621, "learning_rate": 3.150485436893204e-05, "loss": 0.2896, "step": 3810 }, { "epoch": 36.94939759036144, "grad_norm": 15.782647132873535, "learning_rate": 3.15e-05, "loss": 0.3049, "step": 3811 }, { "epoch": 36.95903614457831, "grad_norm": 2.1207642555236816, "learning_rate": 3.1495145631067964e-05, "loss": 0.1602, "step": 3812 }, { "epoch": 36.96867469879518, "grad_norm": 2.966310739517212, "learning_rate": 3.149029126213592e-05, "loss": 0.2308, "step": 3813 }, { "epoch": 36.97831325301205, "grad_norm": 8.751526832580566, "learning_rate": 3.148543689320389e-05, "loss": 0.3726, "step": 3814 }, { "epoch": 36.98795180722892, "grad_norm": 15.376397132873535, "learning_rate": 3.1480582524271845e-05, "loss": 0.3134, "step": 3815 }, { "epoch": 37.003614457831326, "grad_norm": 10.627942085266113, "learning_rate": 3.147572815533981e-05, "loss": 0.269, "step": 3816 }, { "epoch": 37.013253012048196, "grad_norm": 16.010854721069336, "learning_rate": 3.147087378640777e-05, "loss": 0.431, "step": 3817 }, { "epoch": 37.02289156626506, "grad_norm": 3.1558051109313965, "learning_rate": 3.146601941747573e-05, "loss": 0.205, "step": 3818 }, { "epoch": 37.03253012048193, "grad_norm": 3.285747766494751, "learning_rate": 3.146116504854369e-05, "loss": 0.2742, "step": 3819 }, { "epoch": 37.04216867469879, "grad_norm": 11.615933418273926, "learning_rate": 3.145631067961165e-05, "loss": 0.4358, "step": 3820 }, { "epoch": 37.05180722891566, "grad_norm": 2.198326587677002, "learning_rate": 3.145145631067961e-05, "loss": 0.2873, "step": 3821 }, { "epoch": 37.06144578313253, "grad_norm": 8.184213638305664, "learning_rate": 3.144660194174757e-05, "loss": 0.1872, "step": 3822 }, { "epoch": 37.0710843373494, "grad_norm": 25.972320556640625, "learning_rate": 3.1441747572815534e-05, "loss": 0.4467, "step": 3823 }, { "epoch": 37.08072289156627, "grad_norm": 5.271875858306885, "learning_rate": 3.143689320388349e-05, "loss": 0.3956, "step": 3824 }, { "epoch": 37.09036144578313, "grad_norm": 7.763338565826416, "learning_rate": 3.143203883495146e-05, "loss": 0.4438, "step": 3825 }, { "epoch": 37.1, "grad_norm": 7.200943946838379, "learning_rate": 3.142718446601942e-05, "loss": 0.2779, "step": 3826 }, { "epoch": 37.109638554216865, "grad_norm": 4.492236137390137, "learning_rate": 3.1422330097087385e-05, "loss": 0.37, "step": 3827 }, { "epoch": 37.119277108433735, "grad_norm": 3.0471749305725098, "learning_rate": 3.141747572815534e-05, "loss": 0.2116, "step": 3828 }, { "epoch": 37.128915662650606, "grad_norm": 10.21556568145752, "learning_rate": 3.141262135922331e-05, "loss": 0.2529, "step": 3829 }, { "epoch": 37.13855421686747, "grad_norm": 2.2933523654937744, "learning_rate": 3.1407766990291265e-05, "loss": 0.1528, "step": 3830 }, { "epoch": 37.14819277108434, "grad_norm": 8.051271438598633, "learning_rate": 3.140291262135922e-05, "loss": 0.1092, "step": 3831 }, { "epoch": 37.1578313253012, "grad_norm": 3.911621570587158, "learning_rate": 3.139805825242719e-05, "loss": 0.262, "step": 3832 }, { "epoch": 37.16746987951807, "grad_norm": 3.507220506668091, "learning_rate": 3.1393203883495145e-05, "loss": 0.2777, "step": 3833 }, { "epoch": 37.17710843373494, "grad_norm": 12.707159996032715, "learning_rate": 3.138834951456311e-05, "loss": 0.158, "step": 3834 }, { "epoch": 37.18674698795181, "grad_norm": 3.0656561851501465, "learning_rate": 3.138349514563107e-05, "loss": 0.2174, "step": 3835 }, { "epoch": 37.19638554216868, "grad_norm": 6.974880695343018, "learning_rate": 3.137864077669903e-05, "loss": 0.3165, "step": 3836 }, { "epoch": 37.20602409638554, "grad_norm": 4.3949151039123535, "learning_rate": 3.137378640776699e-05, "loss": 0.1493, "step": 3837 }, { "epoch": 37.21566265060241, "grad_norm": 12.37830924987793, "learning_rate": 3.1368932038834954e-05, "loss": 0.3097, "step": 3838 }, { "epoch": 37.225301204819274, "grad_norm": 5.631002902984619, "learning_rate": 3.136407766990291e-05, "loss": 0.3251, "step": 3839 }, { "epoch": 37.234939759036145, "grad_norm": 9.104805946350098, "learning_rate": 3.1359223300970877e-05, "loss": 0.1945, "step": 3840 }, { "epoch": 37.244578313253015, "grad_norm": 3.0536723136901855, "learning_rate": 3.1354368932038834e-05, "loss": 0.2084, "step": 3841 }, { "epoch": 37.25421686746988, "grad_norm": 2.7247583866119385, "learning_rate": 3.13495145631068e-05, "loss": 0.1545, "step": 3842 }, { "epoch": 37.26385542168675, "grad_norm": 3.6751537322998047, "learning_rate": 3.134466019417476e-05, "loss": 0.1948, "step": 3843 }, { "epoch": 37.27349397590361, "grad_norm": 5.672779560089111, "learning_rate": 3.133980582524272e-05, "loss": 0.3845, "step": 3844 }, { "epoch": 37.28313253012048, "grad_norm": 5.136353492736816, "learning_rate": 3.133495145631068e-05, "loss": 0.2885, "step": 3845 }, { "epoch": 37.292771084337346, "grad_norm": 5.4902191162109375, "learning_rate": 3.1330097087378643e-05, "loss": 0.3098, "step": 3846 }, { "epoch": 37.30240963855422, "grad_norm": 10.800734519958496, "learning_rate": 3.13252427184466e-05, "loss": 0.3534, "step": 3847 }, { "epoch": 37.31204819277109, "grad_norm": 28.25017738342285, "learning_rate": 3.132038834951456e-05, "loss": 0.5558, "step": 3848 }, { "epoch": 37.32168674698795, "grad_norm": 5.259549617767334, "learning_rate": 3.1315533980582523e-05, "loss": 0.2057, "step": 3849 }, { "epoch": 37.33132530120482, "grad_norm": 9.753128051757812, "learning_rate": 3.131067961165049e-05, "loss": 0.2202, "step": 3850 }, { "epoch": 37.340963855421684, "grad_norm": 8.342279434204102, "learning_rate": 3.130582524271845e-05, "loss": 0.2033, "step": 3851 }, { "epoch": 37.350602409638554, "grad_norm": 12.003661155700684, "learning_rate": 3.130097087378641e-05, "loss": 0.2711, "step": 3852 }, { "epoch": 37.360240963855425, "grad_norm": 2.2236580848693848, "learning_rate": 3.1296116504854375e-05, "loss": 0.1935, "step": 3853 }, { "epoch": 37.36987951807229, "grad_norm": 6.555978298187256, "learning_rate": 3.129126213592233e-05, "loss": 0.2081, "step": 3854 }, { "epoch": 37.37951807228916, "grad_norm": 7.097365379333496, "learning_rate": 3.12864077669903e-05, "loss": 0.1946, "step": 3855 }, { "epoch": 37.38915662650602, "grad_norm": 10.61046028137207, "learning_rate": 3.1281553398058255e-05, "loss": 0.0996, "step": 3856 }, { "epoch": 37.39879518072289, "grad_norm": 2.6893880367279053, "learning_rate": 3.127669902912622e-05, "loss": 0.0787, "step": 3857 }, { "epoch": 37.408433734939756, "grad_norm": 5.125006675720215, "learning_rate": 3.127184466019418e-05, "loss": 0.3712, "step": 3858 }, { "epoch": 37.418072289156626, "grad_norm": 12.385262489318848, "learning_rate": 3.1266990291262135e-05, "loss": 0.2151, "step": 3859 }, { "epoch": 37.4277108433735, "grad_norm": 6.247659206390381, "learning_rate": 3.12621359223301e-05, "loss": 0.3005, "step": 3860 }, { "epoch": 37.43734939759036, "grad_norm": 3.6352155208587646, "learning_rate": 3.125728155339806e-05, "loss": 0.2504, "step": 3861 }, { "epoch": 37.44698795180723, "grad_norm": 5.756077766418457, "learning_rate": 3.125242718446602e-05, "loss": 0.3526, "step": 3862 }, { "epoch": 37.456626506024094, "grad_norm": 7.264728546142578, "learning_rate": 3.124757281553398e-05, "loss": 0.2613, "step": 3863 }, { "epoch": 37.466265060240964, "grad_norm": 9.491683006286621, "learning_rate": 3.1242718446601944e-05, "loss": 0.2657, "step": 3864 }, { "epoch": 37.475903614457835, "grad_norm": 8.332192420959473, "learning_rate": 3.12378640776699e-05, "loss": 0.2557, "step": 3865 }, { "epoch": 37.4855421686747, "grad_norm": 11.177675247192383, "learning_rate": 3.1233009708737866e-05, "loss": 0.1676, "step": 3866 }, { "epoch": 37.49518072289157, "grad_norm": 3.6235172748565674, "learning_rate": 3.1228155339805824e-05, "loss": 0.3427, "step": 3867 }, { "epoch": 37.50481927710843, "grad_norm": 11.241495132446289, "learning_rate": 3.122330097087379e-05, "loss": 0.2905, "step": 3868 }, { "epoch": 37.5144578313253, "grad_norm": 1.9603030681610107, "learning_rate": 3.1218446601941746e-05, "loss": 0.2447, "step": 3869 }, { "epoch": 37.524096385542165, "grad_norm": 4.896584987640381, "learning_rate": 3.121359223300971e-05, "loss": 0.3471, "step": 3870 }, { "epoch": 37.533734939759036, "grad_norm": 13.590662956237793, "learning_rate": 3.120873786407767e-05, "loss": 0.3208, "step": 3871 }, { "epoch": 37.543373493975906, "grad_norm": 9.826837539672852, "learning_rate": 3.120388349514563e-05, "loss": 0.1975, "step": 3872 }, { "epoch": 37.55301204819277, "grad_norm": 1.602063536643982, "learning_rate": 3.119902912621359e-05, "loss": 0.0832, "step": 3873 }, { "epoch": 37.56265060240964, "grad_norm": 1.8930798768997192, "learning_rate": 3.1194174757281556e-05, "loss": 0.1697, "step": 3874 }, { "epoch": 37.5722891566265, "grad_norm": 7.5269551277160645, "learning_rate": 3.118932038834951e-05, "loss": 0.2752, "step": 3875 }, { "epoch": 37.581927710843374, "grad_norm": 2.545400619506836, "learning_rate": 3.118446601941748e-05, "loss": 0.2161, "step": 3876 }, { "epoch": 37.591566265060244, "grad_norm": 3.1533663272857666, "learning_rate": 3.117961165048544e-05, "loss": 0.2295, "step": 3877 }, { "epoch": 37.60120481927711, "grad_norm": 6.9843645095825195, "learning_rate": 3.11747572815534e-05, "loss": 0.3669, "step": 3878 }, { "epoch": 37.61084337349398, "grad_norm": 4.989954471588135, "learning_rate": 3.1169902912621365e-05, "loss": 0.1498, "step": 3879 }, { "epoch": 37.62048192771084, "grad_norm": 5.635016918182373, "learning_rate": 3.116504854368932e-05, "loss": 0.3244, "step": 3880 }, { "epoch": 37.63012048192771, "grad_norm": 2.413867235183716, "learning_rate": 3.116019417475729e-05, "loss": 0.0769, "step": 3881 }, { "epoch": 37.639759036144575, "grad_norm": 21.424959182739258, "learning_rate": 3.1155339805825245e-05, "loss": 0.3152, "step": 3882 }, { "epoch": 37.649397590361446, "grad_norm": 2.573258638381958, "learning_rate": 3.115048543689321e-05, "loss": 0.1862, "step": 3883 }, { "epoch": 37.659036144578316, "grad_norm": 3.4245097637176514, "learning_rate": 3.114563106796117e-05, "loss": 0.2098, "step": 3884 }, { "epoch": 37.66867469879518, "grad_norm": 13.215850830078125, "learning_rate": 3.114077669902913e-05, "loss": 0.3831, "step": 3885 }, { "epoch": 37.67831325301205, "grad_norm": 2.8825955390930176, "learning_rate": 3.113592233009709e-05, "loss": 0.3144, "step": 3886 }, { "epoch": 37.68795180722891, "grad_norm": 6.536292552947998, "learning_rate": 3.113106796116505e-05, "loss": 0.2544, "step": 3887 }, { "epoch": 37.69759036144578, "grad_norm": 7.062137126922607, "learning_rate": 3.112621359223301e-05, "loss": 0.1451, "step": 3888 }, { "epoch": 37.707228915662654, "grad_norm": 23.084630966186523, "learning_rate": 3.112135922330097e-05, "loss": 0.2578, "step": 3889 }, { "epoch": 37.71686746987952, "grad_norm": 5.959013938903809, "learning_rate": 3.1116504854368934e-05, "loss": 0.1262, "step": 3890 }, { "epoch": 37.72650602409639, "grad_norm": 7.817070484161377, "learning_rate": 3.111165048543689e-05, "loss": 0.2534, "step": 3891 }, { "epoch": 37.73614457831325, "grad_norm": 13.293998718261719, "learning_rate": 3.1106796116504856e-05, "loss": 0.154, "step": 3892 }, { "epoch": 37.74578313253012, "grad_norm": 3.795100450515747, "learning_rate": 3.1101941747572814e-05, "loss": 0.3787, "step": 3893 }, { "epoch": 37.755421686746985, "grad_norm": 3.44169282913208, "learning_rate": 3.109708737864078e-05, "loss": 0.1685, "step": 3894 }, { "epoch": 37.765060240963855, "grad_norm": 2.162357807159424, "learning_rate": 3.1092233009708736e-05, "loss": 0.0951, "step": 3895 }, { "epoch": 37.774698795180726, "grad_norm": 3.6864705085754395, "learning_rate": 3.10873786407767e-05, "loss": 0.2345, "step": 3896 }, { "epoch": 37.78433734939759, "grad_norm": 4.337101459503174, "learning_rate": 3.108252427184466e-05, "loss": 0.3212, "step": 3897 }, { "epoch": 37.79397590361446, "grad_norm": 12.592705726623535, "learning_rate": 3.107766990291262e-05, "loss": 0.289, "step": 3898 }, { "epoch": 37.80361445783132, "grad_norm": 4.792547225952148, "learning_rate": 3.107281553398058e-05, "loss": 0.2611, "step": 3899 }, { "epoch": 37.81325301204819, "grad_norm": 4.100082874298096, "learning_rate": 3.1067961165048545e-05, "loss": 0.11, "step": 3900 }, { "epoch": 37.82289156626506, "grad_norm": 4.801461219787598, "learning_rate": 3.106310679611651e-05, "loss": 0.3289, "step": 3901 }, { "epoch": 37.83253012048193, "grad_norm": 25.216045379638672, "learning_rate": 3.105825242718447e-05, "loss": 0.3084, "step": 3902 }, { "epoch": 37.8421686746988, "grad_norm": 18.121726989746094, "learning_rate": 3.105339805825243e-05, "loss": 0.3163, "step": 3903 }, { "epoch": 37.85180722891566, "grad_norm": 6.139388084411621, "learning_rate": 3.104854368932039e-05, "loss": 0.2863, "step": 3904 }, { "epoch": 37.86144578313253, "grad_norm": 1.534302830696106, "learning_rate": 3.1043689320388354e-05, "loss": 0.3505, "step": 3905 }, { "epoch": 37.871084337349394, "grad_norm": 2.680280923843384, "learning_rate": 3.103883495145631e-05, "loss": 0.2393, "step": 3906 }, { "epoch": 37.880722891566265, "grad_norm": 7.417754650115967, "learning_rate": 3.103398058252428e-05, "loss": 0.293, "step": 3907 }, { "epoch": 37.890361445783135, "grad_norm": 25.137027740478516, "learning_rate": 3.1029126213592234e-05, "loss": 0.3154, "step": 3908 }, { "epoch": 37.9, "grad_norm": 5.425989627838135, "learning_rate": 3.10242718446602e-05, "loss": 0.1163, "step": 3909 }, { "epoch": 37.90963855421687, "grad_norm": 3.5210416316986084, "learning_rate": 3.101941747572816e-05, "loss": 0.3556, "step": 3910 }, { "epoch": 37.91927710843373, "grad_norm": 12.173245429992676, "learning_rate": 3.101456310679612e-05, "loss": 0.1379, "step": 3911 }, { "epoch": 37.9289156626506, "grad_norm": 14.88095474243164, "learning_rate": 3.100970873786408e-05, "loss": 0.416, "step": 3912 }, { "epoch": 37.93855421686747, "grad_norm": 4.122864723205566, "learning_rate": 3.1004854368932044e-05, "loss": 0.2667, "step": 3913 }, { "epoch": 37.94819277108434, "grad_norm": 5.530112266540527, "learning_rate": 3.1e-05, "loss": 0.2122, "step": 3914 }, { "epoch": 37.95783132530121, "grad_norm": 4.952751636505127, "learning_rate": 3.099514563106796e-05, "loss": 0.4283, "step": 3915 }, { "epoch": 37.96746987951807, "grad_norm": 18.269929885864258, "learning_rate": 3.0990291262135924e-05, "loss": 0.3881, "step": 3916 }, { "epoch": 37.97710843373494, "grad_norm": 5.754243850708008, "learning_rate": 3.098543689320388e-05, "loss": 0.1145, "step": 3917 }, { "epoch": 37.986746987951804, "grad_norm": 5.294280529022217, "learning_rate": 3.0980582524271846e-05, "loss": 0.4736, "step": 3918 }, { "epoch": 38.00240963855422, "grad_norm": 3.8858609199523926, "learning_rate": 3.0975728155339804e-05, "loss": 0.2825, "step": 3919 }, { "epoch": 38.01204819277108, "grad_norm": 5.963010787963867, "learning_rate": 3.097087378640777e-05, "loss": 0.1514, "step": 3920 }, { "epoch": 38.02168674698795, "grad_norm": 6.756595134735107, "learning_rate": 3.0966019417475726e-05, "loss": 0.3611, "step": 3921 }, { "epoch": 38.03132530120482, "grad_norm": 6.079927921295166, "learning_rate": 3.096116504854369e-05, "loss": 0.2501, "step": 3922 }, { "epoch": 38.04096385542169, "grad_norm": 10.319441795349121, "learning_rate": 3.095631067961165e-05, "loss": 0.262, "step": 3923 }, { "epoch": 38.05060240963856, "grad_norm": 5.960400104522705, "learning_rate": 3.095145631067961e-05, "loss": 0.2666, "step": 3924 }, { "epoch": 38.06024096385542, "grad_norm": 4.151772975921631, "learning_rate": 3.094660194174757e-05, "loss": 0.3554, "step": 3925 }, { "epoch": 38.06987951807229, "grad_norm": 12.483469009399414, "learning_rate": 3.0941747572815535e-05, "loss": 0.266, "step": 3926 }, { "epoch": 38.079518072289154, "grad_norm": 14.609049797058105, "learning_rate": 3.09368932038835e-05, "loss": 0.228, "step": 3927 }, { "epoch": 38.089156626506025, "grad_norm": 4.661649227142334, "learning_rate": 3.093203883495146e-05, "loss": 0.1828, "step": 3928 }, { "epoch": 38.09879518072289, "grad_norm": 3.375347852706909, "learning_rate": 3.092718446601942e-05, "loss": 0.3202, "step": 3929 }, { "epoch": 38.10843373493976, "grad_norm": 7.022006511688232, "learning_rate": 3.092233009708738e-05, "loss": 0.3648, "step": 3930 }, { "epoch": 38.11807228915663, "grad_norm": 7.771312713623047, "learning_rate": 3.0917475728155344e-05, "loss": 0.1384, "step": 3931 }, { "epoch": 38.12771084337349, "grad_norm": 3.730710029602051, "learning_rate": 3.09126213592233e-05, "loss": 0.2391, "step": 3932 }, { "epoch": 38.13734939759036, "grad_norm": 5.846885681152344, "learning_rate": 3.0907766990291267e-05, "loss": 0.2292, "step": 3933 }, { "epoch": 38.146987951807226, "grad_norm": 4.644069671630859, "learning_rate": 3.0902912621359224e-05, "loss": 0.2918, "step": 3934 }, { "epoch": 38.1566265060241, "grad_norm": 2.493501901626587, "learning_rate": 3.089805825242719e-05, "loss": 0.2085, "step": 3935 }, { "epoch": 38.16626506024097, "grad_norm": 3.0838747024536133, "learning_rate": 3.0893203883495147e-05, "loss": 0.2106, "step": 3936 }, { "epoch": 38.17590361445783, "grad_norm": 21.431665420532227, "learning_rate": 3.088834951456311e-05, "loss": 0.1425, "step": 3937 }, { "epoch": 38.1855421686747, "grad_norm": 7.6214141845703125, "learning_rate": 3.088349514563107e-05, "loss": 0.2869, "step": 3938 }, { "epoch": 38.195180722891564, "grad_norm": 15.615657806396484, "learning_rate": 3.087864077669903e-05, "loss": 0.235, "step": 3939 }, { "epoch": 38.204819277108435, "grad_norm": 3.6799824237823486, "learning_rate": 3.087378640776699e-05, "loss": 0.2006, "step": 3940 }, { "epoch": 38.2144578313253, "grad_norm": 8.76290512084961, "learning_rate": 3.0868932038834956e-05, "loss": 0.2223, "step": 3941 }, { "epoch": 38.22409638554217, "grad_norm": 4.574057579040527, "learning_rate": 3.0864077669902913e-05, "loss": 0.2583, "step": 3942 }, { "epoch": 38.23373493975904, "grad_norm": 4.633878231048584, "learning_rate": 3.085922330097087e-05, "loss": 0.328, "step": 3943 }, { "epoch": 38.2433734939759, "grad_norm": 3.2829782962799072, "learning_rate": 3.0854368932038836e-05, "loss": 0.1751, "step": 3944 }, { "epoch": 38.25301204819277, "grad_norm": 22.77923011779785, "learning_rate": 3.0849514563106793e-05, "loss": 0.272, "step": 3945 }, { "epoch": 38.262650602409636, "grad_norm": 3.250244379043579, "learning_rate": 3.084466019417476e-05, "loss": 0.1684, "step": 3946 }, { "epoch": 38.272289156626506, "grad_norm": 2.4442968368530273, "learning_rate": 3.0839805825242716e-05, "loss": 0.1429, "step": 3947 }, { "epoch": 38.28192771084338, "grad_norm": 3.393639326095581, "learning_rate": 3.083495145631068e-05, "loss": 0.2125, "step": 3948 }, { "epoch": 38.29156626506024, "grad_norm": 6.133279800415039, "learning_rate": 3.083009708737864e-05, "loss": 0.2754, "step": 3949 }, { "epoch": 38.30120481927711, "grad_norm": 3.3670644760131836, "learning_rate": 3.08252427184466e-05, "loss": 0.2232, "step": 3950 }, { "epoch": 38.310843373493974, "grad_norm": 2.5417141914367676, "learning_rate": 3.082038834951457e-05, "loss": 0.2878, "step": 3951 }, { "epoch": 38.320481927710844, "grad_norm": 2.9880313873291016, "learning_rate": 3.0815533980582525e-05, "loss": 0.1436, "step": 3952 }, { "epoch": 38.33012048192771, "grad_norm": 5.01869010925293, "learning_rate": 3.081067961165049e-05, "loss": 0.139, "step": 3953 }, { "epoch": 38.33975903614458, "grad_norm": 4.318636417388916, "learning_rate": 3.080582524271845e-05, "loss": 0.3556, "step": 3954 }, { "epoch": 38.34939759036145, "grad_norm": 7.222469329833984, "learning_rate": 3.080097087378641e-05, "loss": 0.1495, "step": 3955 }, { "epoch": 38.35903614457831, "grad_norm": 5.508211612701416, "learning_rate": 3.079611650485437e-05, "loss": 0.2319, "step": 3956 }, { "epoch": 38.36867469879518, "grad_norm": 9.116722106933594, "learning_rate": 3.0791262135922334e-05, "loss": 0.3141, "step": 3957 }, { "epoch": 38.378313253012045, "grad_norm": 7.46067476272583, "learning_rate": 3.078640776699029e-05, "loss": 0.2681, "step": 3958 }, { "epoch": 38.387951807228916, "grad_norm": 5.291431427001953, "learning_rate": 3.0781553398058256e-05, "loss": 0.3414, "step": 3959 }, { "epoch": 38.397590361445786, "grad_norm": 11.220166206359863, "learning_rate": 3.0776699029126214e-05, "loss": 0.3578, "step": 3960 }, { "epoch": 38.40722891566265, "grad_norm": 9.210210800170898, "learning_rate": 3.077184466019418e-05, "loss": 0.2857, "step": 3961 }, { "epoch": 38.41686746987952, "grad_norm": 19.788515090942383, "learning_rate": 3.0766990291262136e-05, "loss": 0.2409, "step": 3962 }, { "epoch": 38.42650602409638, "grad_norm": 2.9568121433258057, "learning_rate": 3.07621359223301e-05, "loss": 0.1931, "step": 3963 }, { "epoch": 38.436144578313254, "grad_norm": 10.260224342346191, "learning_rate": 3.075728155339806e-05, "loss": 0.2757, "step": 3964 }, { "epoch": 38.44578313253012, "grad_norm": 7.880948066711426, "learning_rate": 3.075242718446602e-05, "loss": 0.2864, "step": 3965 }, { "epoch": 38.45542168674699, "grad_norm": 5.447768688201904, "learning_rate": 3.074757281553398e-05, "loss": 0.1642, "step": 3966 }, { "epoch": 38.46506024096386, "grad_norm": 12.318564414978027, "learning_rate": 3.0742718446601945e-05, "loss": 0.224, "step": 3967 }, { "epoch": 38.47469879518072, "grad_norm": 3.2538270950317383, "learning_rate": 3.07378640776699e-05, "loss": 0.1957, "step": 3968 }, { "epoch": 38.48433734939759, "grad_norm": 4.03075647354126, "learning_rate": 3.073300970873786e-05, "loss": 0.2229, "step": 3969 }, { "epoch": 38.493975903614455, "grad_norm": 4.7980217933654785, "learning_rate": 3.0728155339805826e-05, "loss": 0.2222, "step": 3970 }, { "epoch": 38.503614457831326, "grad_norm": 5.081109046936035, "learning_rate": 3.072330097087378e-05, "loss": 0.2414, "step": 3971 }, { "epoch": 38.513253012048196, "grad_norm": 6.727880477905273, "learning_rate": 3.071844660194175e-05, "loss": 0.4133, "step": 3972 }, { "epoch": 38.52289156626506, "grad_norm": 7.113897323608398, "learning_rate": 3.0713592233009706e-05, "loss": 0.3175, "step": 3973 }, { "epoch": 38.53253012048193, "grad_norm": 3.7795965671539307, "learning_rate": 3.070873786407767e-05, "loss": 0.2266, "step": 3974 }, { "epoch": 38.54216867469879, "grad_norm": 3.473268508911133, "learning_rate": 3.070388349514563e-05, "loss": 0.317, "step": 3975 }, { "epoch": 38.55180722891566, "grad_norm": 3.770070791244507, "learning_rate": 3.069902912621359e-05, "loss": 0.4231, "step": 3976 }, { "epoch": 38.56144578313253, "grad_norm": 10.6962251663208, "learning_rate": 3.069417475728156e-05, "loss": 0.478, "step": 3977 }, { "epoch": 38.5710843373494, "grad_norm": 4.161340713500977, "learning_rate": 3.068932038834952e-05, "loss": 0.2617, "step": 3978 }, { "epoch": 38.58072289156627, "grad_norm": 4.496932029724121, "learning_rate": 3.068446601941748e-05, "loss": 0.2853, "step": 3979 }, { "epoch": 38.59036144578313, "grad_norm": 3.7554798126220703, "learning_rate": 3.067961165048544e-05, "loss": 0.304, "step": 3980 }, { "epoch": 38.6, "grad_norm": 4.364859104156494, "learning_rate": 3.06747572815534e-05, "loss": 0.2332, "step": 3981 }, { "epoch": 38.609638554216865, "grad_norm": 6.057568073272705, "learning_rate": 3.066990291262136e-05, "loss": 0.3034, "step": 3982 }, { "epoch": 38.619277108433735, "grad_norm": 4.446816444396973, "learning_rate": 3.0665048543689324e-05, "loss": 0.3044, "step": 3983 }, { "epoch": 38.628915662650606, "grad_norm": 10.874717712402344, "learning_rate": 3.066019417475728e-05, "loss": 0.299, "step": 3984 }, { "epoch": 38.63855421686747, "grad_norm": 4.280611991882324, "learning_rate": 3.0655339805825246e-05, "loss": 0.2676, "step": 3985 }, { "epoch": 38.64819277108434, "grad_norm": 10.643095970153809, "learning_rate": 3.0650485436893204e-05, "loss": 0.1746, "step": 3986 }, { "epoch": 38.6578313253012, "grad_norm": 7.459696292877197, "learning_rate": 3.064563106796117e-05, "loss": 0.297, "step": 3987 }, { "epoch": 38.66746987951807, "grad_norm": 4.524436950683594, "learning_rate": 3.0640776699029126e-05, "loss": 0.2475, "step": 3988 }, { "epoch": 38.67710843373494, "grad_norm": 3.2997119426727295, "learning_rate": 3.063592233009709e-05, "loss": 0.2301, "step": 3989 }, { "epoch": 38.68674698795181, "grad_norm": 7.351307392120361, "learning_rate": 3.063106796116505e-05, "loss": 0.2843, "step": 3990 }, { "epoch": 38.69638554216868, "grad_norm": 29.56266212463379, "learning_rate": 3.062621359223301e-05, "loss": 0.1633, "step": 3991 }, { "epoch": 38.70602409638554, "grad_norm": 4.634955406188965, "learning_rate": 3.062135922330097e-05, "loss": 0.2343, "step": 3992 }, { "epoch": 38.71566265060241, "grad_norm": 4.6370158195495605, "learning_rate": 3.0616504854368935e-05, "loss": 0.4941, "step": 3993 }, { "epoch": 38.725301204819274, "grad_norm": 5.379044055938721, "learning_rate": 3.061165048543689e-05, "loss": 0.1727, "step": 3994 }, { "epoch": 38.734939759036145, "grad_norm": 10.278278350830078, "learning_rate": 3.060679611650486e-05, "loss": 0.1196, "step": 3995 }, { "epoch": 38.744578313253015, "grad_norm": 2.181342124938965, "learning_rate": 3.0601941747572815e-05, "loss": 0.0922, "step": 3996 }, { "epoch": 38.75421686746988, "grad_norm": 4.931771278381348, "learning_rate": 3.059708737864077e-05, "loss": 0.2745, "step": 3997 }, { "epoch": 38.76385542168675, "grad_norm": 3.276455879211426, "learning_rate": 3.059223300970874e-05, "loss": 0.1173, "step": 3998 }, { "epoch": 38.77349397590361, "grad_norm": 3.423940420150757, "learning_rate": 3.0587378640776695e-05, "loss": 0.3286, "step": 3999 }, { "epoch": 38.78313253012048, "grad_norm": 4.773199081420898, "learning_rate": 3.058252427184466e-05, "loss": 0.3024, "step": 4000 }, { "epoch": 38.792771084337346, "grad_norm": 3.5485951900482178, "learning_rate": 3.057766990291262e-05, "loss": 0.0863, "step": 4001 }, { "epoch": 38.80240963855422, "grad_norm": 25.224401473999023, "learning_rate": 3.057281553398059e-05, "loss": 0.4494, "step": 4002 }, { "epoch": 38.81204819277109, "grad_norm": 6.772531986236572, "learning_rate": 3.056796116504855e-05, "loss": 0.1902, "step": 4003 }, { "epoch": 38.82168674698795, "grad_norm": 2.527569532394409, "learning_rate": 3.056310679611651e-05, "loss": 0.1382, "step": 4004 }, { "epoch": 38.83132530120482, "grad_norm": 5.143944263458252, "learning_rate": 3.055825242718447e-05, "loss": 0.1284, "step": 4005 }, { "epoch": 38.840963855421684, "grad_norm": 5.961953639984131, "learning_rate": 3.0553398058252434e-05, "loss": 0.1919, "step": 4006 }, { "epoch": 38.850602409638554, "grad_norm": 10.757652282714844, "learning_rate": 3.054854368932039e-05, "loss": 0.2068, "step": 4007 }, { "epoch": 38.860240963855425, "grad_norm": 2.733447790145874, "learning_rate": 3.054368932038835e-05, "loss": 0.2329, "step": 4008 }, { "epoch": 38.86987951807229, "grad_norm": 6.0213727951049805, "learning_rate": 3.0538834951456314e-05, "loss": 0.3391, "step": 4009 }, { "epoch": 38.87951807228916, "grad_norm": 4.426414489746094, "learning_rate": 3.053398058252427e-05, "loss": 0.3046, "step": 4010 }, { "epoch": 38.88915662650602, "grad_norm": 1.5477880239486694, "learning_rate": 3.0529126213592236e-05, "loss": 0.1617, "step": 4011 }, { "epoch": 38.89879518072289, "grad_norm": 12.1758394241333, "learning_rate": 3.0524271844660194e-05, "loss": 0.2584, "step": 4012 }, { "epoch": 38.908433734939756, "grad_norm": 6.031268119812012, "learning_rate": 3.051941747572816e-05, "loss": 0.1845, "step": 4013 }, { "epoch": 38.918072289156626, "grad_norm": 1.4333187341690063, "learning_rate": 3.051456310679612e-05, "loss": 0.2252, "step": 4014 }, { "epoch": 38.9277108433735, "grad_norm": 2.6939444541931152, "learning_rate": 3.050970873786408e-05, "loss": 0.1423, "step": 4015 }, { "epoch": 38.93734939759036, "grad_norm": 4.1183013916015625, "learning_rate": 3.0504854368932038e-05, "loss": 0.2314, "step": 4016 }, { "epoch": 38.94698795180723, "grad_norm": 5.202914714813232, "learning_rate": 3.05e-05, "loss": 0.2728, "step": 4017 }, { "epoch": 38.956626506024094, "grad_norm": 4.63014554977417, "learning_rate": 3.049514563106796e-05, "loss": 0.2106, "step": 4018 }, { "epoch": 38.966265060240964, "grad_norm": 10.3475341796875, "learning_rate": 3.049029126213592e-05, "loss": 0.1988, "step": 4019 }, { "epoch": 38.975903614457835, "grad_norm": 2.8752281665802, "learning_rate": 3.0485436893203883e-05, "loss": 0.183, "step": 4020 }, { "epoch": 38.9855421686747, "grad_norm": 2.016409397125244, "learning_rate": 3.0480582524271844e-05, "loss": 0.1347, "step": 4021 }, { "epoch": 39.001204819277106, "grad_norm": 6.271273136138916, "learning_rate": 3.0475728155339805e-05, "loss": 0.5119, "step": 4022 }, { "epoch": 39.01084337349398, "grad_norm": 4.890040397644043, "learning_rate": 3.0470873786407766e-05, "loss": 0.39, "step": 4023 }, { "epoch": 39.02048192771084, "grad_norm": 5.865295886993408, "learning_rate": 3.0466019417475727e-05, "loss": 0.3058, "step": 4024 }, { "epoch": 39.03012048192771, "grad_norm": 4.118648529052734, "learning_rate": 3.046116504854369e-05, "loss": 0.3061, "step": 4025 }, { "epoch": 39.03975903614458, "grad_norm": 7.413031578063965, "learning_rate": 3.045631067961165e-05, "loss": 0.2217, "step": 4026 }, { "epoch": 39.049397590361444, "grad_norm": 4.625168800354004, "learning_rate": 3.0451456310679614e-05, "loss": 0.2274, "step": 4027 }, { "epoch": 39.059036144578315, "grad_norm": 2.764777660369873, "learning_rate": 3.0446601941747575e-05, "loss": 0.1532, "step": 4028 }, { "epoch": 39.06867469879518, "grad_norm": 3.7978899478912354, "learning_rate": 3.0441747572815536e-05, "loss": 0.1539, "step": 4029 }, { "epoch": 39.07831325301205, "grad_norm": 5.700887680053711, "learning_rate": 3.0436893203883498e-05, "loss": 0.3009, "step": 4030 }, { "epoch": 39.08795180722892, "grad_norm": 7.191398620605469, "learning_rate": 3.043203883495146e-05, "loss": 0.2586, "step": 4031 }, { "epoch": 39.09759036144578, "grad_norm": 4.174710750579834, "learning_rate": 3.042718446601942e-05, "loss": 0.2942, "step": 4032 }, { "epoch": 39.10722891566265, "grad_norm": 3.5997729301452637, "learning_rate": 3.042233009708738e-05, "loss": 0.1884, "step": 4033 }, { "epoch": 39.116867469879516, "grad_norm": 2.4958715438842773, "learning_rate": 3.0417475728155342e-05, "loss": 0.1352, "step": 4034 }, { "epoch": 39.126506024096386, "grad_norm": 4.499833106994629, "learning_rate": 3.0412621359223303e-05, "loss": 0.1651, "step": 4035 }, { "epoch": 39.13614457831325, "grad_norm": 3.0012130737304688, "learning_rate": 3.0407766990291265e-05, "loss": 0.2989, "step": 4036 }, { "epoch": 39.14578313253012, "grad_norm": 3.8780057430267334, "learning_rate": 3.0402912621359226e-05, "loss": 0.3061, "step": 4037 }, { "epoch": 39.15542168674699, "grad_norm": 5.351174831390381, "learning_rate": 3.0398058252427187e-05, "loss": 0.1181, "step": 4038 }, { "epoch": 39.165060240963854, "grad_norm": 3.434410333633423, "learning_rate": 3.0393203883495148e-05, "loss": 0.1376, "step": 4039 }, { "epoch": 39.174698795180724, "grad_norm": 3.736614465713501, "learning_rate": 3.038834951456311e-05, "loss": 0.4704, "step": 4040 }, { "epoch": 39.18433734939759, "grad_norm": 5.482179641723633, "learning_rate": 3.038349514563107e-05, "loss": 0.2347, "step": 4041 }, { "epoch": 39.19397590361446, "grad_norm": 4.426230430603027, "learning_rate": 3.037864077669903e-05, "loss": 0.344, "step": 4042 }, { "epoch": 39.20361445783133, "grad_norm": 4.87262487411499, "learning_rate": 3.0373786407766993e-05, "loss": 0.2739, "step": 4043 }, { "epoch": 39.21325301204819, "grad_norm": 5.922795295715332, "learning_rate": 3.036893203883495e-05, "loss": 0.3575, "step": 4044 }, { "epoch": 39.22289156626506, "grad_norm": 6.736359119415283, "learning_rate": 3.036407766990291e-05, "loss": 0.2011, "step": 4045 }, { "epoch": 39.232530120481925, "grad_norm": 11.896530151367188, "learning_rate": 3.0359223300970873e-05, "loss": 0.2205, "step": 4046 }, { "epoch": 39.242168674698796, "grad_norm": 2.496805191040039, "learning_rate": 3.0354368932038834e-05, "loss": 0.1972, "step": 4047 }, { "epoch": 39.25180722891566, "grad_norm": 7.680017948150635, "learning_rate": 3.0349514563106795e-05, "loss": 0.3139, "step": 4048 }, { "epoch": 39.26144578313253, "grad_norm": 3.916003942489624, "learning_rate": 3.0344660194174756e-05, "loss": 0.226, "step": 4049 }, { "epoch": 39.2710843373494, "grad_norm": 2.9188458919525146, "learning_rate": 3.0339805825242717e-05, "loss": 0.2992, "step": 4050 }, { "epoch": 39.28072289156626, "grad_norm": 11.432263374328613, "learning_rate": 3.033495145631068e-05, "loss": 0.2255, "step": 4051 }, { "epoch": 39.290361445783134, "grad_norm": 12.667027473449707, "learning_rate": 3.0330097087378646e-05, "loss": 0.242, "step": 4052 }, { "epoch": 39.3, "grad_norm": 6.315504550933838, "learning_rate": 3.0325242718446607e-05, "loss": 0.3563, "step": 4053 }, { "epoch": 39.30963855421687, "grad_norm": 3.6832990646362305, "learning_rate": 3.032038834951457e-05, "loss": 0.2943, "step": 4054 }, { "epoch": 39.31927710843374, "grad_norm": 16.609317779541016, "learning_rate": 3.0315533980582526e-05, "loss": 0.2599, "step": 4055 }, { "epoch": 39.3289156626506, "grad_norm": 4.070610523223877, "learning_rate": 3.0310679611650487e-05, "loss": 0.2744, "step": 4056 }, { "epoch": 39.33855421686747, "grad_norm": 5.208132266998291, "learning_rate": 3.030582524271845e-05, "loss": 0.402, "step": 4057 }, { "epoch": 39.348192771084335, "grad_norm": 6.0660319328308105, "learning_rate": 3.030097087378641e-05, "loss": 0.236, "step": 4058 }, { "epoch": 39.357831325301206, "grad_norm": 7.344162464141846, "learning_rate": 3.029611650485437e-05, "loss": 0.336, "step": 4059 }, { "epoch": 39.36746987951807, "grad_norm": 4.176175117492676, "learning_rate": 3.0291262135922332e-05, "loss": 0.2772, "step": 4060 }, { "epoch": 39.37710843373494, "grad_norm": 3.6549577713012695, "learning_rate": 3.0286407766990293e-05, "loss": 0.1683, "step": 4061 }, { "epoch": 39.38674698795181, "grad_norm": 3.8160226345062256, "learning_rate": 3.0281553398058254e-05, "loss": 0.3167, "step": 4062 }, { "epoch": 39.39638554216867, "grad_norm": 2.2843971252441406, "learning_rate": 3.0276699029126215e-05, "loss": 0.1547, "step": 4063 }, { "epoch": 39.40602409638554, "grad_norm": 8.07514476776123, "learning_rate": 3.0271844660194177e-05, "loss": 0.2634, "step": 4064 }, { "epoch": 39.41566265060241, "grad_norm": 4.684165000915527, "learning_rate": 3.0266990291262138e-05, "loss": 0.2045, "step": 4065 }, { "epoch": 39.42530120481928, "grad_norm": 4.205076217651367, "learning_rate": 3.02621359223301e-05, "loss": 0.2027, "step": 4066 }, { "epoch": 39.43493975903615, "grad_norm": 4.478663444519043, "learning_rate": 3.025728155339806e-05, "loss": 0.2502, "step": 4067 }, { "epoch": 39.44457831325301, "grad_norm": 3.801018476486206, "learning_rate": 3.025242718446602e-05, "loss": 0.1465, "step": 4068 }, { "epoch": 39.45421686746988, "grad_norm": 4.578624248504639, "learning_rate": 3.0247572815533982e-05, "loss": 0.223, "step": 4069 }, { "epoch": 39.463855421686745, "grad_norm": 3.953554153442383, "learning_rate": 3.0242718446601943e-05, "loss": 0.163, "step": 4070 }, { "epoch": 39.473493975903615, "grad_norm": 5.953367710113525, "learning_rate": 3.02378640776699e-05, "loss": 0.3987, "step": 4071 }, { "epoch": 39.48313253012048, "grad_norm": 2.952125072479248, "learning_rate": 3.0233009708737862e-05, "loss": 0.1836, "step": 4072 }, { "epoch": 39.49277108433735, "grad_norm": 3.4436981678009033, "learning_rate": 3.0228155339805824e-05, "loss": 0.3385, "step": 4073 }, { "epoch": 39.50240963855422, "grad_norm": 5.565181255340576, "learning_rate": 3.0223300970873785e-05, "loss": 0.1257, "step": 4074 }, { "epoch": 39.51204819277108, "grad_norm": 5.012477874755859, "learning_rate": 3.0218446601941746e-05, "loss": 0.2643, "step": 4075 }, { "epoch": 39.52168674698795, "grad_norm": 6.702556610107422, "learning_rate": 3.0213592233009707e-05, "loss": 0.2337, "step": 4076 }, { "epoch": 39.53132530120482, "grad_norm": 2.17336368560791, "learning_rate": 3.0208737864077668e-05, "loss": 0.1839, "step": 4077 }, { "epoch": 39.54096385542169, "grad_norm": 3.488110065460205, "learning_rate": 3.0203883495145636e-05, "loss": 0.2987, "step": 4078 }, { "epoch": 39.55060240963856, "grad_norm": 1.224576711654663, "learning_rate": 3.0199029126213597e-05, "loss": 0.0789, "step": 4079 }, { "epoch": 39.56024096385542, "grad_norm": 7.1903252601623535, "learning_rate": 3.019417475728156e-05, "loss": 0.2647, "step": 4080 }, { "epoch": 39.56987951807229, "grad_norm": 5.668027400970459, "learning_rate": 3.018932038834952e-05, "loss": 0.1755, "step": 4081 }, { "epoch": 39.579518072289154, "grad_norm": 2.5999958515167236, "learning_rate": 3.0184466019417477e-05, "loss": 0.1838, "step": 4082 }, { "epoch": 39.589156626506025, "grad_norm": 3.620694637298584, "learning_rate": 3.017961165048544e-05, "loss": 0.1708, "step": 4083 }, { "epoch": 39.59879518072289, "grad_norm": 3.9319562911987305, "learning_rate": 3.01747572815534e-05, "loss": 0.4007, "step": 4084 }, { "epoch": 39.60843373493976, "grad_norm": 3.839329957962036, "learning_rate": 3.016990291262136e-05, "loss": 0.2878, "step": 4085 }, { "epoch": 39.61807228915663, "grad_norm": 5.886047840118408, "learning_rate": 3.0165048543689322e-05, "loss": 0.5051, "step": 4086 }, { "epoch": 39.62771084337349, "grad_norm": 3.87821364402771, "learning_rate": 3.0160194174757283e-05, "loss": 0.3172, "step": 4087 }, { "epoch": 39.63734939759036, "grad_norm": 5.159091472625732, "learning_rate": 3.0155339805825244e-05, "loss": 0.2073, "step": 4088 }, { "epoch": 39.646987951807226, "grad_norm": 8.821125984191895, "learning_rate": 3.0150485436893205e-05, "loss": 0.2229, "step": 4089 }, { "epoch": 39.6566265060241, "grad_norm": 4.687078952789307, "learning_rate": 3.0145631067961166e-05, "loss": 0.2594, "step": 4090 }, { "epoch": 39.66626506024097, "grad_norm": 5.100292682647705, "learning_rate": 3.0140776699029128e-05, "loss": 0.3009, "step": 4091 }, { "epoch": 39.67590361445783, "grad_norm": 5.1001667976379395, "learning_rate": 3.013592233009709e-05, "loss": 0.27, "step": 4092 }, { "epoch": 39.6855421686747, "grad_norm": 4.822018146514893, "learning_rate": 3.013106796116505e-05, "loss": 0.3146, "step": 4093 }, { "epoch": 39.695180722891564, "grad_norm": 4.2090301513671875, "learning_rate": 3.012621359223301e-05, "loss": 0.2517, "step": 4094 }, { "epoch": 39.704819277108435, "grad_norm": 3.6826887130737305, "learning_rate": 3.0121359223300972e-05, "loss": 0.1523, "step": 4095 }, { "epoch": 39.7144578313253, "grad_norm": 3.7137508392333984, "learning_rate": 3.0116504854368933e-05, "loss": 0.3302, "step": 4096 }, { "epoch": 39.72409638554217, "grad_norm": 3.422055959701538, "learning_rate": 3.0111650485436894e-05, "loss": 0.1453, "step": 4097 }, { "epoch": 39.73373493975904, "grad_norm": 2.747610569000244, "learning_rate": 3.0106796116504856e-05, "loss": 0.2999, "step": 4098 }, { "epoch": 39.7433734939759, "grad_norm": 11.096556663513184, "learning_rate": 3.0101941747572813e-05, "loss": 0.3079, "step": 4099 }, { "epoch": 39.75301204819277, "grad_norm": 4.159609317779541, "learning_rate": 3.0097087378640774e-05, "loss": 0.2802, "step": 4100 }, { "epoch": 39.762650602409636, "grad_norm": 3.484647274017334, "learning_rate": 3.0092233009708736e-05, "loss": 0.1731, "step": 4101 }, { "epoch": 39.772289156626506, "grad_norm": 5.51072883605957, "learning_rate": 3.0087378640776697e-05, "loss": 0.2383, "step": 4102 }, { "epoch": 39.78192771084338, "grad_norm": 3.2389333248138428, "learning_rate": 3.0082524271844665e-05, "loss": 0.278, "step": 4103 }, { "epoch": 39.79156626506024, "grad_norm": 5.572632789611816, "learning_rate": 3.0077669902912626e-05, "loss": 0.2268, "step": 4104 }, { "epoch": 39.80120481927711, "grad_norm": 8.769523620605469, "learning_rate": 3.0072815533980587e-05, "loss": 0.2865, "step": 4105 }, { "epoch": 39.810843373493974, "grad_norm": 4.210314750671387, "learning_rate": 3.0067961165048548e-05, "loss": 0.207, "step": 4106 }, { "epoch": 39.820481927710844, "grad_norm": 6.188048839569092, "learning_rate": 3.006310679611651e-05, "loss": 0.2247, "step": 4107 }, { "epoch": 39.83012048192771, "grad_norm": 6.711730480194092, "learning_rate": 3.005825242718447e-05, "loss": 0.2644, "step": 4108 }, { "epoch": 39.83975903614458, "grad_norm": 4.85742712020874, "learning_rate": 3.005339805825243e-05, "loss": 0.2084, "step": 4109 }, { "epoch": 39.84939759036145, "grad_norm": 4.63380241394043, "learning_rate": 3.004854368932039e-05, "loss": 0.2918, "step": 4110 }, { "epoch": 39.85903614457831, "grad_norm": 3.2598884105682373, "learning_rate": 3.004368932038835e-05, "loss": 0.254, "step": 4111 }, { "epoch": 39.86867469879518, "grad_norm": 4.43694543838501, "learning_rate": 3.003883495145631e-05, "loss": 0.3736, "step": 4112 }, { "epoch": 39.878313253012045, "grad_norm": 4.0922017097473145, "learning_rate": 3.0033980582524273e-05, "loss": 0.1756, "step": 4113 }, { "epoch": 39.887951807228916, "grad_norm": 3.1272425651550293, "learning_rate": 3.0029126213592234e-05, "loss": 0.2021, "step": 4114 }, { "epoch": 39.897590361445786, "grad_norm": 4.297846794128418, "learning_rate": 3.0024271844660195e-05, "loss": 0.3177, "step": 4115 }, { "epoch": 39.90722891566265, "grad_norm": 6.299269199371338, "learning_rate": 3.0019417475728156e-05, "loss": 0.351, "step": 4116 }, { "epoch": 39.91686746987952, "grad_norm": 3.455357789993286, "learning_rate": 3.0014563106796117e-05, "loss": 0.2163, "step": 4117 }, { "epoch": 39.92650602409638, "grad_norm": 3.9645748138427734, "learning_rate": 3.000970873786408e-05, "loss": 0.2035, "step": 4118 }, { "epoch": 39.936144578313254, "grad_norm": 8.263472557067871, "learning_rate": 3.000485436893204e-05, "loss": 0.328, "step": 4119 }, { "epoch": 39.94578313253012, "grad_norm": 2.8580055236816406, "learning_rate": 3e-05, "loss": 0.2711, "step": 4120 }, { "epoch": 39.95542168674699, "grad_norm": 12.15593433380127, "learning_rate": 2.9995145631067962e-05, "loss": 0.2731, "step": 4121 }, { "epoch": 39.96506024096386, "grad_norm": 5.181646823883057, "learning_rate": 2.9990291262135923e-05, "loss": 0.4269, "step": 4122 }, { "epoch": 39.97469879518072, "grad_norm": 3.7152657508850098, "learning_rate": 2.9985436893203884e-05, "loss": 0.3863, "step": 4123 }, { "epoch": 39.98433734939759, "grad_norm": 10.386741638183594, "learning_rate": 2.9980582524271845e-05, "loss": 0.271, "step": 4124 }, { "epoch": 39.993975903614455, "grad_norm": 2.4698281288146973, "learning_rate": 2.9975728155339806e-05, "loss": 0.2332, "step": 4125 }, { "epoch": 40.00963855421687, "grad_norm": 7.690333843231201, "learning_rate": 2.9970873786407768e-05, "loss": 0.4907, "step": 4126 }, { "epoch": 40.019277108433734, "grad_norm": 5.02026891708374, "learning_rate": 2.9966019417475725e-05, "loss": 0.1287, "step": 4127 }, { "epoch": 40.028915662650604, "grad_norm": 6.744338512420654, "learning_rate": 2.9961165048543693e-05, "loss": 0.2878, "step": 4128 }, { "epoch": 40.03855421686747, "grad_norm": 3.1378531455993652, "learning_rate": 2.9956310679611654e-05, "loss": 0.1958, "step": 4129 }, { "epoch": 40.04819277108434, "grad_norm": 5.502594470977783, "learning_rate": 2.9951456310679616e-05, "loss": 0.2692, "step": 4130 }, { "epoch": 40.0578313253012, "grad_norm": 1.6503684520721436, "learning_rate": 2.9946601941747577e-05, "loss": 0.0848, "step": 4131 }, { "epoch": 40.06746987951807, "grad_norm": 3.5665977001190186, "learning_rate": 2.9941747572815538e-05, "loss": 0.154, "step": 4132 }, { "epoch": 40.07710843373494, "grad_norm": 3.484203577041626, "learning_rate": 2.99368932038835e-05, "loss": 0.1119, "step": 4133 }, { "epoch": 40.086746987951805, "grad_norm": 5.826859474182129, "learning_rate": 2.993203883495146e-05, "loss": 0.3596, "step": 4134 }, { "epoch": 40.096385542168676, "grad_norm": 4.753464221954346, "learning_rate": 2.992718446601942e-05, "loss": 0.3459, "step": 4135 }, { "epoch": 40.10602409638554, "grad_norm": 4.2151618003845215, "learning_rate": 2.9922330097087382e-05, "loss": 0.2866, "step": 4136 }, { "epoch": 40.11566265060241, "grad_norm": 4.343163013458252, "learning_rate": 2.9917475728155344e-05, "loss": 0.2174, "step": 4137 }, { "epoch": 40.12530120481928, "grad_norm": 3.5240910053253174, "learning_rate": 2.99126213592233e-05, "loss": 0.2527, "step": 4138 }, { "epoch": 40.13493975903614, "grad_norm": 4.6212358474731445, "learning_rate": 2.9907766990291263e-05, "loss": 0.2226, "step": 4139 }, { "epoch": 40.144578313253014, "grad_norm": 2.523941993713379, "learning_rate": 2.9902912621359224e-05, "loss": 0.097, "step": 4140 }, { "epoch": 40.15421686746988, "grad_norm": 5.364457607269287, "learning_rate": 2.9898058252427185e-05, "loss": 0.219, "step": 4141 }, { "epoch": 40.16385542168675, "grad_norm": 4.689367294311523, "learning_rate": 2.9893203883495146e-05, "loss": 0.2195, "step": 4142 }, { "epoch": 40.17349397590361, "grad_norm": 6.915067195892334, "learning_rate": 2.9888349514563107e-05, "loss": 0.3939, "step": 4143 }, { "epoch": 40.18313253012048, "grad_norm": 3.9664206504821777, "learning_rate": 2.9883495145631068e-05, "loss": 0.3635, "step": 4144 }, { "epoch": 40.19277108433735, "grad_norm": 3.1817946434020996, "learning_rate": 2.987864077669903e-05, "loss": 0.133, "step": 4145 }, { "epoch": 40.202409638554215, "grad_norm": 6.971929550170898, "learning_rate": 2.987378640776699e-05, "loss": 0.4337, "step": 4146 }, { "epoch": 40.212048192771086, "grad_norm": 15.610319137573242, "learning_rate": 2.9868932038834952e-05, "loss": 0.2673, "step": 4147 }, { "epoch": 40.22168674698795, "grad_norm": 4.09124755859375, "learning_rate": 2.9864077669902913e-05, "loss": 0.1152, "step": 4148 }, { "epoch": 40.23132530120482, "grad_norm": 3.359135627746582, "learning_rate": 2.9859223300970874e-05, "loss": 0.1208, "step": 4149 }, { "epoch": 40.24096385542169, "grad_norm": 8.064532279968262, "learning_rate": 2.9854368932038835e-05, "loss": 0.2331, "step": 4150 }, { "epoch": 40.25060240963855, "grad_norm": 3.2561776638031006, "learning_rate": 2.9849514563106796e-05, "loss": 0.1939, "step": 4151 }, { "epoch": 40.26024096385542, "grad_norm": 6.5562543869018555, "learning_rate": 2.9844660194174757e-05, "loss": 0.4892, "step": 4152 }, { "epoch": 40.26987951807229, "grad_norm": 3.826486349105835, "learning_rate": 2.9839805825242722e-05, "loss": 0.413, "step": 4153 }, { "epoch": 40.27951807228916, "grad_norm": 5.588705062866211, "learning_rate": 2.9834951456310683e-05, "loss": 0.3251, "step": 4154 }, { "epoch": 40.28915662650602, "grad_norm": 16.469789505004883, "learning_rate": 2.9830097087378644e-05, "loss": 0.354, "step": 4155 }, { "epoch": 40.29879518072289, "grad_norm": 5.1657023429870605, "learning_rate": 2.9825242718446605e-05, "loss": 0.4576, "step": 4156 }, { "epoch": 40.30843373493976, "grad_norm": 11.335503578186035, "learning_rate": 2.9820388349514567e-05, "loss": 0.2641, "step": 4157 }, { "epoch": 40.318072289156625, "grad_norm": 4.969460487365723, "learning_rate": 2.9815533980582528e-05, "loss": 0.1901, "step": 4158 }, { "epoch": 40.327710843373495, "grad_norm": 7.547362327575684, "learning_rate": 2.981067961165049e-05, "loss": 0.3118, "step": 4159 }, { "epoch": 40.33734939759036, "grad_norm": 5.5698561668396, "learning_rate": 2.980582524271845e-05, "loss": 0.299, "step": 4160 }, { "epoch": 40.34698795180723, "grad_norm": 2.1964433193206787, "learning_rate": 2.980097087378641e-05, "loss": 0.0907, "step": 4161 }, { "epoch": 40.3566265060241, "grad_norm": 7.146617412567139, "learning_rate": 2.9796116504854372e-05, "loss": 0.3166, "step": 4162 }, { "epoch": 40.36626506024096, "grad_norm": 6.2252912521362305, "learning_rate": 2.9791262135922333e-05, "loss": 0.2806, "step": 4163 }, { "epoch": 40.37590361445783, "grad_norm": 3.861011266708374, "learning_rate": 2.9786407766990295e-05, "loss": 0.2007, "step": 4164 }, { "epoch": 40.3855421686747, "grad_norm": 4.364879608154297, "learning_rate": 2.9781553398058252e-05, "loss": 0.25, "step": 4165 }, { "epoch": 40.39518072289157, "grad_norm": 3.272373914718628, "learning_rate": 2.9776699029126213e-05, "loss": 0.1647, "step": 4166 }, { "epoch": 40.40481927710843, "grad_norm": 2.62929630279541, "learning_rate": 2.9771844660194175e-05, "loss": 0.0966, "step": 4167 }, { "epoch": 40.4144578313253, "grad_norm": 4.140336990356445, "learning_rate": 2.9766990291262136e-05, "loss": 0.2108, "step": 4168 }, { "epoch": 40.42409638554217, "grad_norm": 5.996729373931885, "learning_rate": 2.9762135922330097e-05, "loss": 0.2754, "step": 4169 }, { "epoch": 40.433734939759034, "grad_norm": 8.785558700561523, "learning_rate": 2.9757281553398058e-05, "loss": 0.2117, "step": 4170 }, { "epoch": 40.443373493975905, "grad_norm": 7.8787641525268555, "learning_rate": 2.975242718446602e-05, "loss": 0.2538, "step": 4171 }, { "epoch": 40.45301204819277, "grad_norm": 8.457393646240234, "learning_rate": 2.974757281553398e-05, "loss": 0.3187, "step": 4172 }, { "epoch": 40.46265060240964, "grad_norm": 4.185866355895996, "learning_rate": 2.974271844660194e-05, "loss": 0.2813, "step": 4173 }, { "epoch": 40.47228915662651, "grad_norm": 4.247594833374023, "learning_rate": 2.9737864077669903e-05, "loss": 0.299, "step": 4174 }, { "epoch": 40.48192771084337, "grad_norm": 4.037968158721924, "learning_rate": 2.9733009708737864e-05, "loss": 0.2857, "step": 4175 }, { "epoch": 40.49156626506024, "grad_norm": 5.496686935424805, "learning_rate": 2.9728155339805825e-05, "loss": 0.313, "step": 4176 }, { "epoch": 40.501204819277106, "grad_norm": 12.96766185760498, "learning_rate": 2.9723300970873786e-05, "loss": 0.2242, "step": 4177 }, { "epoch": 40.51084337349398, "grad_norm": 4.813643932342529, "learning_rate": 2.9718446601941747e-05, "loss": 0.1453, "step": 4178 }, { "epoch": 40.52048192771084, "grad_norm": 6.238147258758545, "learning_rate": 2.9713592233009712e-05, "loss": 0.2412, "step": 4179 }, { "epoch": 40.53012048192771, "grad_norm": 7.598882675170898, "learning_rate": 2.9708737864077673e-05, "loss": 0.258, "step": 4180 }, { "epoch": 40.53975903614458, "grad_norm": 3.9463489055633545, "learning_rate": 2.9703883495145634e-05, "loss": 0.3665, "step": 4181 }, { "epoch": 40.549397590361444, "grad_norm": 4.665303707122803, "learning_rate": 2.9699029126213595e-05, "loss": 0.3396, "step": 4182 }, { "epoch": 40.559036144578315, "grad_norm": 6.022875785827637, "learning_rate": 2.9694174757281556e-05, "loss": 0.2286, "step": 4183 }, { "epoch": 40.56867469879518, "grad_norm": 4.749101161956787, "learning_rate": 2.9689320388349517e-05, "loss": 0.4185, "step": 4184 }, { "epoch": 40.57831325301205, "grad_norm": 7.362026691436768, "learning_rate": 2.968446601941748e-05, "loss": 0.4436, "step": 4185 }, { "epoch": 40.58795180722892, "grad_norm": 5.457221031188965, "learning_rate": 2.967961165048544e-05, "loss": 0.2552, "step": 4186 }, { "epoch": 40.59759036144578, "grad_norm": 3.1202802658081055, "learning_rate": 2.96747572815534e-05, "loss": 0.0889, "step": 4187 }, { "epoch": 40.60722891566265, "grad_norm": 2.983733892440796, "learning_rate": 2.9669902912621362e-05, "loss": 0.177, "step": 4188 }, { "epoch": 40.616867469879516, "grad_norm": 4.092185020446777, "learning_rate": 2.9665048543689323e-05, "loss": 0.2929, "step": 4189 }, { "epoch": 40.626506024096386, "grad_norm": 6.817102909088135, "learning_rate": 2.9660194174757284e-05, "loss": 0.2513, "step": 4190 }, { "epoch": 40.63614457831325, "grad_norm": 6.129679203033447, "learning_rate": 2.9655339805825245e-05, "loss": 0.4388, "step": 4191 }, { "epoch": 40.64578313253012, "grad_norm": 5.713778018951416, "learning_rate": 2.9650485436893207e-05, "loss": 0.2364, "step": 4192 }, { "epoch": 40.65542168674699, "grad_norm": 2.2917683124542236, "learning_rate": 2.9645631067961164e-05, "loss": 0.1178, "step": 4193 }, { "epoch": 40.665060240963854, "grad_norm": 2.933683395385742, "learning_rate": 2.9640776699029126e-05, "loss": 0.2008, "step": 4194 }, { "epoch": 40.674698795180724, "grad_norm": 13.994073867797852, "learning_rate": 2.9635922330097087e-05, "loss": 0.1311, "step": 4195 }, { "epoch": 40.68433734939759, "grad_norm": 3.9194703102111816, "learning_rate": 2.9631067961165048e-05, "loss": 0.2441, "step": 4196 }, { "epoch": 40.69397590361446, "grad_norm": 5.815675735473633, "learning_rate": 2.962621359223301e-05, "loss": 0.4962, "step": 4197 }, { "epoch": 40.70361445783133, "grad_norm": 6.555832862854004, "learning_rate": 2.962135922330097e-05, "loss": 0.4447, "step": 4198 }, { "epoch": 40.71325301204819, "grad_norm": 2.458528518676758, "learning_rate": 2.961650485436893e-05, "loss": 0.1315, "step": 4199 }, { "epoch": 40.72289156626506, "grad_norm": 3.138528347015381, "learning_rate": 2.9611650485436892e-05, "loss": 0.282, "step": 4200 }, { "epoch": 40.732530120481925, "grad_norm": 3.444549083709717, "learning_rate": 2.9606796116504854e-05, "loss": 0.1536, "step": 4201 }, { "epoch": 40.742168674698796, "grad_norm": 6.057636260986328, "learning_rate": 2.9601941747572815e-05, "loss": 0.3408, "step": 4202 }, { "epoch": 40.75180722891566, "grad_norm": 2.8491950035095215, "learning_rate": 2.9597087378640776e-05, "loss": 0.1557, "step": 4203 }, { "epoch": 40.76144578313253, "grad_norm": 3.217360734939575, "learning_rate": 2.959223300970874e-05, "loss": 0.2229, "step": 4204 }, { "epoch": 40.7710843373494, "grad_norm": 4.67536735534668, "learning_rate": 2.95873786407767e-05, "loss": 0.3079, "step": 4205 }, { "epoch": 40.78072289156626, "grad_norm": 5.75466775894165, "learning_rate": 2.9582524271844663e-05, "loss": 0.3315, "step": 4206 }, { "epoch": 40.790361445783134, "grad_norm": 3.255175828933716, "learning_rate": 2.9577669902912624e-05, "loss": 0.1534, "step": 4207 }, { "epoch": 40.8, "grad_norm": 3.3518757820129395, "learning_rate": 2.9572815533980585e-05, "loss": 0.1647, "step": 4208 }, { "epoch": 40.80963855421687, "grad_norm": 6.963761806488037, "learning_rate": 2.9567961165048546e-05, "loss": 0.4175, "step": 4209 }, { "epoch": 40.81927710843374, "grad_norm": 4.768377304077148, "learning_rate": 2.9563106796116507e-05, "loss": 0.1955, "step": 4210 }, { "epoch": 40.8289156626506, "grad_norm": 5.142149925231934, "learning_rate": 2.955825242718447e-05, "loss": 0.212, "step": 4211 }, { "epoch": 40.83855421686747, "grad_norm": 4.186707496643066, "learning_rate": 2.955339805825243e-05, "loss": 0.2578, "step": 4212 }, { "epoch": 40.848192771084335, "grad_norm": 2.386260986328125, "learning_rate": 2.954854368932039e-05, "loss": 0.1754, "step": 4213 }, { "epoch": 40.857831325301206, "grad_norm": 2.295095682144165, "learning_rate": 2.9543689320388352e-05, "loss": 0.186, "step": 4214 }, { "epoch": 40.86746987951807, "grad_norm": 5.272623062133789, "learning_rate": 2.9538834951456313e-05, "loss": 0.326, "step": 4215 }, { "epoch": 40.87710843373494, "grad_norm": 6.73133659362793, "learning_rate": 2.9533980582524274e-05, "loss": 0.2133, "step": 4216 }, { "epoch": 40.88674698795181, "grad_norm": 4.453538417816162, "learning_rate": 2.9529126213592235e-05, "loss": 0.1468, "step": 4217 }, { "epoch": 40.89638554216867, "grad_norm": 8.15123176574707, "learning_rate": 2.9524271844660196e-05, "loss": 0.1911, "step": 4218 }, { "epoch": 40.90602409638554, "grad_norm": 6.033295154571533, "learning_rate": 2.9519417475728158e-05, "loss": 0.1945, "step": 4219 }, { "epoch": 40.91566265060241, "grad_norm": 3.440103769302368, "learning_rate": 2.9514563106796115e-05, "loss": 0.1959, "step": 4220 }, { "epoch": 40.92530120481928, "grad_norm": 13.582926750183105, "learning_rate": 2.9509708737864076e-05, "loss": 0.227, "step": 4221 }, { "epoch": 40.93493975903615, "grad_norm": 3.1337287425994873, "learning_rate": 2.9504854368932038e-05, "loss": 0.1354, "step": 4222 }, { "epoch": 40.94457831325301, "grad_norm": 4.5136590003967285, "learning_rate": 2.95e-05, "loss": 0.1679, "step": 4223 }, { "epoch": 40.95421686746988, "grad_norm": 6.582216262817383, "learning_rate": 2.949514563106796e-05, "loss": 0.5511, "step": 4224 }, { "epoch": 40.963855421686745, "grad_norm": 3.4515631198883057, "learning_rate": 2.949029126213592e-05, "loss": 0.2058, "step": 4225 }, { "epoch": 40.973493975903615, "grad_norm": 1.8230681419372559, "learning_rate": 2.9485436893203882e-05, "loss": 0.1501, "step": 4226 }, { "epoch": 40.98313253012048, "grad_norm": 2.4191813468933105, "learning_rate": 2.9480582524271843e-05, "loss": 0.1322, "step": 4227 }, { "epoch": 40.99277108433735, "grad_norm": 2.842097759246826, "learning_rate": 2.9475728155339804e-05, "loss": 0.0987, "step": 4228 }, { "epoch": 41.00843373493976, "grad_norm": 8.441466331481934, "learning_rate": 2.9470873786407772e-05, "loss": 0.3095, "step": 4229 }, { "epoch": 41.01807228915663, "grad_norm": 6.35973596572876, "learning_rate": 2.9466019417475734e-05, "loss": 0.4283, "step": 4230 }, { "epoch": 41.02771084337349, "grad_norm": 2.5735769271850586, "learning_rate": 2.946116504854369e-05, "loss": 0.1477, "step": 4231 }, { "epoch": 41.03734939759036, "grad_norm": 4.106205463409424, "learning_rate": 2.9456310679611652e-05, "loss": 0.2238, "step": 4232 }, { "epoch": 41.04698795180723, "grad_norm": 8.44540023803711, "learning_rate": 2.9451456310679614e-05, "loss": 0.3685, "step": 4233 }, { "epoch": 41.056626506024095, "grad_norm": 1.34449303150177, "learning_rate": 2.9446601941747575e-05, "loss": 0.1604, "step": 4234 }, { "epoch": 41.066265060240966, "grad_norm": 2.3550047874450684, "learning_rate": 2.9441747572815536e-05, "loss": 0.2224, "step": 4235 }, { "epoch": 41.07590361445783, "grad_norm": 2.3980579376220703, "learning_rate": 2.9436893203883497e-05, "loss": 0.1014, "step": 4236 }, { "epoch": 41.0855421686747, "grad_norm": 1.3185549974441528, "learning_rate": 2.9432038834951458e-05, "loss": 0.2044, "step": 4237 }, { "epoch": 41.09518072289157, "grad_norm": 9.057443618774414, "learning_rate": 2.942718446601942e-05, "loss": 0.3081, "step": 4238 }, { "epoch": 41.10481927710843, "grad_norm": 2.4184978008270264, "learning_rate": 2.942233009708738e-05, "loss": 0.128, "step": 4239 }, { "epoch": 41.1144578313253, "grad_norm": 21.693735122680664, "learning_rate": 2.941747572815534e-05, "loss": 0.3108, "step": 4240 }, { "epoch": 41.12409638554217, "grad_norm": 8.453689575195312, "learning_rate": 2.9412621359223303e-05, "loss": 0.287, "step": 4241 }, { "epoch": 41.13373493975904, "grad_norm": 2.4344258308410645, "learning_rate": 2.9407766990291264e-05, "loss": 0.1995, "step": 4242 }, { "epoch": 41.1433734939759, "grad_norm": 5.420561790466309, "learning_rate": 2.9402912621359225e-05, "loss": 0.2942, "step": 4243 }, { "epoch": 41.15301204819277, "grad_norm": 3.113884687423706, "learning_rate": 2.9398058252427186e-05, "loss": 0.1363, "step": 4244 }, { "epoch": 41.16265060240964, "grad_norm": 3.579378604888916, "learning_rate": 2.9393203883495147e-05, "loss": 0.095, "step": 4245 }, { "epoch": 41.172289156626505, "grad_norm": 7.700334548950195, "learning_rate": 2.938834951456311e-05, "loss": 0.2762, "step": 4246 }, { "epoch": 41.181927710843375, "grad_norm": 9.299348831176758, "learning_rate": 2.938349514563107e-05, "loss": 0.2626, "step": 4247 }, { "epoch": 41.19156626506024, "grad_norm": 1.1748173236846924, "learning_rate": 2.9378640776699027e-05, "loss": 0.1701, "step": 4248 }, { "epoch": 41.20120481927711, "grad_norm": 3.7920870780944824, "learning_rate": 2.937378640776699e-05, "loss": 0.3597, "step": 4249 }, { "epoch": 41.21084337349398, "grad_norm": 4.867300033569336, "learning_rate": 2.936893203883495e-05, "loss": 0.223, "step": 4250 }, { "epoch": 41.22048192771084, "grad_norm": 8.089223861694336, "learning_rate": 2.936407766990291e-05, "loss": 0.1285, "step": 4251 }, { "epoch": 41.23012048192771, "grad_norm": 4.62794303894043, "learning_rate": 2.9359223300970872e-05, "loss": 0.3584, "step": 4252 }, { "epoch": 41.23975903614458, "grad_norm": 7.365750789642334, "learning_rate": 2.9354368932038833e-05, "loss": 0.2723, "step": 4253 }, { "epoch": 41.24939759036145, "grad_norm": 5.030642986297607, "learning_rate": 2.93495145631068e-05, "loss": 0.2274, "step": 4254 }, { "epoch": 41.25903614457831, "grad_norm": 14.488249778747559, "learning_rate": 2.9344660194174762e-05, "loss": 0.2102, "step": 4255 }, { "epoch": 41.26867469879518, "grad_norm": 6.034298896789551, "learning_rate": 2.9339805825242723e-05, "loss": 0.2238, "step": 4256 }, { "epoch": 41.27831325301205, "grad_norm": 19.059154510498047, "learning_rate": 2.9334951456310685e-05, "loss": 0.3149, "step": 4257 }, { "epoch": 41.287951807228914, "grad_norm": 3.5054197311401367, "learning_rate": 2.9330097087378646e-05, "loss": 0.1432, "step": 4258 }, { "epoch": 41.297590361445785, "grad_norm": 11.468584060668945, "learning_rate": 2.9325242718446603e-05, "loss": 0.3916, "step": 4259 }, { "epoch": 41.30722891566265, "grad_norm": 6.028814315795898, "learning_rate": 2.9320388349514565e-05, "loss": 0.1777, "step": 4260 }, { "epoch": 41.31686746987952, "grad_norm": 4.647971153259277, "learning_rate": 2.9315533980582526e-05, "loss": 0.2306, "step": 4261 }, { "epoch": 41.32650602409639, "grad_norm": 17.028785705566406, "learning_rate": 2.9310679611650487e-05, "loss": 0.2145, "step": 4262 }, { "epoch": 41.33614457831325, "grad_norm": 12.646530151367188, "learning_rate": 2.9305825242718448e-05, "loss": 0.297, "step": 4263 }, { "epoch": 41.34578313253012, "grad_norm": 3.930253505706787, "learning_rate": 2.930097087378641e-05, "loss": 0.1862, "step": 4264 }, { "epoch": 41.355421686746986, "grad_norm": 3.4180049896240234, "learning_rate": 2.929611650485437e-05, "loss": 0.2636, "step": 4265 }, { "epoch": 41.36506024096386, "grad_norm": 3.0126166343688965, "learning_rate": 2.929126213592233e-05, "loss": 0.2265, "step": 4266 }, { "epoch": 41.37469879518072, "grad_norm": 7.537500858306885, "learning_rate": 2.9286407766990293e-05, "loss": 0.444, "step": 4267 }, { "epoch": 41.38433734939759, "grad_norm": 9.961782455444336, "learning_rate": 2.9281553398058254e-05, "loss": 0.2588, "step": 4268 }, { "epoch": 41.39397590361446, "grad_norm": 3.3768765926361084, "learning_rate": 2.9276699029126215e-05, "loss": 0.2996, "step": 4269 }, { "epoch": 41.403614457831324, "grad_norm": 1.6667929887771606, "learning_rate": 2.9271844660194176e-05, "loss": 0.3202, "step": 4270 }, { "epoch": 41.413253012048195, "grad_norm": 15.048202514648438, "learning_rate": 2.9266990291262137e-05, "loss": 0.2587, "step": 4271 }, { "epoch": 41.42289156626506, "grad_norm": 15.982718467712402, "learning_rate": 2.9262135922330098e-05, "loss": 0.3247, "step": 4272 }, { "epoch": 41.43253012048193, "grad_norm": 15.152616500854492, "learning_rate": 2.925728155339806e-05, "loss": 0.1764, "step": 4273 }, { "epoch": 41.44216867469879, "grad_norm": 12.018767356872559, "learning_rate": 2.925242718446602e-05, "loss": 0.2017, "step": 4274 }, { "epoch": 41.45180722891566, "grad_norm": 27.30048942565918, "learning_rate": 2.9247572815533982e-05, "loss": 0.263, "step": 4275 }, { "epoch": 41.46144578313253, "grad_norm": 4.158144950866699, "learning_rate": 2.924271844660194e-05, "loss": 0.2106, "step": 4276 }, { "epoch": 41.471084337349396, "grad_norm": 155.00291442871094, "learning_rate": 2.92378640776699e-05, "loss": 0.3204, "step": 4277 }, { "epoch": 41.480722891566266, "grad_norm": 5.642494201660156, "learning_rate": 2.9233009708737862e-05, "loss": 0.2589, "step": 4278 }, { "epoch": 41.49036144578313, "grad_norm": 4.931209564208984, "learning_rate": 2.9228155339805823e-05, "loss": 0.2069, "step": 4279 }, { "epoch": 41.5, "grad_norm": 5.746601104736328, "learning_rate": 2.922330097087379e-05, "loss": 0.253, "step": 4280 }, { "epoch": 41.50963855421687, "grad_norm": 12.357719421386719, "learning_rate": 2.9218446601941752e-05, "loss": 0.4213, "step": 4281 }, { "epoch": 41.519277108433734, "grad_norm": 4.1805267333984375, "learning_rate": 2.9213592233009713e-05, "loss": 0.2913, "step": 4282 }, { "epoch": 41.528915662650604, "grad_norm": 3.389495849609375, "learning_rate": 2.9208737864077674e-05, "loss": 0.3557, "step": 4283 }, { "epoch": 41.53855421686747, "grad_norm": 2.038696765899658, "learning_rate": 2.9203883495145635e-05, "loss": 0.2499, "step": 4284 }, { "epoch": 41.54819277108434, "grad_norm": 1.4256342649459839, "learning_rate": 2.9199029126213597e-05, "loss": 0.2731, "step": 4285 }, { "epoch": 41.55783132530121, "grad_norm": 5.603267192840576, "learning_rate": 2.9194174757281558e-05, "loss": 0.2519, "step": 4286 }, { "epoch": 41.56746987951807, "grad_norm": 5.766209125518799, "learning_rate": 2.9189320388349515e-05, "loss": 0.2015, "step": 4287 }, { "epoch": 41.57710843373494, "grad_norm": 18.90562629699707, "learning_rate": 2.9184466019417477e-05, "loss": 0.2349, "step": 4288 }, { "epoch": 41.586746987951805, "grad_norm": 5.595191478729248, "learning_rate": 2.9179611650485438e-05, "loss": 0.2478, "step": 4289 }, { "epoch": 41.596385542168676, "grad_norm": 19.909770965576172, "learning_rate": 2.91747572815534e-05, "loss": 0.259, "step": 4290 }, { "epoch": 41.60602409638554, "grad_norm": 26.725963592529297, "learning_rate": 2.916990291262136e-05, "loss": 0.2817, "step": 4291 }, { "epoch": 41.61566265060241, "grad_norm": 11.94558048248291, "learning_rate": 2.916504854368932e-05, "loss": 0.4457, "step": 4292 }, { "epoch": 41.62530120481928, "grad_norm": 22.844158172607422, "learning_rate": 2.9160194174757282e-05, "loss": 0.271, "step": 4293 }, { "epoch": 41.63493975903614, "grad_norm": 6.488889217376709, "learning_rate": 2.9155339805825244e-05, "loss": 0.59, "step": 4294 }, { "epoch": 41.644578313253014, "grad_norm": 12.74889087677002, "learning_rate": 2.9150485436893205e-05, "loss": 0.218, "step": 4295 }, { "epoch": 41.65421686746988, "grad_norm": 0.8126906156539917, "learning_rate": 2.9145631067961166e-05, "loss": 0.143, "step": 4296 }, { "epoch": 41.66385542168675, "grad_norm": 4.961111068725586, "learning_rate": 2.9140776699029127e-05, "loss": 0.4488, "step": 4297 }, { "epoch": 41.67349397590361, "grad_norm": 8.459842681884766, "learning_rate": 2.9135922330097088e-05, "loss": 0.2085, "step": 4298 }, { "epoch": 41.68313253012048, "grad_norm": 9.033902168273926, "learning_rate": 2.913106796116505e-05, "loss": 0.1995, "step": 4299 }, { "epoch": 41.69277108433735, "grad_norm": 5.676359176635742, "learning_rate": 2.912621359223301e-05, "loss": 0.1129, "step": 4300 }, { "epoch": 41.702409638554215, "grad_norm": 10.284400939941406, "learning_rate": 2.912135922330097e-05, "loss": 0.4508, "step": 4301 }, { "epoch": 41.712048192771086, "grad_norm": 3.6795706748962402, "learning_rate": 2.9116504854368933e-05, "loss": 0.1867, "step": 4302 }, { "epoch": 41.72168674698795, "grad_norm": 8.43498706817627, "learning_rate": 2.911165048543689e-05, "loss": 0.2757, "step": 4303 }, { "epoch": 41.73132530120482, "grad_norm": 8.396834373474121, "learning_rate": 2.910679611650485e-05, "loss": 0.28, "step": 4304 }, { "epoch": 41.74096385542169, "grad_norm": 15.659092903137207, "learning_rate": 2.910194174757282e-05, "loss": 0.3053, "step": 4305 }, { "epoch": 41.75060240963855, "grad_norm": 40.84017562866211, "learning_rate": 2.909708737864078e-05, "loss": 0.3597, "step": 4306 }, { "epoch": 41.76024096385542, "grad_norm": 6.622158050537109, "learning_rate": 2.9092233009708742e-05, "loss": 0.2448, "step": 4307 }, { "epoch": 41.76987951807229, "grad_norm": 2.914346933364868, "learning_rate": 2.9087378640776703e-05, "loss": 0.0937, "step": 4308 }, { "epoch": 41.77951807228916, "grad_norm": 5.170509338378906, "learning_rate": 2.9082524271844664e-05, "loss": 0.2155, "step": 4309 }, { "epoch": 41.78915662650603, "grad_norm": 16.75540542602539, "learning_rate": 2.9077669902912625e-05, "loss": 0.2506, "step": 4310 }, { "epoch": 41.79879518072289, "grad_norm": 12.090685844421387, "learning_rate": 2.9072815533980586e-05, "loss": 0.1528, "step": 4311 }, { "epoch": 41.80843373493976, "grad_norm": 2.440037965774536, "learning_rate": 2.9067961165048548e-05, "loss": 0.3464, "step": 4312 }, { "epoch": 41.818072289156625, "grad_norm": 2.862466335296631, "learning_rate": 2.906310679611651e-05, "loss": 0.2246, "step": 4313 }, { "epoch": 41.827710843373495, "grad_norm": 3.5876100063323975, "learning_rate": 2.9058252427184466e-05, "loss": 0.2351, "step": 4314 }, { "epoch": 41.83734939759036, "grad_norm": 8.90294075012207, "learning_rate": 2.9053398058252428e-05, "loss": 0.4009, "step": 4315 }, { "epoch": 41.84698795180723, "grad_norm": 11.407102584838867, "learning_rate": 2.904854368932039e-05, "loss": 0.1484, "step": 4316 }, { "epoch": 41.8566265060241, "grad_norm": 8.625401496887207, "learning_rate": 2.904368932038835e-05, "loss": 0.1592, "step": 4317 }, { "epoch": 41.86626506024096, "grad_norm": 3.4791011810302734, "learning_rate": 2.903883495145631e-05, "loss": 0.1683, "step": 4318 }, { "epoch": 41.87590361445783, "grad_norm": 6.804556369781494, "learning_rate": 2.9033980582524272e-05, "loss": 0.2571, "step": 4319 }, { "epoch": 41.8855421686747, "grad_norm": 4.296633720397949, "learning_rate": 2.9029126213592233e-05, "loss": 0.371, "step": 4320 }, { "epoch": 41.89518072289157, "grad_norm": 4.13163948059082, "learning_rate": 2.9024271844660194e-05, "loss": 0.1915, "step": 4321 }, { "epoch": 41.90481927710843, "grad_norm": 5.4687652587890625, "learning_rate": 2.9019417475728156e-05, "loss": 0.2176, "step": 4322 }, { "epoch": 41.9144578313253, "grad_norm": 7.452935218811035, "learning_rate": 2.9014563106796117e-05, "loss": 0.2679, "step": 4323 }, { "epoch": 41.92409638554217, "grad_norm": 3.0644776821136475, "learning_rate": 2.9009708737864078e-05, "loss": 0.1586, "step": 4324 }, { "epoch": 41.933734939759034, "grad_norm": 2.5776820182800293, "learning_rate": 2.900485436893204e-05, "loss": 0.3475, "step": 4325 }, { "epoch": 41.943373493975905, "grad_norm": 15.110703468322754, "learning_rate": 2.9e-05, "loss": 0.3709, "step": 4326 }, { "epoch": 41.95301204819277, "grad_norm": 9.651769638061523, "learning_rate": 2.899514563106796e-05, "loss": 0.3354, "step": 4327 }, { "epoch": 41.96265060240964, "grad_norm": 6.24693489074707, "learning_rate": 2.8990291262135922e-05, "loss": 0.2989, "step": 4328 }, { "epoch": 41.97228915662651, "grad_norm": 2.515794277191162, "learning_rate": 2.8985436893203884e-05, "loss": 0.2004, "step": 4329 }, { "epoch": 41.98192771084337, "grad_norm": 2.9855148792266846, "learning_rate": 2.8980582524271848e-05, "loss": 0.3338, "step": 4330 }, { "epoch": 41.99156626506024, "grad_norm": 4.924575328826904, "learning_rate": 2.897572815533981e-05, "loss": 0.2305, "step": 4331 }, { "epoch": 42.00722891566265, "grad_norm": 0.9229151606559753, "learning_rate": 2.897087378640777e-05, "loss": 0.1832, "step": 4332 }, { "epoch": 42.01686746987952, "grad_norm": 11.401557922363281, "learning_rate": 2.896601941747573e-05, "loss": 0.3621, "step": 4333 }, { "epoch": 42.026506024096385, "grad_norm": 5.985057830810547, "learning_rate": 2.8961165048543693e-05, "loss": 0.4277, "step": 4334 }, { "epoch": 42.036144578313255, "grad_norm": 9.533875465393066, "learning_rate": 2.8956310679611654e-05, "loss": 0.2018, "step": 4335 }, { "epoch": 42.04578313253012, "grad_norm": 6.716027736663818, "learning_rate": 2.8951456310679615e-05, "loss": 0.1484, "step": 4336 }, { "epoch": 42.05542168674699, "grad_norm": 11.100883483886719, "learning_rate": 2.8946601941747576e-05, "loss": 0.4319, "step": 4337 }, { "epoch": 42.06506024096385, "grad_norm": 10.821640014648438, "learning_rate": 2.8941747572815537e-05, "loss": 0.2392, "step": 4338 }, { "epoch": 42.07469879518072, "grad_norm": 8.387761116027832, "learning_rate": 2.89368932038835e-05, "loss": 0.3062, "step": 4339 }, { "epoch": 42.08433734939759, "grad_norm": 8.829946517944336, "learning_rate": 2.893203883495146e-05, "loss": 0.2534, "step": 4340 }, { "epoch": 42.09397590361446, "grad_norm": 14.184067726135254, "learning_rate": 2.892718446601942e-05, "loss": 0.2396, "step": 4341 }, { "epoch": 42.10361445783133, "grad_norm": 2.4493987560272217, "learning_rate": 2.892233009708738e-05, "loss": 0.1816, "step": 4342 }, { "epoch": 42.11325301204819, "grad_norm": 6.799074649810791, "learning_rate": 2.891747572815534e-05, "loss": 0.1508, "step": 4343 }, { "epoch": 42.12289156626506, "grad_norm": 2.953359365463257, "learning_rate": 2.89126213592233e-05, "loss": 0.2412, "step": 4344 }, { "epoch": 42.13253012048193, "grad_norm": 5.128509521484375, "learning_rate": 2.8907766990291262e-05, "loss": 0.2516, "step": 4345 }, { "epoch": 42.142168674698794, "grad_norm": 5.651832103729248, "learning_rate": 2.8902912621359223e-05, "loss": 0.3349, "step": 4346 }, { "epoch": 42.151807228915665, "grad_norm": 1.5680558681488037, "learning_rate": 2.8898058252427184e-05, "loss": 0.2071, "step": 4347 }, { "epoch": 42.16144578313253, "grad_norm": 5.015317916870117, "learning_rate": 2.8893203883495145e-05, "loss": 0.2532, "step": 4348 }, { "epoch": 42.1710843373494, "grad_norm": 5.125904560089111, "learning_rate": 2.8888349514563107e-05, "loss": 0.257, "step": 4349 }, { "epoch": 42.18072289156626, "grad_norm": 28.988300323486328, "learning_rate": 2.8883495145631068e-05, "loss": 0.376, "step": 4350 }, { "epoch": 42.19036144578313, "grad_norm": 4.739566802978516, "learning_rate": 2.887864077669903e-05, "loss": 0.2128, "step": 4351 }, { "epoch": 42.2, "grad_norm": 1.4983779191970825, "learning_rate": 2.887378640776699e-05, "loss": 0.1444, "step": 4352 }, { "epoch": 42.209638554216866, "grad_norm": 10.729219436645508, "learning_rate": 2.886893203883495e-05, "loss": 0.2337, "step": 4353 }, { "epoch": 42.21927710843374, "grad_norm": 1.3961272239685059, "learning_rate": 2.8864077669902912e-05, "loss": 0.2738, "step": 4354 }, { "epoch": 42.2289156626506, "grad_norm": 2.917996644973755, "learning_rate": 2.8859223300970877e-05, "loss": 0.1045, "step": 4355 }, { "epoch": 42.23855421686747, "grad_norm": 8.65151309967041, "learning_rate": 2.8854368932038838e-05, "loss": 0.316, "step": 4356 }, { "epoch": 42.24819277108434, "grad_norm": 2.60768985748291, "learning_rate": 2.88495145631068e-05, "loss": 0.3101, "step": 4357 }, { "epoch": 42.257831325301204, "grad_norm": 2.061861276626587, "learning_rate": 2.884466019417476e-05, "loss": 0.1164, "step": 4358 }, { "epoch": 42.267469879518075, "grad_norm": 21.686161041259766, "learning_rate": 2.883980582524272e-05, "loss": 0.1772, "step": 4359 }, { "epoch": 42.27710843373494, "grad_norm": 14.157733917236328, "learning_rate": 2.8834951456310683e-05, "loss": 0.2616, "step": 4360 }, { "epoch": 42.28674698795181, "grad_norm": 16.237302780151367, "learning_rate": 2.8830097087378644e-05, "loss": 0.3032, "step": 4361 }, { "epoch": 42.29638554216867, "grad_norm": 8.172812461853027, "learning_rate": 2.8825242718446605e-05, "loss": 0.2529, "step": 4362 }, { "epoch": 42.30602409638554, "grad_norm": 2.385340929031372, "learning_rate": 2.8820388349514566e-05, "loss": 0.3383, "step": 4363 }, { "epoch": 42.31566265060241, "grad_norm": 7.495240688323975, "learning_rate": 2.8815533980582527e-05, "loss": 0.2297, "step": 4364 }, { "epoch": 42.325301204819276, "grad_norm": 13.29881477355957, "learning_rate": 2.8810679611650488e-05, "loss": 0.3106, "step": 4365 }, { "epoch": 42.334939759036146, "grad_norm": 5.130897521972656, "learning_rate": 2.880582524271845e-05, "loss": 0.1763, "step": 4366 }, { "epoch": 42.34457831325301, "grad_norm": 33.67259979248047, "learning_rate": 2.880097087378641e-05, "loss": 0.3188, "step": 4367 }, { "epoch": 42.35421686746988, "grad_norm": 13.4526948928833, "learning_rate": 2.879611650485437e-05, "loss": 0.2273, "step": 4368 }, { "epoch": 42.36385542168675, "grad_norm": 1.7943707704544067, "learning_rate": 2.879126213592233e-05, "loss": 0.2182, "step": 4369 }, { "epoch": 42.373493975903614, "grad_norm": 21.660327911376953, "learning_rate": 2.878640776699029e-05, "loss": 0.3324, "step": 4370 }, { "epoch": 42.383132530120484, "grad_norm": 15.983405113220215, "learning_rate": 2.8781553398058252e-05, "loss": 0.2144, "step": 4371 }, { "epoch": 42.39277108433735, "grad_norm": 13.179166793823242, "learning_rate": 2.8776699029126213e-05, "loss": 0.1703, "step": 4372 }, { "epoch": 42.40240963855422, "grad_norm": 9.00865650177002, "learning_rate": 2.8771844660194174e-05, "loss": 0.3707, "step": 4373 }, { "epoch": 42.41204819277108, "grad_norm": 1.9763470888137817, "learning_rate": 2.8766990291262135e-05, "loss": 0.1346, "step": 4374 }, { "epoch": 42.42168674698795, "grad_norm": 7.270771503448486, "learning_rate": 2.8762135922330096e-05, "loss": 0.1849, "step": 4375 }, { "epoch": 42.43132530120482, "grad_norm": 4.706078052520752, "learning_rate": 2.8757281553398057e-05, "loss": 0.2325, "step": 4376 }, { "epoch": 42.440963855421685, "grad_norm": 3.088601589202881, "learning_rate": 2.875242718446602e-05, "loss": 0.1579, "step": 4377 }, { "epoch": 42.450602409638556, "grad_norm": 12.183211326599121, "learning_rate": 2.874757281553398e-05, "loss": 0.3142, "step": 4378 }, { "epoch": 42.46024096385542, "grad_norm": 7.9619598388671875, "learning_rate": 2.874271844660194e-05, "loss": 0.2154, "step": 4379 }, { "epoch": 42.46987951807229, "grad_norm": 6.1531081199646, "learning_rate": 2.8737864077669902e-05, "loss": 0.2028, "step": 4380 }, { "epoch": 42.47951807228916, "grad_norm": 5.895444869995117, "learning_rate": 2.8733009708737867e-05, "loss": 0.1983, "step": 4381 }, { "epoch": 42.48915662650602, "grad_norm": 9.168207168579102, "learning_rate": 2.8728155339805828e-05, "loss": 0.2766, "step": 4382 }, { "epoch": 42.498795180722894, "grad_norm": 21.136737823486328, "learning_rate": 2.872330097087379e-05, "loss": 0.2469, "step": 4383 }, { "epoch": 42.50843373493976, "grad_norm": 3.230250597000122, "learning_rate": 2.871844660194175e-05, "loss": 0.2861, "step": 4384 }, { "epoch": 42.51807228915663, "grad_norm": 12.087491989135742, "learning_rate": 2.871359223300971e-05, "loss": 0.3422, "step": 4385 }, { "epoch": 42.52771084337349, "grad_norm": 18.32266616821289, "learning_rate": 2.8708737864077672e-05, "loss": 0.5409, "step": 4386 }, { "epoch": 42.53734939759036, "grad_norm": 3.7571098804473877, "learning_rate": 2.8703883495145633e-05, "loss": 0.3335, "step": 4387 }, { "epoch": 42.54698795180723, "grad_norm": 9.747684478759766, "learning_rate": 2.8699029126213595e-05, "loss": 0.5146, "step": 4388 }, { "epoch": 42.556626506024095, "grad_norm": 8.258710861206055, "learning_rate": 2.8694174757281556e-05, "loss": 0.2577, "step": 4389 }, { "epoch": 42.566265060240966, "grad_norm": 22.869461059570312, "learning_rate": 2.8689320388349517e-05, "loss": 0.1505, "step": 4390 }, { "epoch": 42.57590361445783, "grad_norm": 3.1430246829986572, "learning_rate": 2.8684466019417478e-05, "loss": 0.2618, "step": 4391 }, { "epoch": 42.5855421686747, "grad_norm": 6.541820526123047, "learning_rate": 2.867961165048544e-05, "loss": 0.3183, "step": 4392 }, { "epoch": 42.59518072289157, "grad_norm": 10.369453430175781, "learning_rate": 2.86747572815534e-05, "loss": 0.2288, "step": 4393 }, { "epoch": 42.60481927710843, "grad_norm": 2.911330223083496, "learning_rate": 2.866990291262136e-05, "loss": 0.1497, "step": 4394 }, { "epoch": 42.6144578313253, "grad_norm": 26.558700561523438, "learning_rate": 2.8665048543689323e-05, "loss": 0.2217, "step": 4395 }, { "epoch": 42.62409638554217, "grad_norm": 12.044054985046387, "learning_rate": 2.8660194174757284e-05, "loss": 0.4498, "step": 4396 }, { "epoch": 42.63373493975904, "grad_norm": 17.921098709106445, "learning_rate": 2.865533980582524e-05, "loss": 0.2999, "step": 4397 }, { "epoch": 42.6433734939759, "grad_norm": 7.351268768310547, "learning_rate": 2.8650485436893203e-05, "loss": 0.3705, "step": 4398 }, { "epoch": 42.65301204819277, "grad_norm": 5.986107349395752, "learning_rate": 2.8645631067961164e-05, "loss": 0.1505, "step": 4399 }, { "epoch": 42.66265060240964, "grad_norm": 6.29254674911499, "learning_rate": 2.8640776699029125e-05, "loss": 0.1733, "step": 4400 }, { "epoch": 42.672289156626505, "grad_norm": 3.2238001823425293, "learning_rate": 2.8635922330097086e-05, "loss": 0.2493, "step": 4401 }, { "epoch": 42.681927710843375, "grad_norm": 8.34994888305664, "learning_rate": 2.8631067961165047e-05, "loss": 0.3074, "step": 4402 }, { "epoch": 42.69156626506024, "grad_norm": 2.015784502029419, "learning_rate": 2.862621359223301e-05, "loss": 0.3625, "step": 4403 }, { "epoch": 42.70120481927711, "grad_norm": 3.468818187713623, "learning_rate": 2.862135922330097e-05, "loss": 0.1492, "step": 4404 }, { "epoch": 42.71084337349397, "grad_norm": 7.988134384155273, "learning_rate": 2.861650485436893e-05, "loss": 0.2091, "step": 4405 }, { "epoch": 42.72048192771084, "grad_norm": 22.285707473754883, "learning_rate": 2.86116504854369e-05, "loss": 0.3611, "step": 4406 }, { "epoch": 42.73012048192771, "grad_norm": 14.803598403930664, "learning_rate": 2.860679611650486e-05, "loss": 0.3253, "step": 4407 }, { "epoch": 42.73975903614458, "grad_norm": 5.693875789642334, "learning_rate": 2.8601941747572818e-05, "loss": 0.14, "step": 4408 }, { "epoch": 42.74939759036145, "grad_norm": 93.85542297363281, "learning_rate": 2.859708737864078e-05, "loss": 0.3973, "step": 4409 }, { "epoch": 42.75903614457831, "grad_norm": 3.94596791267395, "learning_rate": 2.859223300970874e-05, "loss": 0.1603, "step": 4410 }, { "epoch": 42.76867469879518, "grad_norm": 4.082611083984375, "learning_rate": 2.85873786407767e-05, "loss": 0.1713, "step": 4411 }, { "epoch": 42.77831325301205, "grad_norm": 8.717737197875977, "learning_rate": 2.8582524271844662e-05, "loss": 0.2996, "step": 4412 }, { "epoch": 42.787951807228914, "grad_norm": 5.6528706550598145, "learning_rate": 2.8577669902912623e-05, "loss": 0.299, "step": 4413 }, { "epoch": 42.797590361445785, "grad_norm": 17.674110412597656, "learning_rate": 2.8572815533980584e-05, "loss": 0.4646, "step": 4414 }, { "epoch": 42.80722891566265, "grad_norm": 3.580289363861084, "learning_rate": 2.8567961165048546e-05, "loss": 0.1625, "step": 4415 }, { "epoch": 42.81686746987952, "grad_norm": 14.507497787475586, "learning_rate": 2.8563106796116507e-05, "loss": 0.2173, "step": 4416 }, { "epoch": 42.82650602409639, "grad_norm": 2.40019154548645, "learning_rate": 2.8558252427184468e-05, "loss": 0.3166, "step": 4417 }, { "epoch": 42.83614457831325, "grad_norm": 3.202742099761963, "learning_rate": 2.855339805825243e-05, "loss": 0.2728, "step": 4418 }, { "epoch": 42.84578313253012, "grad_norm": 3.707721471786499, "learning_rate": 2.854854368932039e-05, "loss": 0.2131, "step": 4419 }, { "epoch": 42.855421686746986, "grad_norm": 4.724204063415527, "learning_rate": 2.854368932038835e-05, "loss": 0.1622, "step": 4420 }, { "epoch": 42.86506024096386, "grad_norm": 16.92510223388672, "learning_rate": 2.8538834951456312e-05, "loss": 0.4924, "step": 4421 }, { "epoch": 42.87469879518072, "grad_norm": 8.173928260803223, "learning_rate": 2.8533980582524274e-05, "loss": 0.2836, "step": 4422 }, { "epoch": 42.88433734939759, "grad_norm": 1.8438276052474976, "learning_rate": 2.8529126213592235e-05, "loss": 0.2509, "step": 4423 }, { "epoch": 42.89397590361446, "grad_norm": 18.040082931518555, "learning_rate": 2.8524271844660196e-05, "loss": 0.1136, "step": 4424 }, { "epoch": 42.903614457831324, "grad_norm": 5.1977667808532715, "learning_rate": 2.8519417475728154e-05, "loss": 0.2612, "step": 4425 }, { "epoch": 42.913253012048195, "grad_norm": 1.6008180379867554, "learning_rate": 2.8514563106796115e-05, "loss": 0.2279, "step": 4426 }, { "epoch": 42.92289156626506, "grad_norm": 15.372836112976074, "learning_rate": 2.8509708737864076e-05, "loss": 0.2389, "step": 4427 }, { "epoch": 42.93253012048193, "grad_norm": 4.13798189163208, "learning_rate": 2.8504854368932037e-05, "loss": 0.2942, "step": 4428 }, { "epoch": 42.94216867469879, "grad_norm": 24.241838455200195, "learning_rate": 2.8499999999999998e-05, "loss": 0.3142, "step": 4429 }, { "epoch": 42.95180722891566, "grad_norm": 13.108667373657227, "learning_rate": 2.849514563106796e-05, "loss": 0.4794, "step": 4430 }, { "epoch": 42.96144578313253, "grad_norm": 28.39360809326172, "learning_rate": 2.8490291262135927e-05, "loss": 0.1899, "step": 4431 }, { "epoch": 42.971084337349396, "grad_norm": 4.245414733886719, "learning_rate": 2.848543689320389e-05, "loss": 0.2393, "step": 4432 }, { "epoch": 42.980722891566266, "grad_norm": 2.9497745037078857, "learning_rate": 2.848058252427185e-05, "loss": 0.1284, "step": 4433 }, { "epoch": 42.99036144578313, "grad_norm": 14.242992401123047, "learning_rate": 2.847572815533981e-05, "loss": 0.5514, "step": 4434 }, { "epoch": 43.006024096385545, "grad_norm": 9.349072456359863, "learning_rate": 2.8470873786407772e-05, "loss": 0.3361, "step": 4435 }, { "epoch": 43.01566265060241, "grad_norm": 12.64782428741455, "learning_rate": 2.846601941747573e-05, "loss": 0.2172, "step": 4436 }, { "epoch": 43.02530120481928, "grad_norm": 4.499383449554443, "learning_rate": 2.846116504854369e-05, "loss": 0.2583, "step": 4437 }, { "epoch": 43.03493975903614, "grad_norm": 1.4550261497497559, "learning_rate": 2.8456310679611652e-05, "loss": 0.1031, "step": 4438 }, { "epoch": 43.04457831325301, "grad_norm": 4.006593704223633, "learning_rate": 2.8451456310679613e-05, "loss": 0.3102, "step": 4439 }, { "epoch": 43.05421686746988, "grad_norm": 13.033824920654297, "learning_rate": 2.8446601941747574e-05, "loss": 0.1746, "step": 4440 }, { "epoch": 43.063855421686746, "grad_norm": 1.9642754793167114, "learning_rate": 2.8441747572815535e-05, "loss": 0.2218, "step": 4441 }, { "epoch": 43.07349397590362, "grad_norm": 18.029813766479492, "learning_rate": 2.8436893203883496e-05, "loss": 0.238, "step": 4442 }, { "epoch": 43.08313253012048, "grad_norm": 3.569589138031006, "learning_rate": 2.8432038834951458e-05, "loss": 0.3822, "step": 4443 }, { "epoch": 43.09277108433735, "grad_norm": 7.936978816986084, "learning_rate": 2.842718446601942e-05, "loss": 0.1759, "step": 4444 }, { "epoch": 43.102409638554214, "grad_norm": 3.662753105163574, "learning_rate": 2.842233009708738e-05, "loss": 0.2044, "step": 4445 }, { "epoch": 43.112048192771084, "grad_norm": 5.465402603149414, "learning_rate": 2.841747572815534e-05, "loss": 0.3283, "step": 4446 }, { "epoch": 43.121686746987955, "grad_norm": 14.913286209106445, "learning_rate": 2.8412621359223302e-05, "loss": 0.3604, "step": 4447 }, { "epoch": 43.13132530120482, "grad_norm": 18.98535919189453, "learning_rate": 2.8407766990291263e-05, "loss": 0.3981, "step": 4448 }, { "epoch": 43.14096385542169, "grad_norm": 17.42354965209961, "learning_rate": 2.8402912621359224e-05, "loss": 0.3563, "step": 4449 }, { "epoch": 43.15060240963855, "grad_norm": 11.746910095214844, "learning_rate": 2.8398058252427186e-05, "loss": 0.2913, "step": 4450 }, { "epoch": 43.16024096385542, "grad_norm": 34.41671371459961, "learning_rate": 2.8393203883495147e-05, "loss": 0.1837, "step": 4451 }, { "epoch": 43.16987951807229, "grad_norm": 3.9491591453552246, "learning_rate": 2.8388349514563105e-05, "loss": 0.185, "step": 4452 }, { "epoch": 43.179518072289156, "grad_norm": 2.4703404903411865, "learning_rate": 2.8383495145631066e-05, "loss": 0.165, "step": 4453 }, { "epoch": 43.189156626506026, "grad_norm": 3.0548465251922607, "learning_rate": 2.8378640776699027e-05, "loss": 0.1821, "step": 4454 }, { "epoch": 43.19879518072289, "grad_norm": 3.42327618598938, "learning_rate": 2.8373786407766988e-05, "loss": 0.1838, "step": 4455 }, { "epoch": 43.20843373493976, "grad_norm": 4.868657112121582, "learning_rate": 2.8368932038834956e-05, "loss": 0.3977, "step": 4456 }, { "epoch": 43.21807228915662, "grad_norm": 4.158214569091797, "learning_rate": 2.8364077669902917e-05, "loss": 0.421, "step": 4457 }, { "epoch": 43.227710843373494, "grad_norm": 5.139862060546875, "learning_rate": 2.8359223300970878e-05, "loss": 0.2412, "step": 4458 }, { "epoch": 43.237349397590364, "grad_norm": 7.67268180847168, "learning_rate": 2.835436893203884e-05, "loss": 0.0959, "step": 4459 }, { "epoch": 43.24698795180723, "grad_norm": 5.687285423278809, "learning_rate": 2.83495145631068e-05, "loss": 0.247, "step": 4460 }, { "epoch": 43.2566265060241, "grad_norm": 3.0135128498077393, "learning_rate": 2.834466019417476e-05, "loss": 0.266, "step": 4461 }, { "epoch": 43.26626506024096, "grad_norm": 6.179495334625244, "learning_rate": 2.8339805825242723e-05, "loss": 0.2916, "step": 4462 }, { "epoch": 43.27590361445783, "grad_norm": 9.614313125610352, "learning_rate": 2.833495145631068e-05, "loss": 0.1431, "step": 4463 }, { "epoch": 43.2855421686747, "grad_norm": 10.562504768371582, "learning_rate": 2.833009708737864e-05, "loss": 0.2268, "step": 4464 }, { "epoch": 43.295180722891565, "grad_norm": 6.4554853439331055, "learning_rate": 2.8325242718446603e-05, "loss": 0.3249, "step": 4465 }, { "epoch": 43.304819277108436, "grad_norm": 13.363470077514648, "learning_rate": 2.8320388349514564e-05, "loss": 0.4297, "step": 4466 }, { "epoch": 43.3144578313253, "grad_norm": 16.124916076660156, "learning_rate": 2.8315533980582525e-05, "loss": 0.5284, "step": 4467 }, { "epoch": 43.32409638554217, "grad_norm": 6.216598033905029, "learning_rate": 2.8310679611650486e-05, "loss": 0.2535, "step": 4468 }, { "epoch": 43.33373493975903, "grad_norm": 6.056785583496094, "learning_rate": 2.8305825242718447e-05, "loss": 0.3513, "step": 4469 }, { "epoch": 43.3433734939759, "grad_norm": 27.333351135253906, "learning_rate": 2.830097087378641e-05, "loss": 0.1698, "step": 4470 }, { "epoch": 43.353012048192774, "grad_norm": 6.979308128356934, "learning_rate": 2.829611650485437e-05, "loss": 0.1568, "step": 4471 }, { "epoch": 43.36265060240964, "grad_norm": 8.942535400390625, "learning_rate": 2.829126213592233e-05, "loss": 0.3092, "step": 4472 }, { "epoch": 43.37228915662651, "grad_norm": 9.190067291259766, "learning_rate": 2.8286407766990292e-05, "loss": 0.3048, "step": 4473 }, { "epoch": 43.38192771084337, "grad_norm": 1.9388495683670044, "learning_rate": 2.8281553398058253e-05, "loss": 0.2163, "step": 4474 }, { "epoch": 43.39156626506024, "grad_norm": 6.106135368347168, "learning_rate": 2.8276699029126214e-05, "loss": 0.3663, "step": 4475 }, { "epoch": 43.40120481927711, "grad_norm": 70.06742858886719, "learning_rate": 2.8271844660194175e-05, "loss": 0.3876, "step": 4476 }, { "epoch": 43.410843373493975, "grad_norm": 3.902179002761841, "learning_rate": 2.8266990291262137e-05, "loss": 0.2094, "step": 4477 }, { "epoch": 43.420481927710846, "grad_norm": 31.390573501586914, "learning_rate": 2.8262135922330098e-05, "loss": 0.3201, "step": 4478 }, { "epoch": 43.43012048192771, "grad_norm": 13.000340461730957, "learning_rate": 2.825728155339806e-05, "loss": 0.1961, "step": 4479 }, { "epoch": 43.43975903614458, "grad_norm": 3.2214853763580322, "learning_rate": 2.8252427184466017e-05, "loss": 0.327, "step": 4480 }, { "epoch": 43.44939759036144, "grad_norm": 3.3003127574920654, "learning_rate": 2.8247572815533978e-05, "loss": 0.2621, "step": 4481 }, { "epoch": 43.45903614457831, "grad_norm": 1.0992143154144287, "learning_rate": 2.8242718446601946e-05, "loss": 0.2405, "step": 4482 }, { "epoch": 43.46867469879518, "grad_norm": 15.7748384475708, "learning_rate": 2.8237864077669907e-05, "loss": 0.1944, "step": 4483 }, { "epoch": 43.47831325301205, "grad_norm": 5.4963059425354, "learning_rate": 2.8233009708737868e-05, "loss": 0.2534, "step": 4484 }, { "epoch": 43.48795180722892, "grad_norm": 29.18428611755371, "learning_rate": 2.822815533980583e-05, "loss": 0.3332, "step": 4485 }, { "epoch": 43.49759036144578, "grad_norm": 3.8943400382995605, "learning_rate": 2.822330097087379e-05, "loss": 0.4113, "step": 4486 }, { "epoch": 43.50722891566265, "grad_norm": 6.094031810760498, "learning_rate": 2.821844660194175e-05, "loss": 0.3524, "step": 4487 }, { "epoch": 43.51686746987952, "grad_norm": 9.496499061584473, "learning_rate": 2.8213592233009713e-05, "loss": 0.3917, "step": 4488 }, { "epoch": 43.526506024096385, "grad_norm": 3.212347984313965, "learning_rate": 2.8208737864077674e-05, "loss": 0.1823, "step": 4489 }, { "epoch": 43.536144578313255, "grad_norm": 7.487888336181641, "learning_rate": 2.8203883495145635e-05, "loss": 0.2691, "step": 4490 }, { "epoch": 43.54578313253012, "grad_norm": 2.146392583847046, "learning_rate": 2.8199029126213593e-05, "loss": 0.2713, "step": 4491 }, { "epoch": 43.55542168674699, "grad_norm": 3.041259765625, "learning_rate": 2.8194174757281554e-05, "loss": 0.1566, "step": 4492 }, { "epoch": 43.56506024096385, "grad_norm": 30.90891456604004, "learning_rate": 2.8189320388349515e-05, "loss": 0.3259, "step": 4493 }, { "epoch": 43.57469879518072, "grad_norm": 8.340420722961426, "learning_rate": 2.8184466019417476e-05, "loss": 0.1806, "step": 4494 }, { "epoch": 43.58433734939759, "grad_norm": 12.954121589660645, "learning_rate": 2.8179611650485437e-05, "loss": 0.3411, "step": 4495 }, { "epoch": 43.59397590361446, "grad_norm": 0.8627391457557678, "learning_rate": 2.81747572815534e-05, "loss": 0.1735, "step": 4496 }, { "epoch": 43.60361445783133, "grad_norm": 14.790473937988281, "learning_rate": 2.816990291262136e-05, "loss": 0.1604, "step": 4497 }, { "epoch": 43.61325301204819, "grad_norm": 4.294200420379639, "learning_rate": 2.816504854368932e-05, "loss": 0.4157, "step": 4498 }, { "epoch": 43.62289156626506, "grad_norm": 2.3374781608581543, "learning_rate": 2.8160194174757282e-05, "loss": 0.2175, "step": 4499 }, { "epoch": 43.63253012048193, "grad_norm": 1.885321021080017, "learning_rate": 2.8155339805825243e-05, "loss": 0.123, "step": 4500 }, { "epoch": 43.642168674698794, "grad_norm": 21.538583755493164, "learning_rate": 2.8150485436893204e-05, "loss": 0.0762, "step": 4501 }, { "epoch": 43.651807228915665, "grad_norm": 14.128580093383789, "learning_rate": 2.8145631067961165e-05, "loss": 0.4068, "step": 4502 }, { "epoch": 43.66144578313253, "grad_norm": 5.8742780685424805, "learning_rate": 2.8140776699029126e-05, "loss": 0.2416, "step": 4503 }, { "epoch": 43.6710843373494, "grad_norm": 5.449161529541016, "learning_rate": 2.8135922330097087e-05, "loss": 0.4404, "step": 4504 }, { "epoch": 43.68072289156626, "grad_norm": 8.30863094329834, "learning_rate": 2.813106796116505e-05, "loss": 0.1802, "step": 4505 }, { "epoch": 43.69036144578313, "grad_norm": 9.255702018737793, "learning_rate": 2.812621359223301e-05, "loss": 0.6347, "step": 4506 }, { "epoch": 43.7, "grad_norm": 1.976987361907959, "learning_rate": 2.8121359223300974e-05, "loss": 0.1827, "step": 4507 }, { "epoch": 43.709638554216866, "grad_norm": 2.9195730686187744, "learning_rate": 2.8116504854368935e-05, "loss": 0.2104, "step": 4508 }, { "epoch": 43.71927710843374, "grad_norm": 17.618030548095703, "learning_rate": 2.8111650485436897e-05, "loss": 0.3376, "step": 4509 }, { "epoch": 43.7289156626506, "grad_norm": 7.45732307434082, "learning_rate": 2.8106796116504858e-05, "loss": 0.2205, "step": 4510 }, { "epoch": 43.73855421686747, "grad_norm": 17.286893844604492, "learning_rate": 2.810194174757282e-05, "loss": 0.3576, "step": 4511 }, { "epoch": 43.74819277108434, "grad_norm": 2.338507652282715, "learning_rate": 2.809708737864078e-05, "loss": 0.2283, "step": 4512 }, { "epoch": 43.757831325301204, "grad_norm": 4.4073309898376465, "learning_rate": 2.809223300970874e-05, "loss": 0.1733, "step": 4513 }, { "epoch": 43.767469879518075, "grad_norm": 2.5265939235687256, "learning_rate": 2.8087378640776702e-05, "loss": 0.1899, "step": 4514 }, { "epoch": 43.77710843373494, "grad_norm": 5.928648948669434, "learning_rate": 2.8082524271844663e-05, "loss": 0.2732, "step": 4515 }, { "epoch": 43.78674698795181, "grad_norm": 3.0367441177368164, "learning_rate": 2.8077669902912625e-05, "loss": 0.2562, "step": 4516 }, { "epoch": 43.79638554216867, "grad_norm": 2.774413824081421, "learning_rate": 2.8072815533980586e-05, "loss": 0.3351, "step": 4517 }, { "epoch": 43.80602409638554, "grad_norm": 2.698040723800659, "learning_rate": 2.8067961165048544e-05, "loss": 0.1529, "step": 4518 }, { "epoch": 43.81566265060241, "grad_norm": 11.586012840270996, "learning_rate": 2.8063106796116505e-05, "loss": 0.2078, "step": 4519 }, { "epoch": 43.825301204819276, "grad_norm": 9.960457801818848, "learning_rate": 2.8058252427184466e-05, "loss": 0.2658, "step": 4520 }, { "epoch": 43.834939759036146, "grad_norm": 11.237152099609375, "learning_rate": 2.8053398058252427e-05, "loss": 0.1564, "step": 4521 }, { "epoch": 43.84457831325301, "grad_norm": 4.60736608505249, "learning_rate": 2.8048543689320388e-05, "loss": 0.1953, "step": 4522 }, { "epoch": 43.85421686746988, "grad_norm": 4.624864101409912, "learning_rate": 2.804368932038835e-05, "loss": 0.2914, "step": 4523 }, { "epoch": 43.86385542168675, "grad_norm": 6.3474202156066895, "learning_rate": 2.803883495145631e-05, "loss": 0.2864, "step": 4524 }, { "epoch": 43.873493975903614, "grad_norm": 9.666255950927734, "learning_rate": 2.803398058252427e-05, "loss": 0.2252, "step": 4525 }, { "epoch": 43.883132530120484, "grad_norm": 13.115371704101562, "learning_rate": 2.8029126213592233e-05, "loss": 0.2095, "step": 4526 }, { "epoch": 43.89277108433735, "grad_norm": 4.054659366607666, "learning_rate": 2.8024271844660194e-05, "loss": 0.2751, "step": 4527 }, { "epoch": 43.90240963855422, "grad_norm": 22.78936767578125, "learning_rate": 2.8019417475728155e-05, "loss": 0.297, "step": 4528 }, { "epoch": 43.91204819277108, "grad_norm": 5.465768814086914, "learning_rate": 2.8014563106796116e-05, "loss": 0.5137, "step": 4529 }, { "epoch": 43.92168674698795, "grad_norm": 6.489612579345703, "learning_rate": 2.8009708737864077e-05, "loss": 0.2541, "step": 4530 }, { "epoch": 43.93132530120482, "grad_norm": 8.010637283325195, "learning_rate": 2.800485436893204e-05, "loss": 0.3855, "step": 4531 }, { "epoch": 43.940963855421685, "grad_norm": 14.340497016906738, "learning_rate": 2.8000000000000003e-05, "loss": 0.3751, "step": 4532 }, { "epoch": 43.950602409638556, "grad_norm": 5.329237461090088, "learning_rate": 2.7995145631067964e-05, "loss": 0.2773, "step": 4533 }, { "epoch": 43.96024096385542, "grad_norm": 11.390731811523438, "learning_rate": 2.7990291262135925e-05, "loss": 0.3784, "step": 4534 }, { "epoch": 43.96987951807229, "grad_norm": 16.036945343017578, "learning_rate": 2.7985436893203886e-05, "loss": 0.2001, "step": 4535 }, { "epoch": 43.97951807228916, "grad_norm": 9.119912147521973, "learning_rate": 2.7980582524271848e-05, "loss": 0.1818, "step": 4536 }, { "epoch": 43.98915662650602, "grad_norm": 9.08752155303955, "learning_rate": 2.797572815533981e-05, "loss": 0.2303, "step": 4537 }, { "epoch": 44.00481927710843, "grad_norm": 2.9236092567443848, "learning_rate": 2.797087378640777e-05, "loss": 0.1924, "step": 4538 }, { "epoch": 44.0144578313253, "grad_norm": 4.923802375793457, "learning_rate": 2.796601941747573e-05, "loss": 0.1307, "step": 4539 }, { "epoch": 44.024096385542165, "grad_norm": 44.15631866455078, "learning_rate": 2.7961165048543692e-05, "loss": 0.2794, "step": 4540 }, { "epoch": 44.033734939759036, "grad_norm": 7.262697696685791, "learning_rate": 2.7956310679611653e-05, "loss": 0.2057, "step": 4541 }, { "epoch": 44.043373493975906, "grad_norm": 3.0158801078796387, "learning_rate": 2.7951456310679614e-05, "loss": 0.206, "step": 4542 }, { "epoch": 44.05301204819277, "grad_norm": 2.825894594192505, "learning_rate": 2.7946601941747576e-05, "loss": 0.214, "step": 4543 }, { "epoch": 44.06265060240964, "grad_norm": 2.1699068546295166, "learning_rate": 2.7941747572815537e-05, "loss": 0.1168, "step": 4544 }, { "epoch": 44.0722891566265, "grad_norm": 6.325565814971924, "learning_rate": 2.7936893203883498e-05, "loss": 0.4188, "step": 4545 }, { "epoch": 44.081927710843374, "grad_norm": 5.1860480308532715, "learning_rate": 2.7932038834951456e-05, "loss": 0.2803, "step": 4546 }, { "epoch": 44.091566265060244, "grad_norm": 17.77091407775879, "learning_rate": 2.7927184466019417e-05, "loss": 0.4788, "step": 4547 }, { "epoch": 44.10120481927711, "grad_norm": 5.5759453773498535, "learning_rate": 2.7922330097087378e-05, "loss": 0.166, "step": 4548 }, { "epoch": 44.11084337349398, "grad_norm": 12.45634937286377, "learning_rate": 2.791747572815534e-05, "loss": 0.3071, "step": 4549 }, { "epoch": 44.12048192771084, "grad_norm": 8.105039596557617, "learning_rate": 2.79126213592233e-05, "loss": 0.1418, "step": 4550 }, { "epoch": 44.13012048192771, "grad_norm": 31.731380462646484, "learning_rate": 2.790776699029126e-05, "loss": 0.2909, "step": 4551 }, { "epoch": 44.139759036144575, "grad_norm": 1.5384140014648438, "learning_rate": 2.7902912621359222e-05, "loss": 0.1364, "step": 4552 }, { "epoch": 44.149397590361446, "grad_norm": 4.920721530914307, "learning_rate": 2.7898058252427184e-05, "loss": 0.3038, "step": 4553 }, { "epoch": 44.159036144578316, "grad_norm": 6.713641166687012, "learning_rate": 2.7893203883495145e-05, "loss": 0.2717, "step": 4554 }, { "epoch": 44.16867469879518, "grad_norm": 5.209966659545898, "learning_rate": 2.7888349514563106e-05, "loss": 0.0868, "step": 4555 }, { "epoch": 44.17831325301205, "grad_norm": 3.8421008586883545, "learning_rate": 2.7883495145631067e-05, "loss": 0.2711, "step": 4556 }, { "epoch": 44.18795180722891, "grad_norm": 4.403404712677002, "learning_rate": 2.787864077669903e-05, "loss": 0.334, "step": 4557 }, { "epoch": 44.19759036144578, "grad_norm": 3.554098606109619, "learning_rate": 2.7873786407766993e-05, "loss": 0.4025, "step": 4558 }, { "epoch": 44.207228915662654, "grad_norm": 3.3462133407592773, "learning_rate": 2.7868932038834954e-05, "loss": 0.3877, "step": 4559 }, { "epoch": 44.21686746987952, "grad_norm": 3.221897602081299, "learning_rate": 2.7864077669902915e-05, "loss": 0.3173, "step": 4560 }, { "epoch": 44.22650602409639, "grad_norm": 11.238462448120117, "learning_rate": 2.7859223300970876e-05, "loss": 0.3079, "step": 4561 }, { "epoch": 44.23614457831325, "grad_norm": 4.065271854400635, "learning_rate": 2.7854368932038837e-05, "loss": 0.1317, "step": 4562 }, { "epoch": 44.24578313253012, "grad_norm": 2.7030386924743652, "learning_rate": 2.78495145631068e-05, "loss": 0.1995, "step": 4563 }, { "epoch": 44.255421686746985, "grad_norm": 5.373016357421875, "learning_rate": 2.784466019417476e-05, "loss": 0.2941, "step": 4564 }, { "epoch": 44.265060240963855, "grad_norm": 9.049996376037598, "learning_rate": 2.783980582524272e-05, "loss": 0.124, "step": 4565 }, { "epoch": 44.274698795180726, "grad_norm": 3.551114320755005, "learning_rate": 2.7834951456310682e-05, "loss": 0.2896, "step": 4566 }, { "epoch": 44.28433734939759, "grad_norm": 20.21651840209961, "learning_rate": 2.7830097087378643e-05, "loss": 0.2813, "step": 4567 }, { "epoch": 44.29397590361446, "grad_norm": 6.121240615844727, "learning_rate": 2.7825242718446604e-05, "loss": 0.2964, "step": 4568 }, { "epoch": 44.30361445783132, "grad_norm": 5.688607215881348, "learning_rate": 2.7820388349514565e-05, "loss": 0.2071, "step": 4569 }, { "epoch": 44.31325301204819, "grad_norm": 5.865672588348389, "learning_rate": 2.7815533980582527e-05, "loss": 0.1896, "step": 4570 }, { "epoch": 44.32289156626506, "grad_norm": 8.762053489685059, "learning_rate": 2.7810679611650488e-05, "loss": 0.3114, "step": 4571 }, { "epoch": 44.33253012048193, "grad_norm": 8.934377670288086, "learning_rate": 2.780582524271845e-05, "loss": 0.2083, "step": 4572 }, { "epoch": 44.3421686746988, "grad_norm": 9.238000869750977, "learning_rate": 2.780097087378641e-05, "loss": 0.356, "step": 4573 }, { "epoch": 44.35180722891566, "grad_norm": 3.2237589359283447, "learning_rate": 2.7796116504854368e-05, "loss": 0.3163, "step": 4574 }, { "epoch": 44.36144578313253, "grad_norm": 8.398276329040527, "learning_rate": 2.779126213592233e-05, "loss": 0.2492, "step": 4575 }, { "epoch": 44.371084337349394, "grad_norm": 10.424206733703613, "learning_rate": 2.778640776699029e-05, "loss": 0.2052, "step": 4576 }, { "epoch": 44.380722891566265, "grad_norm": 15.180276870727539, "learning_rate": 2.778155339805825e-05, "loss": 0.2261, "step": 4577 }, { "epoch": 44.390361445783135, "grad_norm": 4.96720027923584, "learning_rate": 2.7776699029126212e-05, "loss": 0.1908, "step": 4578 }, { "epoch": 44.4, "grad_norm": 8.499058723449707, "learning_rate": 2.7771844660194173e-05, "loss": 0.2675, "step": 4579 }, { "epoch": 44.40963855421687, "grad_norm": 14.076699256896973, "learning_rate": 2.7766990291262135e-05, "loss": 0.3299, "step": 4580 }, { "epoch": 44.41927710843373, "grad_norm": 9.482433319091797, "learning_rate": 2.7762135922330096e-05, "loss": 0.3292, "step": 4581 }, { "epoch": 44.4289156626506, "grad_norm": 4.457645416259766, "learning_rate": 2.7757281553398064e-05, "loss": 0.3518, "step": 4582 }, { "epoch": 44.43855421686747, "grad_norm": 17.810218811035156, "learning_rate": 2.7752427184466025e-05, "loss": 0.15, "step": 4583 }, { "epoch": 44.44819277108434, "grad_norm": 19.82204246520996, "learning_rate": 2.7747572815533986e-05, "loss": 0.326, "step": 4584 }, { "epoch": 44.45783132530121, "grad_norm": 2.3799314498901367, "learning_rate": 2.7742718446601944e-05, "loss": 0.1666, "step": 4585 }, { "epoch": 44.46746987951807, "grad_norm": 1.5130555629730225, "learning_rate": 2.7737864077669905e-05, "loss": 0.2571, "step": 4586 }, { "epoch": 44.47710843373494, "grad_norm": 1.9019644260406494, "learning_rate": 2.7733009708737866e-05, "loss": 0.4123, "step": 4587 }, { "epoch": 44.486746987951804, "grad_norm": 4.469345569610596, "learning_rate": 2.7728155339805827e-05, "loss": 0.1293, "step": 4588 }, { "epoch": 44.496385542168674, "grad_norm": 3.4051663875579834, "learning_rate": 2.7723300970873788e-05, "loss": 0.171, "step": 4589 }, { "epoch": 44.506024096385545, "grad_norm": 5.271926403045654, "learning_rate": 2.771844660194175e-05, "loss": 0.1799, "step": 4590 }, { "epoch": 44.51566265060241, "grad_norm": 2.82837176322937, "learning_rate": 2.771359223300971e-05, "loss": 0.2723, "step": 4591 }, { "epoch": 44.52530120481928, "grad_norm": 7.322528839111328, "learning_rate": 2.7708737864077672e-05, "loss": 0.2047, "step": 4592 }, { "epoch": 44.53493975903614, "grad_norm": 4.855798244476318, "learning_rate": 2.7703883495145633e-05, "loss": 0.2633, "step": 4593 }, { "epoch": 44.54457831325301, "grad_norm": 8.719532012939453, "learning_rate": 2.7699029126213594e-05, "loss": 0.1093, "step": 4594 }, { "epoch": 44.55421686746988, "grad_norm": 4.087757110595703, "learning_rate": 2.7694174757281555e-05, "loss": 0.1595, "step": 4595 }, { "epoch": 44.563855421686746, "grad_norm": 16.054319381713867, "learning_rate": 2.7689320388349516e-05, "loss": 0.3291, "step": 4596 }, { "epoch": 44.57349397590362, "grad_norm": 2.94655442237854, "learning_rate": 2.7684466019417477e-05, "loss": 0.187, "step": 4597 }, { "epoch": 44.58313253012048, "grad_norm": 3.6676504611968994, "learning_rate": 2.767961165048544e-05, "loss": 0.2107, "step": 4598 }, { "epoch": 44.59277108433735, "grad_norm": 6.204626560211182, "learning_rate": 2.76747572815534e-05, "loss": 0.2194, "step": 4599 }, { "epoch": 44.602409638554214, "grad_norm": 23.56688690185547, "learning_rate": 2.766990291262136e-05, "loss": 0.2833, "step": 4600 }, { "epoch": 44.612048192771084, "grad_norm": 3.8617539405822754, "learning_rate": 2.766504854368932e-05, "loss": 0.3326, "step": 4601 }, { "epoch": 44.621686746987955, "grad_norm": 4.946985721588135, "learning_rate": 2.766019417475728e-05, "loss": 0.3654, "step": 4602 }, { "epoch": 44.63132530120482, "grad_norm": 6.128940582275391, "learning_rate": 2.765533980582524e-05, "loss": 0.2439, "step": 4603 }, { "epoch": 44.64096385542169, "grad_norm": 11.93354320526123, "learning_rate": 2.7650485436893202e-05, "loss": 0.2533, "step": 4604 }, { "epoch": 44.65060240963855, "grad_norm": 3.818211078643799, "learning_rate": 2.7645631067961163e-05, "loss": 0.1551, "step": 4605 }, { "epoch": 44.66024096385542, "grad_norm": 14.937355995178223, "learning_rate": 2.7640776699029124e-05, "loss": 0.1476, "step": 4606 }, { "epoch": 44.66987951807229, "grad_norm": 11.686851501464844, "learning_rate": 2.7635922330097086e-05, "loss": 0.2836, "step": 4607 }, { "epoch": 44.679518072289156, "grad_norm": 1.669884443283081, "learning_rate": 2.7631067961165053e-05, "loss": 0.2344, "step": 4608 }, { "epoch": 44.689156626506026, "grad_norm": 24.27410316467285, "learning_rate": 2.7626213592233015e-05, "loss": 0.3261, "step": 4609 }, { "epoch": 44.69879518072289, "grad_norm": 20.743581771850586, "learning_rate": 2.7621359223300976e-05, "loss": 0.4051, "step": 4610 }, { "epoch": 44.70843373493976, "grad_norm": 8.04038143157959, "learning_rate": 2.7616504854368937e-05, "loss": 0.2468, "step": 4611 }, { "epoch": 44.71807228915662, "grad_norm": 5.409212112426758, "learning_rate": 2.7611650485436895e-05, "loss": 0.4852, "step": 4612 }, { "epoch": 44.727710843373494, "grad_norm": 35.24665832519531, "learning_rate": 2.7606796116504856e-05, "loss": 0.2134, "step": 4613 }, { "epoch": 44.737349397590364, "grad_norm": 23.059053421020508, "learning_rate": 2.7601941747572817e-05, "loss": 0.2807, "step": 4614 }, { "epoch": 44.74698795180723, "grad_norm": 9.369253158569336, "learning_rate": 2.7597087378640778e-05, "loss": 0.2437, "step": 4615 }, { "epoch": 44.7566265060241, "grad_norm": 4.680335998535156, "learning_rate": 2.759223300970874e-05, "loss": 0.2218, "step": 4616 }, { "epoch": 44.76626506024096, "grad_norm": 9.704885482788086, "learning_rate": 2.75873786407767e-05, "loss": 0.247, "step": 4617 }, { "epoch": 44.77590361445783, "grad_norm": 4.35260534286499, "learning_rate": 2.758252427184466e-05, "loss": 0.2017, "step": 4618 }, { "epoch": 44.7855421686747, "grad_norm": 9.800283432006836, "learning_rate": 2.7577669902912623e-05, "loss": 0.113, "step": 4619 }, { "epoch": 44.795180722891565, "grad_norm": 27.767908096313477, "learning_rate": 2.7572815533980584e-05, "loss": 0.4161, "step": 4620 }, { "epoch": 44.804819277108436, "grad_norm": 1.9231605529785156, "learning_rate": 2.7567961165048545e-05, "loss": 0.1623, "step": 4621 }, { "epoch": 44.8144578313253, "grad_norm": 2.859635829925537, "learning_rate": 2.7563106796116506e-05, "loss": 0.3402, "step": 4622 }, { "epoch": 44.82409638554217, "grad_norm": 6.3611226081848145, "learning_rate": 2.7558252427184467e-05, "loss": 0.4259, "step": 4623 }, { "epoch": 44.83373493975903, "grad_norm": 4.513238430023193, "learning_rate": 2.755339805825243e-05, "loss": 0.2922, "step": 4624 }, { "epoch": 44.8433734939759, "grad_norm": 10.079894065856934, "learning_rate": 2.754854368932039e-05, "loss": 0.2495, "step": 4625 }, { "epoch": 44.853012048192774, "grad_norm": 3.79736065864563, "learning_rate": 2.754368932038835e-05, "loss": 0.199, "step": 4626 }, { "epoch": 44.86265060240964, "grad_norm": 4.888023853302002, "learning_rate": 2.7538834951456312e-05, "loss": 0.3253, "step": 4627 }, { "epoch": 44.87228915662651, "grad_norm": 3.256417751312256, "learning_rate": 2.7533980582524273e-05, "loss": 0.1698, "step": 4628 }, { "epoch": 44.88192771084337, "grad_norm": 10.147415161132812, "learning_rate": 2.752912621359223e-05, "loss": 0.1808, "step": 4629 }, { "epoch": 44.89156626506024, "grad_norm": 3.4806580543518066, "learning_rate": 2.7524271844660192e-05, "loss": 0.254, "step": 4630 }, { "epoch": 44.90120481927711, "grad_norm": 4.941023349761963, "learning_rate": 2.7519417475728153e-05, "loss": 0.3239, "step": 4631 }, { "epoch": 44.910843373493975, "grad_norm": 8.693228721618652, "learning_rate": 2.7514563106796114e-05, "loss": 0.3722, "step": 4632 }, { "epoch": 44.920481927710846, "grad_norm": 4.179804801940918, "learning_rate": 2.7509708737864082e-05, "loss": 0.2341, "step": 4633 }, { "epoch": 44.93012048192771, "grad_norm": 3.4524683952331543, "learning_rate": 2.7504854368932043e-05, "loss": 0.2407, "step": 4634 }, { "epoch": 44.93975903614458, "grad_norm": 13.955536842346191, "learning_rate": 2.7500000000000004e-05, "loss": 0.2773, "step": 4635 }, { "epoch": 44.94939759036144, "grad_norm": 3.30886173248291, "learning_rate": 2.7495145631067966e-05, "loss": 0.0758, "step": 4636 }, { "epoch": 44.95903614457831, "grad_norm": 15.44607925415039, "learning_rate": 2.7490291262135927e-05, "loss": 0.359, "step": 4637 }, { "epoch": 44.96867469879518, "grad_norm": 5.25464391708374, "learning_rate": 2.7485436893203888e-05, "loss": 0.3512, "step": 4638 }, { "epoch": 44.97831325301205, "grad_norm": 2.663447141647339, "learning_rate": 2.748058252427185e-05, "loss": 0.3163, "step": 4639 }, { "epoch": 44.98795180722892, "grad_norm": 13.071290969848633, "learning_rate": 2.7475728155339807e-05, "loss": 0.2794, "step": 4640 }, { "epoch": 45.003614457831326, "grad_norm": 4.2125163078308105, "learning_rate": 2.7470873786407768e-05, "loss": 0.2152, "step": 4641 }, { "epoch": 45.013253012048196, "grad_norm": 11.439892768859863, "learning_rate": 2.746601941747573e-05, "loss": 0.3739, "step": 4642 }, { "epoch": 45.02289156626506, "grad_norm": 8.15124797821045, "learning_rate": 2.746116504854369e-05, "loss": 0.2072, "step": 4643 }, { "epoch": 45.03253012048193, "grad_norm": 4.166398048400879, "learning_rate": 2.745631067961165e-05, "loss": 0.2505, "step": 4644 }, { "epoch": 45.04216867469879, "grad_norm": 4.308152675628662, "learning_rate": 2.7451456310679612e-05, "loss": 0.2995, "step": 4645 }, { "epoch": 45.05180722891566, "grad_norm": 3.7247884273529053, "learning_rate": 2.7446601941747574e-05, "loss": 0.2596, "step": 4646 }, { "epoch": 45.06144578313253, "grad_norm": 7.616769313812256, "learning_rate": 2.7441747572815535e-05, "loss": 0.3821, "step": 4647 }, { "epoch": 45.0710843373494, "grad_norm": 3.0323781967163086, "learning_rate": 2.7436893203883496e-05, "loss": 0.2986, "step": 4648 }, { "epoch": 45.08072289156627, "grad_norm": 5.53689432144165, "learning_rate": 2.7432038834951457e-05, "loss": 0.2467, "step": 4649 }, { "epoch": 45.09036144578313, "grad_norm": 6.217216968536377, "learning_rate": 2.7427184466019418e-05, "loss": 0.2606, "step": 4650 }, { "epoch": 45.1, "grad_norm": 10.052882194519043, "learning_rate": 2.742233009708738e-05, "loss": 0.2496, "step": 4651 }, { "epoch": 45.109638554216865, "grad_norm": 9.880097389221191, "learning_rate": 2.741747572815534e-05, "loss": 0.2223, "step": 4652 }, { "epoch": 45.119277108433735, "grad_norm": 3.6002073287963867, "learning_rate": 2.74126213592233e-05, "loss": 0.2516, "step": 4653 }, { "epoch": 45.128915662650606, "grad_norm": 6.693922519683838, "learning_rate": 2.7407766990291263e-05, "loss": 0.2514, "step": 4654 }, { "epoch": 45.13855421686747, "grad_norm": 4.882068157196045, "learning_rate": 2.7402912621359224e-05, "loss": 0.2496, "step": 4655 }, { "epoch": 45.14819277108434, "grad_norm": 5.957263469696045, "learning_rate": 2.7398058252427185e-05, "loss": 0.2438, "step": 4656 }, { "epoch": 45.1578313253012, "grad_norm": 4.629617691040039, "learning_rate": 2.7393203883495143e-05, "loss": 0.4084, "step": 4657 }, { "epoch": 45.16746987951807, "grad_norm": 4.450522422790527, "learning_rate": 2.738834951456311e-05, "loss": 0.1358, "step": 4658 }, { "epoch": 45.17710843373494, "grad_norm": 7.860206604003906, "learning_rate": 2.7383495145631072e-05, "loss": 0.3072, "step": 4659 }, { "epoch": 45.18674698795181, "grad_norm": 13.149619102478027, "learning_rate": 2.7378640776699033e-05, "loss": 0.2615, "step": 4660 }, { "epoch": 45.19638554216868, "grad_norm": 9.465240478515625, "learning_rate": 2.7373786407766994e-05, "loss": 0.2049, "step": 4661 }, { "epoch": 45.20602409638554, "grad_norm": 4.378102779388428, "learning_rate": 2.7368932038834955e-05, "loss": 0.2256, "step": 4662 }, { "epoch": 45.21566265060241, "grad_norm": 3.3010525703430176, "learning_rate": 2.7364077669902916e-05, "loss": 0.149, "step": 4663 }, { "epoch": 45.225301204819274, "grad_norm": 3.8800055980682373, "learning_rate": 2.7359223300970878e-05, "loss": 0.2581, "step": 4664 }, { "epoch": 45.234939759036145, "grad_norm": 6.936655521392822, "learning_rate": 2.735436893203884e-05, "loss": 0.1056, "step": 4665 }, { "epoch": 45.244578313253015, "grad_norm": 5.92602014541626, "learning_rate": 2.73495145631068e-05, "loss": 0.3899, "step": 4666 }, { "epoch": 45.25421686746988, "grad_norm": 6.3200554847717285, "learning_rate": 2.7344660194174758e-05, "loss": 0.2043, "step": 4667 }, { "epoch": 45.26385542168675, "grad_norm": 2.8198156356811523, "learning_rate": 2.733980582524272e-05, "loss": 0.2012, "step": 4668 }, { "epoch": 45.27349397590361, "grad_norm": 16.47161865234375, "learning_rate": 2.733495145631068e-05, "loss": 0.3099, "step": 4669 }, { "epoch": 45.28313253012048, "grad_norm": 17.6525821685791, "learning_rate": 2.733009708737864e-05, "loss": 0.3287, "step": 4670 }, { "epoch": 45.292771084337346, "grad_norm": 4.301858901977539, "learning_rate": 2.7325242718446602e-05, "loss": 0.1779, "step": 4671 }, { "epoch": 45.30240963855422, "grad_norm": 6.947841167449951, "learning_rate": 2.7320388349514563e-05, "loss": 0.3668, "step": 4672 }, { "epoch": 45.31204819277109, "grad_norm": 9.57515811920166, "learning_rate": 2.7315533980582525e-05, "loss": 0.2444, "step": 4673 }, { "epoch": 45.32168674698795, "grad_norm": 2.475083827972412, "learning_rate": 2.7310679611650486e-05, "loss": 0.0848, "step": 4674 }, { "epoch": 45.33132530120482, "grad_norm": 4.554519176483154, "learning_rate": 2.7305825242718447e-05, "loss": 0.3053, "step": 4675 }, { "epoch": 45.340963855421684, "grad_norm": 38.051143646240234, "learning_rate": 2.7300970873786408e-05, "loss": 0.3772, "step": 4676 }, { "epoch": 45.350602409638554, "grad_norm": 2.798882246017456, "learning_rate": 2.729611650485437e-05, "loss": 0.2622, "step": 4677 }, { "epoch": 45.360240963855425, "grad_norm": 5.244598388671875, "learning_rate": 2.729126213592233e-05, "loss": 0.2491, "step": 4678 }, { "epoch": 45.36987951807229, "grad_norm": 2.1869664192199707, "learning_rate": 2.728640776699029e-05, "loss": 0.074, "step": 4679 }, { "epoch": 45.37951807228916, "grad_norm": 5.3362650871276855, "learning_rate": 2.7281553398058253e-05, "loss": 0.168, "step": 4680 }, { "epoch": 45.38915662650602, "grad_norm": 3.792306900024414, "learning_rate": 2.7276699029126214e-05, "loss": 0.1866, "step": 4681 }, { "epoch": 45.39879518072289, "grad_norm": 3.814761161804199, "learning_rate": 2.7271844660194175e-05, "loss": 0.2928, "step": 4682 }, { "epoch": 45.408433734939756, "grad_norm": 7.57944393157959, "learning_rate": 2.726699029126214e-05, "loss": 0.265, "step": 4683 }, { "epoch": 45.418072289156626, "grad_norm": 14.47207260131836, "learning_rate": 2.72621359223301e-05, "loss": 0.4497, "step": 4684 }, { "epoch": 45.4277108433735, "grad_norm": 4.615080833435059, "learning_rate": 2.725728155339806e-05, "loss": 0.1458, "step": 4685 }, { "epoch": 45.43734939759036, "grad_norm": 4.072316646575928, "learning_rate": 2.7252427184466023e-05, "loss": 0.1638, "step": 4686 }, { "epoch": 45.44698795180723, "grad_norm": 8.007049560546875, "learning_rate": 2.7247572815533984e-05, "loss": 0.2305, "step": 4687 }, { "epoch": 45.456626506024094, "grad_norm": 8.094818115234375, "learning_rate": 2.7242718446601945e-05, "loss": 0.0847, "step": 4688 }, { "epoch": 45.466265060240964, "grad_norm": 7.5962324142456055, "learning_rate": 2.7237864077669906e-05, "loss": 0.2555, "step": 4689 }, { "epoch": 45.475903614457835, "grad_norm": 2.4776430130004883, "learning_rate": 2.7233009708737867e-05, "loss": 0.153, "step": 4690 }, { "epoch": 45.4855421686747, "grad_norm": 5.178109645843506, "learning_rate": 2.722815533980583e-05, "loss": 0.3956, "step": 4691 }, { "epoch": 45.49518072289157, "grad_norm": 16.2945613861084, "learning_rate": 2.722330097087379e-05, "loss": 0.1883, "step": 4692 }, { "epoch": 45.50481927710843, "grad_norm": 6.583761215209961, "learning_rate": 2.721844660194175e-05, "loss": 0.3599, "step": 4693 }, { "epoch": 45.5144578313253, "grad_norm": 9.053525924682617, "learning_rate": 2.7213592233009712e-05, "loss": 0.2891, "step": 4694 }, { "epoch": 45.524096385542165, "grad_norm": 6.5373148918151855, "learning_rate": 2.720873786407767e-05, "loss": 0.2623, "step": 4695 }, { "epoch": 45.533734939759036, "grad_norm": 5.726780891418457, "learning_rate": 2.720388349514563e-05, "loss": 0.1846, "step": 4696 }, { "epoch": 45.543373493975906, "grad_norm": 2.8704023361206055, "learning_rate": 2.7199029126213592e-05, "loss": 0.2887, "step": 4697 }, { "epoch": 45.55301204819277, "grad_norm": 4.646401405334473, "learning_rate": 2.7194174757281553e-05, "loss": 0.1684, "step": 4698 }, { "epoch": 45.56265060240964, "grad_norm": 22.048194885253906, "learning_rate": 2.7189320388349514e-05, "loss": 0.2862, "step": 4699 }, { "epoch": 45.5722891566265, "grad_norm": 17.97262954711914, "learning_rate": 2.7184466019417475e-05, "loss": 0.2982, "step": 4700 }, { "epoch": 45.581927710843374, "grad_norm": 5.103722095489502, "learning_rate": 2.7179611650485437e-05, "loss": 0.2479, "step": 4701 }, { "epoch": 45.591566265060244, "grad_norm": 8.139520645141602, "learning_rate": 2.7174757281553398e-05, "loss": 0.1084, "step": 4702 }, { "epoch": 45.60120481927711, "grad_norm": 5.787885665893555, "learning_rate": 2.716990291262136e-05, "loss": 0.2038, "step": 4703 }, { "epoch": 45.61084337349398, "grad_norm": 11.170841217041016, "learning_rate": 2.716504854368932e-05, "loss": 0.3608, "step": 4704 }, { "epoch": 45.62048192771084, "grad_norm": 8.085899353027344, "learning_rate": 2.716019417475728e-05, "loss": 0.2353, "step": 4705 }, { "epoch": 45.63012048192771, "grad_norm": 1.3490160703659058, "learning_rate": 2.7155339805825242e-05, "loss": 0.0794, "step": 4706 }, { "epoch": 45.639759036144575, "grad_norm": 5.63179349899292, "learning_rate": 2.7150485436893203e-05, "loss": 0.3221, "step": 4707 }, { "epoch": 45.649397590361446, "grad_norm": 11.067961692810059, "learning_rate": 2.7145631067961165e-05, "loss": 0.2583, "step": 4708 }, { "epoch": 45.659036144578316, "grad_norm": 8.014067649841309, "learning_rate": 2.714077669902913e-05, "loss": 0.1701, "step": 4709 }, { "epoch": 45.66867469879518, "grad_norm": 3.6147637367248535, "learning_rate": 2.713592233009709e-05, "loss": 0.2267, "step": 4710 }, { "epoch": 45.67831325301205, "grad_norm": 4.2201313972473145, "learning_rate": 2.713106796116505e-05, "loss": 0.172, "step": 4711 }, { "epoch": 45.68795180722891, "grad_norm": 4.384742259979248, "learning_rate": 2.7126213592233013e-05, "loss": 0.2978, "step": 4712 }, { "epoch": 45.69759036144578, "grad_norm": 4.4032158851623535, "learning_rate": 2.7121359223300974e-05, "loss": 0.3766, "step": 4713 }, { "epoch": 45.707228915662654, "grad_norm": 7.715346336364746, "learning_rate": 2.7116504854368935e-05, "loss": 0.3116, "step": 4714 }, { "epoch": 45.71686746987952, "grad_norm": 6.491019248962402, "learning_rate": 2.7111650485436896e-05, "loss": 0.2926, "step": 4715 }, { "epoch": 45.72650602409639, "grad_norm": 2.3863351345062256, "learning_rate": 2.7106796116504857e-05, "loss": 0.148, "step": 4716 }, { "epoch": 45.73614457831325, "grad_norm": 1.7805395126342773, "learning_rate": 2.710194174757282e-05, "loss": 0.2289, "step": 4717 }, { "epoch": 45.74578313253012, "grad_norm": 11.578788757324219, "learning_rate": 2.709708737864078e-05, "loss": 0.2095, "step": 4718 }, { "epoch": 45.755421686746985, "grad_norm": 11.49576473236084, "learning_rate": 2.709223300970874e-05, "loss": 0.2342, "step": 4719 }, { "epoch": 45.765060240963855, "grad_norm": 3.734484910964966, "learning_rate": 2.7087378640776702e-05, "loss": 0.2537, "step": 4720 }, { "epoch": 45.774698795180726, "grad_norm": 6.800707817077637, "learning_rate": 2.7082524271844663e-05, "loss": 0.2432, "step": 4721 }, { "epoch": 45.78433734939759, "grad_norm": 16.837310791015625, "learning_rate": 2.7077669902912624e-05, "loss": 0.2846, "step": 4722 }, { "epoch": 45.79397590361446, "grad_norm": 7.611032962799072, "learning_rate": 2.7072815533980582e-05, "loss": 0.0958, "step": 4723 }, { "epoch": 45.80361445783132, "grad_norm": 3.3935725688934326, "learning_rate": 2.7067961165048543e-05, "loss": 0.1754, "step": 4724 }, { "epoch": 45.81325301204819, "grad_norm": 2.8591883182525635, "learning_rate": 2.7063106796116504e-05, "loss": 0.1023, "step": 4725 }, { "epoch": 45.82289156626506, "grad_norm": 1.8951796293258667, "learning_rate": 2.7058252427184465e-05, "loss": 0.0745, "step": 4726 }, { "epoch": 45.83253012048193, "grad_norm": 3.033785104751587, "learning_rate": 2.7053398058252426e-05, "loss": 0.1805, "step": 4727 }, { "epoch": 45.8421686746988, "grad_norm": 6.013187408447266, "learning_rate": 2.7048543689320388e-05, "loss": 0.1552, "step": 4728 }, { "epoch": 45.85180722891566, "grad_norm": 6.91603946685791, "learning_rate": 2.704368932038835e-05, "loss": 0.1886, "step": 4729 }, { "epoch": 45.86144578313253, "grad_norm": 22.039905548095703, "learning_rate": 2.703883495145631e-05, "loss": 0.2048, "step": 4730 }, { "epoch": 45.871084337349394, "grad_norm": 10.93646240234375, "learning_rate": 2.703398058252427e-05, "loss": 0.3665, "step": 4731 }, { "epoch": 45.880722891566265, "grad_norm": 13.150333404541016, "learning_rate": 2.7029126213592232e-05, "loss": 0.2088, "step": 4732 }, { "epoch": 45.890361445783135, "grad_norm": 11.840934753417969, "learning_rate": 2.7024271844660193e-05, "loss": 0.3748, "step": 4733 }, { "epoch": 45.9, "grad_norm": 9.536296844482422, "learning_rate": 2.7019417475728158e-05, "loss": 0.1895, "step": 4734 }, { "epoch": 45.90963855421687, "grad_norm": 3.1417806148529053, "learning_rate": 2.701456310679612e-05, "loss": 0.2078, "step": 4735 }, { "epoch": 45.91927710843373, "grad_norm": 5.226475715637207, "learning_rate": 2.700970873786408e-05, "loss": 0.2889, "step": 4736 }, { "epoch": 45.9289156626506, "grad_norm": 5.823170185089111, "learning_rate": 2.700485436893204e-05, "loss": 0.112, "step": 4737 }, { "epoch": 45.93855421686747, "grad_norm": 3.4126596450805664, "learning_rate": 2.7000000000000002e-05, "loss": 0.1776, "step": 4738 }, { "epoch": 45.94819277108434, "grad_norm": 8.693978309631348, "learning_rate": 2.6995145631067964e-05, "loss": 0.3768, "step": 4739 }, { "epoch": 45.95783132530121, "grad_norm": 5.016844272613525, "learning_rate": 2.6990291262135925e-05, "loss": 0.1057, "step": 4740 }, { "epoch": 45.96746987951807, "grad_norm": 8.112601280212402, "learning_rate": 2.6985436893203886e-05, "loss": 0.522, "step": 4741 }, { "epoch": 45.97710843373494, "grad_norm": 5.765379428863525, "learning_rate": 2.6980582524271847e-05, "loss": 0.1911, "step": 4742 }, { "epoch": 45.986746987951804, "grad_norm": 3.2297048568725586, "learning_rate": 2.6975728155339808e-05, "loss": 0.2123, "step": 4743 }, { "epoch": 46.00240963855422, "grad_norm": 2.9495582580566406, "learning_rate": 2.697087378640777e-05, "loss": 0.1691, "step": 4744 }, { "epoch": 46.01204819277108, "grad_norm": 8.273957252502441, "learning_rate": 2.696601941747573e-05, "loss": 0.3556, "step": 4745 }, { "epoch": 46.02168674698795, "grad_norm": 5.556756496429443, "learning_rate": 2.696116504854369e-05, "loss": 0.3319, "step": 4746 }, { "epoch": 46.03132530120482, "grad_norm": 6.547088146209717, "learning_rate": 2.6956310679611653e-05, "loss": 0.288, "step": 4747 }, { "epoch": 46.04096385542169, "grad_norm": 28.66054344177246, "learning_rate": 2.6951456310679614e-05, "loss": 0.2648, "step": 4748 }, { "epoch": 46.05060240963856, "grad_norm": 9.18617057800293, "learning_rate": 2.6946601941747575e-05, "loss": 0.362, "step": 4749 }, { "epoch": 46.06024096385542, "grad_norm": 8.371834754943848, "learning_rate": 2.6941747572815533e-05, "loss": 0.2369, "step": 4750 }, { "epoch": 46.06987951807229, "grad_norm": 7.211695671081543, "learning_rate": 2.6936893203883494e-05, "loss": 0.2788, "step": 4751 }, { "epoch": 46.079518072289154, "grad_norm": 2.290266275405884, "learning_rate": 2.6932038834951455e-05, "loss": 0.1007, "step": 4752 }, { "epoch": 46.089156626506025, "grad_norm": 5.275758743286133, "learning_rate": 2.6927184466019416e-05, "loss": 0.3073, "step": 4753 }, { "epoch": 46.09879518072289, "grad_norm": 3.0370166301727295, "learning_rate": 2.6922330097087377e-05, "loss": 0.1272, "step": 4754 }, { "epoch": 46.10843373493976, "grad_norm": 4.219581604003906, "learning_rate": 2.691747572815534e-05, "loss": 0.2227, "step": 4755 }, { "epoch": 46.11807228915663, "grad_norm": 4.555667400360107, "learning_rate": 2.69126213592233e-05, "loss": 0.2401, "step": 4756 }, { "epoch": 46.12771084337349, "grad_norm": 5.569356441497803, "learning_rate": 2.690776699029126e-05, "loss": 0.2637, "step": 4757 }, { "epoch": 46.13734939759036, "grad_norm": 3.4748294353485107, "learning_rate": 2.6902912621359222e-05, "loss": 0.2453, "step": 4758 }, { "epoch": 46.146987951807226, "grad_norm": 4.153372764587402, "learning_rate": 2.689805825242719e-05, "loss": 0.1158, "step": 4759 }, { "epoch": 46.1566265060241, "grad_norm": 14.84614372253418, "learning_rate": 2.689320388349515e-05, "loss": 0.1961, "step": 4760 }, { "epoch": 46.16626506024097, "grad_norm": 2.703850746154785, "learning_rate": 2.688834951456311e-05, "loss": 0.362, "step": 4761 }, { "epoch": 46.17590361445783, "grad_norm": 1.2968518733978271, "learning_rate": 2.688349514563107e-05, "loss": 0.072, "step": 4762 }, { "epoch": 46.1855421686747, "grad_norm": 4.869528770446777, "learning_rate": 2.687864077669903e-05, "loss": 0.2358, "step": 4763 }, { "epoch": 46.195180722891564, "grad_norm": 4.43242883682251, "learning_rate": 2.6873786407766992e-05, "loss": 0.3279, "step": 4764 }, { "epoch": 46.204819277108435, "grad_norm": 11.864562034606934, "learning_rate": 2.6868932038834953e-05, "loss": 0.2382, "step": 4765 }, { "epoch": 46.2144578313253, "grad_norm": 5.523270606994629, "learning_rate": 2.6864077669902914e-05, "loss": 0.2369, "step": 4766 }, { "epoch": 46.22409638554217, "grad_norm": 7.738522529602051, "learning_rate": 2.6859223300970876e-05, "loss": 0.2294, "step": 4767 }, { "epoch": 46.23373493975904, "grad_norm": 4.123643398284912, "learning_rate": 2.6854368932038837e-05, "loss": 0.238, "step": 4768 }, { "epoch": 46.2433734939759, "grad_norm": 3.7477099895477295, "learning_rate": 2.6849514563106798e-05, "loss": 0.1653, "step": 4769 }, { "epoch": 46.25301204819277, "grad_norm": 4.675952434539795, "learning_rate": 2.684466019417476e-05, "loss": 0.2147, "step": 4770 }, { "epoch": 46.262650602409636, "grad_norm": 5.479208469390869, "learning_rate": 2.683980582524272e-05, "loss": 0.2472, "step": 4771 }, { "epoch": 46.272289156626506, "grad_norm": 6.181296348571777, "learning_rate": 2.683495145631068e-05, "loss": 0.428, "step": 4772 }, { "epoch": 46.28192771084338, "grad_norm": 5.440836429595947, "learning_rate": 2.6830097087378642e-05, "loss": 0.1195, "step": 4773 }, { "epoch": 46.29156626506024, "grad_norm": 4.243145942687988, "learning_rate": 2.6825242718446604e-05, "loss": 0.5087, "step": 4774 }, { "epoch": 46.30120481927711, "grad_norm": 2.087804079055786, "learning_rate": 2.6820388349514565e-05, "loss": 0.0913, "step": 4775 }, { "epoch": 46.310843373493974, "grad_norm": 5.389830589294434, "learning_rate": 2.6815533980582526e-05, "loss": 0.2955, "step": 4776 }, { "epoch": 46.320481927710844, "grad_norm": 4.503159523010254, "learning_rate": 2.6810679611650487e-05, "loss": 0.3683, "step": 4777 }, { "epoch": 46.33012048192771, "grad_norm": 5.4950995445251465, "learning_rate": 2.6805825242718445e-05, "loss": 0.2727, "step": 4778 }, { "epoch": 46.33975903614458, "grad_norm": 3.7737069129943848, "learning_rate": 2.6800970873786406e-05, "loss": 0.3156, "step": 4779 }, { "epoch": 46.34939759036145, "grad_norm": 5.000969409942627, "learning_rate": 2.6796116504854367e-05, "loss": 0.2385, "step": 4780 }, { "epoch": 46.35903614457831, "grad_norm": 4.142760276794434, "learning_rate": 2.6791262135922328e-05, "loss": 0.3932, "step": 4781 }, { "epoch": 46.36867469879518, "grad_norm": 3.298234224319458, "learning_rate": 2.678640776699029e-05, "loss": 0.1619, "step": 4782 }, { "epoch": 46.378313253012045, "grad_norm": 24.691118240356445, "learning_rate": 2.678155339805825e-05, "loss": 0.2849, "step": 4783 }, { "epoch": 46.387951807228916, "grad_norm": 2.6980693340301514, "learning_rate": 2.677669902912622e-05, "loss": 0.1649, "step": 4784 }, { "epoch": 46.397590361445786, "grad_norm": 4.384038925170898, "learning_rate": 2.677184466019418e-05, "loss": 0.1788, "step": 4785 }, { "epoch": 46.40722891566265, "grad_norm": 13.054452896118164, "learning_rate": 2.676699029126214e-05, "loss": 0.331, "step": 4786 }, { "epoch": 46.41686746987952, "grad_norm": 3.397508144378662, "learning_rate": 2.6762135922330102e-05, "loss": 0.346, "step": 4787 }, { "epoch": 46.42650602409638, "grad_norm": 15.254549026489258, "learning_rate": 2.6757281553398063e-05, "loss": 0.4546, "step": 4788 }, { "epoch": 46.436144578313254, "grad_norm": 11.04262924194336, "learning_rate": 2.675242718446602e-05, "loss": 0.0824, "step": 4789 }, { "epoch": 46.44578313253012, "grad_norm": 8.441530227661133, "learning_rate": 2.6747572815533982e-05, "loss": 0.2584, "step": 4790 }, { "epoch": 46.45542168674699, "grad_norm": 3.1125359535217285, "learning_rate": 2.6742718446601943e-05, "loss": 0.1795, "step": 4791 }, { "epoch": 46.46506024096386, "grad_norm": 3.0620083808898926, "learning_rate": 2.6737864077669904e-05, "loss": 0.2931, "step": 4792 }, { "epoch": 46.47469879518072, "grad_norm": 6.675529479980469, "learning_rate": 2.6733009708737865e-05, "loss": 0.1919, "step": 4793 }, { "epoch": 46.48433734939759, "grad_norm": 30.110698699951172, "learning_rate": 2.6728155339805827e-05, "loss": 0.2847, "step": 4794 }, { "epoch": 46.493975903614455, "grad_norm": 6.804153919219971, "learning_rate": 2.6723300970873788e-05, "loss": 0.3063, "step": 4795 }, { "epoch": 46.503614457831326, "grad_norm": 4.963301658630371, "learning_rate": 2.671844660194175e-05, "loss": 0.3301, "step": 4796 }, { "epoch": 46.513253012048196, "grad_norm": 18.862957000732422, "learning_rate": 2.671359223300971e-05, "loss": 0.113, "step": 4797 }, { "epoch": 46.52289156626506, "grad_norm": 6.8394927978515625, "learning_rate": 2.670873786407767e-05, "loss": 0.2778, "step": 4798 }, { "epoch": 46.53253012048193, "grad_norm": 6.470944881439209, "learning_rate": 2.6703883495145632e-05, "loss": 0.2252, "step": 4799 }, { "epoch": 46.54216867469879, "grad_norm": 4.131027698516846, "learning_rate": 2.6699029126213593e-05, "loss": 0.2742, "step": 4800 }, { "epoch": 46.55180722891566, "grad_norm": 14.770063400268555, "learning_rate": 2.6694174757281555e-05, "loss": 0.4744, "step": 4801 }, { "epoch": 46.56144578313253, "grad_norm": 3.5140364170074463, "learning_rate": 2.6689320388349516e-05, "loss": 0.2786, "step": 4802 }, { "epoch": 46.5710843373494, "grad_norm": 22.103675842285156, "learning_rate": 2.6684466019417477e-05, "loss": 0.1724, "step": 4803 }, { "epoch": 46.58072289156627, "grad_norm": 8.129850387573242, "learning_rate": 2.6679611650485438e-05, "loss": 0.3311, "step": 4804 }, { "epoch": 46.59036144578313, "grad_norm": 3.092346668243408, "learning_rate": 2.66747572815534e-05, "loss": 0.1804, "step": 4805 }, { "epoch": 46.6, "grad_norm": 3.2283823490142822, "learning_rate": 2.6669902912621357e-05, "loss": 0.1966, "step": 4806 }, { "epoch": 46.609638554216865, "grad_norm": 3.7503602504730225, "learning_rate": 2.6665048543689318e-05, "loss": 0.1105, "step": 4807 }, { "epoch": 46.619277108433735, "grad_norm": 8.685826301574707, "learning_rate": 2.666019417475728e-05, "loss": 0.1373, "step": 4808 }, { "epoch": 46.628915662650606, "grad_norm": 9.319050788879395, "learning_rate": 2.665533980582524e-05, "loss": 0.314, "step": 4809 }, { "epoch": 46.63855421686747, "grad_norm": 4.955615043640137, "learning_rate": 2.6650485436893208e-05, "loss": 0.2822, "step": 4810 }, { "epoch": 46.64819277108434, "grad_norm": 3.6266496181488037, "learning_rate": 2.664563106796117e-05, "loss": 0.1167, "step": 4811 }, { "epoch": 46.6578313253012, "grad_norm": 4.882874488830566, "learning_rate": 2.664077669902913e-05, "loss": 0.2619, "step": 4812 }, { "epoch": 46.66746987951807, "grad_norm": 5.125772476196289, "learning_rate": 2.6635922330097092e-05, "loss": 0.3252, "step": 4813 }, { "epoch": 46.67710843373494, "grad_norm": 11.498223304748535, "learning_rate": 2.6631067961165053e-05, "loss": 0.1808, "step": 4814 }, { "epoch": 46.68674698795181, "grad_norm": 1.3729547262191772, "learning_rate": 2.6626213592233014e-05, "loss": 0.0893, "step": 4815 }, { "epoch": 46.69638554216868, "grad_norm": 6.244307041168213, "learning_rate": 2.6621359223300972e-05, "loss": 0.2761, "step": 4816 }, { "epoch": 46.70602409638554, "grad_norm": 3.5236618518829346, "learning_rate": 2.6616504854368933e-05, "loss": 0.1685, "step": 4817 }, { "epoch": 46.71566265060241, "grad_norm": 6.580108165740967, "learning_rate": 2.6611650485436894e-05, "loss": 0.2849, "step": 4818 }, { "epoch": 46.725301204819274, "grad_norm": 5.593547344207764, "learning_rate": 2.6606796116504855e-05, "loss": 0.2792, "step": 4819 }, { "epoch": 46.734939759036145, "grad_norm": 4.893856048583984, "learning_rate": 2.6601941747572816e-05, "loss": 0.1256, "step": 4820 }, { "epoch": 46.744578313253015, "grad_norm": 4.5812458992004395, "learning_rate": 2.6597087378640777e-05, "loss": 0.4973, "step": 4821 }, { "epoch": 46.75421686746988, "grad_norm": 7.9508843421936035, "learning_rate": 2.659223300970874e-05, "loss": 0.2909, "step": 4822 }, { "epoch": 46.76385542168675, "grad_norm": 3.334413766860962, "learning_rate": 2.65873786407767e-05, "loss": 0.1732, "step": 4823 }, { "epoch": 46.77349397590361, "grad_norm": 6.188394069671631, "learning_rate": 2.658252427184466e-05, "loss": 0.3365, "step": 4824 }, { "epoch": 46.78313253012048, "grad_norm": 6.074957370758057, "learning_rate": 2.6577669902912622e-05, "loss": 0.2882, "step": 4825 }, { "epoch": 46.792771084337346, "grad_norm": 3.7289931774139404, "learning_rate": 2.6572815533980583e-05, "loss": 0.166, "step": 4826 }, { "epoch": 46.80240963855422, "grad_norm": 5.285876750946045, "learning_rate": 2.6567961165048544e-05, "loss": 0.1574, "step": 4827 }, { "epoch": 46.81204819277109, "grad_norm": 19.397327423095703, "learning_rate": 2.6563106796116505e-05, "loss": 0.2386, "step": 4828 }, { "epoch": 46.82168674698795, "grad_norm": 7.485217571258545, "learning_rate": 2.6558252427184467e-05, "loss": 0.3938, "step": 4829 }, { "epoch": 46.83132530120482, "grad_norm": 8.315876007080078, "learning_rate": 2.6553398058252428e-05, "loss": 0.2864, "step": 4830 }, { "epoch": 46.840963855421684, "grad_norm": 10.341084480285645, "learning_rate": 2.654854368932039e-05, "loss": 0.2964, "step": 4831 }, { "epoch": 46.850602409638554, "grad_norm": 3.0523366928100586, "learning_rate": 2.654368932038835e-05, "loss": 0.1572, "step": 4832 }, { "epoch": 46.860240963855425, "grad_norm": 2.7913572788238525, "learning_rate": 2.6538834951456308e-05, "loss": 0.163, "step": 4833 }, { "epoch": 46.86987951807229, "grad_norm": 14.78608512878418, "learning_rate": 2.653398058252427e-05, "loss": 0.2718, "step": 4834 }, { "epoch": 46.87951807228916, "grad_norm": 5.07145881652832, "learning_rate": 2.6529126213592237e-05, "loss": 0.2413, "step": 4835 }, { "epoch": 46.88915662650602, "grad_norm": 3.0728330612182617, "learning_rate": 2.6524271844660198e-05, "loss": 0.2517, "step": 4836 }, { "epoch": 46.89879518072289, "grad_norm": 3.80682373046875, "learning_rate": 2.651941747572816e-05, "loss": 0.1843, "step": 4837 }, { "epoch": 46.908433734939756, "grad_norm": 5.888142108917236, "learning_rate": 2.651456310679612e-05, "loss": 0.1876, "step": 4838 }, { "epoch": 46.918072289156626, "grad_norm": 10.88643741607666, "learning_rate": 2.650970873786408e-05, "loss": 0.4748, "step": 4839 }, { "epoch": 46.9277108433735, "grad_norm": 3.899517297744751, "learning_rate": 2.6504854368932043e-05, "loss": 0.4145, "step": 4840 }, { "epoch": 46.93734939759036, "grad_norm": 3.3770363330841064, "learning_rate": 2.6500000000000004e-05, "loss": 0.1569, "step": 4841 }, { "epoch": 46.94698795180723, "grad_norm": 3.1924922466278076, "learning_rate": 2.6495145631067965e-05, "loss": 0.1419, "step": 4842 }, { "epoch": 46.956626506024094, "grad_norm": 12.528681755065918, "learning_rate": 2.6490291262135926e-05, "loss": 0.3355, "step": 4843 }, { "epoch": 46.966265060240964, "grad_norm": 3.733076810836792, "learning_rate": 2.6485436893203884e-05, "loss": 0.3877, "step": 4844 }, { "epoch": 46.975903614457835, "grad_norm": 4.151050567626953, "learning_rate": 2.6480582524271845e-05, "loss": 0.3061, "step": 4845 }, { "epoch": 46.9855421686747, "grad_norm": 4.9761762619018555, "learning_rate": 2.6475728155339806e-05, "loss": 0.2894, "step": 4846 }, { "epoch": 47.001204819277106, "grad_norm": 7.577517986297607, "learning_rate": 2.6470873786407767e-05, "loss": 0.2731, "step": 4847 }, { "epoch": 47.01084337349398, "grad_norm": 5.747587203979492, "learning_rate": 2.646601941747573e-05, "loss": 0.3112, "step": 4848 }, { "epoch": 47.02048192771084, "grad_norm": 2.1929104328155518, "learning_rate": 2.646116504854369e-05, "loss": 0.0631, "step": 4849 }, { "epoch": 47.03012048192771, "grad_norm": 4.662525653839111, "learning_rate": 2.645631067961165e-05, "loss": 0.2617, "step": 4850 }, { "epoch": 47.03975903614458, "grad_norm": 14.800092697143555, "learning_rate": 2.6451456310679612e-05, "loss": 0.3756, "step": 4851 }, { "epoch": 47.049397590361444, "grad_norm": 2.608928918838501, "learning_rate": 2.6446601941747573e-05, "loss": 0.1774, "step": 4852 }, { "epoch": 47.059036144578315, "grad_norm": 4.31479549407959, "learning_rate": 2.6441747572815534e-05, "loss": 0.2359, "step": 4853 }, { "epoch": 47.06867469879518, "grad_norm": 2.007174253463745, "learning_rate": 2.6436893203883495e-05, "loss": 0.1115, "step": 4854 }, { "epoch": 47.07831325301205, "grad_norm": 8.358386993408203, "learning_rate": 2.6432038834951456e-05, "loss": 0.3437, "step": 4855 }, { "epoch": 47.08795180722892, "grad_norm": 6.275721549987793, "learning_rate": 2.6427184466019418e-05, "loss": 0.332, "step": 4856 }, { "epoch": 47.09759036144578, "grad_norm": 7.435280799865723, "learning_rate": 2.642233009708738e-05, "loss": 0.3017, "step": 4857 }, { "epoch": 47.10722891566265, "grad_norm": 5.5177741050720215, "learning_rate": 2.641747572815534e-05, "loss": 0.199, "step": 4858 }, { "epoch": 47.116867469879516, "grad_norm": 3.244845151901245, "learning_rate": 2.64126213592233e-05, "loss": 0.3717, "step": 4859 }, { "epoch": 47.126506024096386, "grad_norm": 4.025696277618408, "learning_rate": 2.6407766990291266e-05, "loss": 0.2563, "step": 4860 }, { "epoch": 47.13614457831325, "grad_norm": 44.139930725097656, "learning_rate": 2.6402912621359227e-05, "loss": 0.3787, "step": 4861 }, { "epoch": 47.14578313253012, "grad_norm": 5.024181842803955, "learning_rate": 2.6398058252427188e-05, "loss": 0.185, "step": 4862 }, { "epoch": 47.15542168674699, "grad_norm": 3.5893239974975586, "learning_rate": 2.639320388349515e-05, "loss": 0.16, "step": 4863 }, { "epoch": 47.165060240963854, "grad_norm": 7.65956974029541, "learning_rate": 2.638834951456311e-05, "loss": 0.1905, "step": 4864 }, { "epoch": 47.174698795180724, "grad_norm": 4.281960487365723, "learning_rate": 2.638349514563107e-05, "loss": 0.2481, "step": 4865 }, { "epoch": 47.18433734939759, "grad_norm": 4.451889991760254, "learning_rate": 2.6378640776699032e-05, "loss": 0.2623, "step": 4866 }, { "epoch": 47.19397590361446, "grad_norm": 9.13801383972168, "learning_rate": 2.6373786407766994e-05, "loss": 0.1726, "step": 4867 }, { "epoch": 47.20361445783133, "grad_norm": 6.62760591506958, "learning_rate": 2.6368932038834955e-05, "loss": 0.2516, "step": 4868 }, { "epoch": 47.21325301204819, "grad_norm": 5.2343902587890625, "learning_rate": 2.6364077669902916e-05, "loss": 0.2945, "step": 4869 }, { "epoch": 47.22289156626506, "grad_norm": 6.490097999572754, "learning_rate": 2.6359223300970877e-05, "loss": 0.2824, "step": 4870 }, { "epoch": 47.232530120481925, "grad_norm": 4.56581449508667, "learning_rate": 2.6354368932038838e-05, "loss": 0.1419, "step": 4871 }, { "epoch": 47.242168674698796, "grad_norm": 6.481657028198242, "learning_rate": 2.6349514563106796e-05, "loss": 0.346, "step": 4872 }, { "epoch": 47.25180722891566, "grad_norm": 8.276991844177246, "learning_rate": 2.6344660194174757e-05, "loss": 0.341, "step": 4873 }, { "epoch": 47.26144578313253, "grad_norm": 7.296951770782471, "learning_rate": 2.6339805825242718e-05, "loss": 0.3309, "step": 4874 }, { "epoch": 47.2710843373494, "grad_norm": 4.948811054229736, "learning_rate": 2.633495145631068e-05, "loss": 0.1498, "step": 4875 }, { "epoch": 47.28072289156626, "grad_norm": 2.748105525970459, "learning_rate": 2.633009708737864e-05, "loss": 0.0745, "step": 4876 }, { "epoch": 47.290361445783134, "grad_norm": 7.236608505249023, "learning_rate": 2.63252427184466e-05, "loss": 0.2627, "step": 4877 }, { "epoch": 47.3, "grad_norm": 2.638772964477539, "learning_rate": 2.6320388349514563e-05, "loss": 0.1838, "step": 4878 }, { "epoch": 47.30963855421687, "grad_norm": 3.5239362716674805, "learning_rate": 2.6315533980582524e-05, "loss": 0.2027, "step": 4879 }, { "epoch": 47.31927710843374, "grad_norm": 5.280236721038818, "learning_rate": 2.6310679611650485e-05, "loss": 0.1568, "step": 4880 }, { "epoch": 47.3289156626506, "grad_norm": 3.9568750858306885, "learning_rate": 2.6305825242718446e-05, "loss": 0.2771, "step": 4881 }, { "epoch": 47.33855421686747, "grad_norm": 3.647172212600708, "learning_rate": 2.6300970873786407e-05, "loss": 0.174, "step": 4882 }, { "epoch": 47.348192771084335, "grad_norm": 10.619354248046875, "learning_rate": 2.629611650485437e-05, "loss": 0.3132, "step": 4883 }, { "epoch": 47.357831325301206, "grad_norm": 3.7160303592681885, "learning_rate": 2.629126213592233e-05, "loss": 0.2705, "step": 4884 }, { "epoch": 47.36746987951807, "grad_norm": 4.254880428314209, "learning_rate": 2.6286407766990294e-05, "loss": 0.2359, "step": 4885 }, { "epoch": 47.37710843373494, "grad_norm": 1.9788355827331543, "learning_rate": 2.6281553398058255e-05, "loss": 0.1123, "step": 4886 }, { "epoch": 47.38674698795181, "grad_norm": 5.966589450836182, "learning_rate": 2.6276699029126216e-05, "loss": 0.4374, "step": 4887 }, { "epoch": 47.39638554216867, "grad_norm": 7.869369983673096, "learning_rate": 2.6271844660194178e-05, "loss": 0.3477, "step": 4888 }, { "epoch": 47.40602409638554, "grad_norm": 6.554029941558838, "learning_rate": 2.626699029126214e-05, "loss": 0.1804, "step": 4889 }, { "epoch": 47.41566265060241, "grad_norm": 5.405633449554443, "learning_rate": 2.62621359223301e-05, "loss": 0.2442, "step": 4890 }, { "epoch": 47.42530120481928, "grad_norm": 5.353194236755371, "learning_rate": 2.625728155339806e-05, "loss": 0.4411, "step": 4891 }, { "epoch": 47.43493975903615, "grad_norm": 4.087175369262695, "learning_rate": 2.6252427184466022e-05, "loss": 0.1951, "step": 4892 }, { "epoch": 47.44457831325301, "grad_norm": 5.405718803405762, "learning_rate": 2.6247572815533983e-05, "loss": 0.2901, "step": 4893 }, { "epoch": 47.45421686746988, "grad_norm": 2.253171920776367, "learning_rate": 2.6242718446601945e-05, "loss": 0.1588, "step": 4894 }, { "epoch": 47.463855421686745, "grad_norm": 4.925593852996826, "learning_rate": 2.6237864077669906e-05, "loss": 0.2999, "step": 4895 }, { "epoch": 47.473493975903615, "grad_norm": 5.4874420166015625, "learning_rate": 2.6233009708737867e-05, "loss": 0.2941, "step": 4896 }, { "epoch": 47.48313253012048, "grad_norm": 4.45454216003418, "learning_rate": 2.6228155339805828e-05, "loss": 0.3908, "step": 4897 }, { "epoch": 47.49277108433735, "grad_norm": 6.849409103393555, "learning_rate": 2.622330097087379e-05, "loss": 0.2153, "step": 4898 }, { "epoch": 47.50240963855422, "grad_norm": 10.153120040893555, "learning_rate": 2.6218446601941747e-05, "loss": 0.1197, "step": 4899 }, { "epoch": 47.51204819277108, "grad_norm": 2.141986608505249, "learning_rate": 2.6213592233009708e-05, "loss": 0.208, "step": 4900 }, { "epoch": 47.52168674698795, "grad_norm": 7.09451150894165, "learning_rate": 2.620873786407767e-05, "loss": 0.329, "step": 4901 }, { "epoch": 47.53132530120482, "grad_norm": 3.1807029247283936, "learning_rate": 2.620388349514563e-05, "loss": 0.1076, "step": 4902 }, { "epoch": 47.54096385542169, "grad_norm": 2.753356695175171, "learning_rate": 2.619902912621359e-05, "loss": 0.1118, "step": 4903 }, { "epoch": 47.55060240963856, "grad_norm": 2.2074055671691895, "learning_rate": 2.6194174757281553e-05, "loss": 0.0951, "step": 4904 }, { "epoch": 47.56024096385542, "grad_norm": 2.731265068054199, "learning_rate": 2.6189320388349514e-05, "loss": 0.0977, "step": 4905 }, { "epoch": 47.56987951807229, "grad_norm": 3.556445837020874, "learning_rate": 2.6184466019417475e-05, "loss": 0.2052, "step": 4906 }, { "epoch": 47.579518072289154, "grad_norm": 7.919158458709717, "learning_rate": 2.6179611650485436e-05, "loss": 0.2401, "step": 4907 }, { "epoch": 47.589156626506025, "grad_norm": 2.413902521133423, "learning_rate": 2.6174757281553397e-05, "loss": 0.1117, "step": 4908 }, { "epoch": 47.59879518072289, "grad_norm": 6.606542587280273, "learning_rate": 2.6169902912621358e-05, "loss": 0.2328, "step": 4909 }, { "epoch": 47.60843373493976, "grad_norm": 2.991821527481079, "learning_rate": 2.616504854368932e-05, "loss": 0.2198, "step": 4910 }, { "epoch": 47.61807228915663, "grad_norm": 5.644036293029785, "learning_rate": 2.6160194174757284e-05, "loss": 0.2543, "step": 4911 }, { "epoch": 47.62771084337349, "grad_norm": 4.479405879974365, "learning_rate": 2.6155339805825245e-05, "loss": 0.2908, "step": 4912 }, { "epoch": 47.63734939759036, "grad_norm": 5.365135192871094, "learning_rate": 2.6150485436893206e-05, "loss": 0.1691, "step": 4913 }, { "epoch": 47.646987951807226, "grad_norm": 10.965442657470703, "learning_rate": 2.6145631067961167e-05, "loss": 0.2408, "step": 4914 }, { "epoch": 47.6566265060241, "grad_norm": 3.6549735069274902, "learning_rate": 2.614077669902913e-05, "loss": 0.2354, "step": 4915 }, { "epoch": 47.66626506024097, "grad_norm": 6.691993236541748, "learning_rate": 2.613592233009709e-05, "loss": 0.3439, "step": 4916 }, { "epoch": 47.67590361445783, "grad_norm": 3.7112081050872803, "learning_rate": 2.613106796116505e-05, "loss": 0.3227, "step": 4917 }, { "epoch": 47.6855421686747, "grad_norm": 28.950387954711914, "learning_rate": 2.6126213592233012e-05, "loss": 0.2575, "step": 4918 }, { "epoch": 47.695180722891564, "grad_norm": 3.308546543121338, "learning_rate": 2.6121359223300973e-05, "loss": 0.1225, "step": 4919 }, { "epoch": 47.704819277108435, "grad_norm": 1.91367769241333, "learning_rate": 2.6116504854368934e-05, "loss": 0.0916, "step": 4920 }, { "epoch": 47.7144578313253, "grad_norm": 4.355243682861328, "learning_rate": 2.6111650485436895e-05, "loss": 0.1803, "step": 4921 }, { "epoch": 47.72409638554217, "grad_norm": 2.6466383934020996, "learning_rate": 2.6106796116504857e-05, "loss": 0.1412, "step": 4922 }, { "epoch": 47.73373493975904, "grad_norm": 4.523141384124756, "learning_rate": 2.6101941747572818e-05, "loss": 0.301, "step": 4923 }, { "epoch": 47.7433734939759, "grad_norm": 4.402410507202148, "learning_rate": 2.609708737864078e-05, "loss": 0.321, "step": 4924 }, { "epoch": 47.75301204819277, "grad_norm": 4.110836505889893, "learning_rate": 2.609223300970874e-05, "loss": 0.2523, "step": 4925 }, { "epoch": 47.762650602409636, "grad_norm": 5.569491386413574, "learning_rate": 2.60873786407767e-05, "loss": 0.2526, "step": 4926 }, { "epoch": 47.772289156626506, "grad_norm": 2.196120500564575, "learning_rate": 2.608252427184466e-05, "loss": 0.093, "step": 4927 }, { "epoch": 47.78192771084338, "grad_norm": 7.874795913696289, "learning_rate": 2.607766990291262e-05, "loss": 0.4281, "step": 4928 }, { "epoch": 47.79156626506024, "grad_norm": 6.500578880310059, "learning_rate": 2.607281553398058e-05, "loss": 0.3517, "step": 4929 }, { "epoch": 47.80120481927711, "grad_norm": 5.6975908279418945, "learning_rate": 2.6067961165048542e-05, "loss": 0.2, "step": 4930 }, { "epoch": 47.810843373493974, "grad_norm": 6.681776523590088, "learning_rate": 2.6063106796116504e-05, "loss": 0.3261, "step": 4931 }, { "epoch": 47.820481927710844, "grad_norm": 5.363801002502441, "learning_rate": 2.6058252427184465e-05, "loss": 0.4297, "step": 4932 }, { "epoch": 47.83012048192771, "grad_norm": 22.090303421020508, "learning_rate": 2.6053398058252426e-05, "loss": 0.4435, "step": 4933 }, { "epoch": 47.83975903614458, "grad_norm": 7.093578338623047, "learning_rate": 2.6048543689320387e-05, "loss": 0.3781, "step": 4934 }, { "epoch": 47.84939759036145, "grad_norm": 6.713463306427002, "learning_rate": 2.6043689320388348e-05, "loss": 0.4015, "step": 4935 }, { "epoch": 47.85903614457831, "grad_norm": 12.492280960083008, "learning_rate": 2.6038834951456316e-05, "loss": 0.5268, "step": 4936 }, { "epoch": 47.86867469879518, "grad_norm": 10.022189140319824, "learning_rate": 2.6033980582524277e-05, "loss": 0.4375, "step": 4937 }, { "epoch": 47.878313253012045, "grad_norm": 7.570248126983643, "learning_rate": 2.6029126213592235e-05, "loss": 0.1246, "step": 4938 }, { "epoch": 47.887951807228916, "grad_norm": 10.351090431213379, "learning_rate": 2.6024271844660196e-05, "loss": 0.2274, "step": 4939 }, { "epoch": 47.897590361445786, "grad_norm": 10.332307815551758, "learning_rate": 2.6019417475728157e-05, "loss": 0.2069, "step": 4940 }, { "epoch": 47.90722891566265, "grad_norm": 2.187656879425049, "learning_rate": 2.601456310679612e-05, "loss": 0.1378, "step": 4941 }, { "epoch": 47.91686746987952, "grad_norm": 3.2787418365478516, "learning_rate": 2.600970873786408e-05, "loss": 0.2493, "step": 4942 }, { "epoch": 47.92650602409638, "grad_norm": 7.131016254425049, "learning_rate": 2.600485436893204e-05, "loss": 0.2156, "step": 4943 }, { "epoch": 47.936144578313254, "grad_norm": 7.746039867401123, "learning_rate": 2.6000000000000002e-05, "loss": 0.3569, "step": 4944 }, { "epoch": 47.94578313253012, "grad_norm": 3.9674923419952393, "learning_rate": 2.5995145631067963e-05, "loss": 0.119, "step": 4945 }, { "epoch": 47.95542168674699, "grad_norm": 4.504773139953613, "learning_rate": 2.5990291262135924e-05, "loss": 0.3148, "step": 4946 }, { "epoch": 47.96506024096386, "grad_norm": 6.144868850708008, "learning_rate": 2.5985436893203885e-05, "loss": 0.284, "step": 4947 }, { "epoch": 47.97469879518072, "grad_norm": 3.810072183609009, "learning_rate": 2.5980582524271846e-05, "loss": 0.1355, "step": 4948 }, { "epoch": 47.98433734939759, "grad_norm": 4.921435356140137, "learning_rate": 2.5975728155339808e-05, "loss": 0.2912, "step": 4949 }, { "epoch": 47.993975903614455, "grad_norm": 6.231573104858398, "learning_rate": 2.597087378640777e-05, "loss": 0.3065, "step": 4950 }, { "epoch": 48.00963855421687, "grad_norm": 6.683330059051514, "learning_rate": 2.596601941747573e-05, "loss": 0.4459, "step": 4951 }, { "epoch": 48.019277108433734, "grad_norm": 3.315568447113037, "learning_rate": 2.596116504854369e-05, "loss": 0.0844, "step": 4952 }, { "epoch": 48.028915662650604, "grad_norm": 2.396432638168335, "learning_rate": 2.5956310679611652e-05, "loss": 0.1431, "step": 4953 }, { "epoch": 48.03855421686747, "grad_norm": 4.663015842437744, "learning_rate": 2.5951456310679613e-05, "loss": 0.2349, "step": 4954 }, { "epoch": 48.04819277108434, "grad_norm": 10.476755142211914, "learning_rate": 2.594660194174757e-05, "loss": 0.402, "step": 4955 }, { "epoch": 48.0578313253012, "grad_norm": 4.783491611480713, "learning_rate": 2.5941747572815532e-05, "loss": 0.2923, "step": 4956 }, { "epoch": 48.06746987951807, "grad_norm": 1.5939316749572754, "learning_rate": 2.5936893203883493e-05, "loss": 0.0659, "step": 4957 }, { "epoch": 48.07710843373494, "grad_norm": 6.626514911651611, "learning_rate": 2.5932038834951454e-05, "loss": 0.2135, "step": 4958 }, { "epoch": 48.086746987951805, "grad_norm": 13.263118743896484, "learning_rate": 2.5927184466019416e-05, "loss": 0.3667, "step": 4959 }, { "epoch": 48.096385542168676, "grad_norm": 3.86128830909729, "learning_rate": 2.5922330097087377e-05, "loss": 0.1511, "step": 4960 }, { "epoch": 48.10602409638554, "grad_norm": 4.426331043243408, "learning_rate": 2.5917475728155345e-05, "loss": 0.1436, "step": 4961 }, { "epoch": 48.11566265060241, "grad_norm": 2.7850112915039062, "learning_rate": 2.5912621359223306e-05, "loss": 0.1271, "step": 4962 }, { "epoch": 48.12530120481928, "grad_norm": 7.662455081939697, "learning_rate": 2.5907766990291267e-05, "loss": 0.4348, "step": 4963 }, { "epoch": 48.13493975903614, "grad_norm": 12.643366813659668, "learning_rate": 2.5902912621359228e-05, "loss": 0.1513, "step": 4964 }, { "epoch": 48.144578313253014, "grad_norm": 6.3896803855896, "learning_rate": 2.5898058252427186e-05, "loss": 0.3002, "step": 4965 }, { "epoch": 48.15421686746988, "grad_norm": 8.56907844543457, "learning_rate": 2.5893203883495147e-05, "loss": 0.2302, "step": 4966 }, { "epoch": 48.16385542168675, "grad_norm": 2.838294744491577, "learning_rate": 2.5888349514563108e-05, "loss": 0.2486, "step": 4967 }, { "epoch": 48.17349397590361, "grad_norm": 5.7744550704956055, "learning_rate": 2.588349514563107e-05, "loss": 0.2131, "step": 4968 }, { "epoch": 48.18313253012048, "grad_norm": 4.494974136352539, "learning_rate": 2.587864077669903e-05, "loss": 0.1525, "step": 4969 }, { "epoch": 48.19277108433735, "grad_norm": 3.3525850772857666, "learning_rate": 2.587378640776699e-05, "loss": 0.3741, "step": 4970 }, { "epoch": 48.202409638554215, "grad_norm": 3.851180076599121, "learning_rate": 2.5868932038834953e-05, "loss": 0.2284, "step": 4971 }, { "epoch": 48.212048192771086, "grad_norm": 5.5264434814453125, "learning_rate": 2.5864077669902914e-05, "loss": 0.2688, "step": 4972 }, { "epoch": 48.22168674698795, "grad_norm": 4.889129638671875, "learning_rate": 2.5859223300970875e-05, "loss": 0.1833, "step": 4973 }, { "epoch": 48.23132530120482, "grad_norm": 4.14417028427124, "learning_rate": 2.5854368932038836e-05, "loss": 0.308, "step": 4974 }, { "epoch": 48.24096385542169, "grad_norm": 2.497101068496704, "learning_rate": 2.5849514563106797e-05, "loss": 0.2073, "step": 4975 }, { "epoch": 48.25060240963855, "grad_norm": 4.481491565704346, "learning_rate": 2.584466019417476e-05, "loss": 0.2888, "step": 4976 }, { "epoch": 48.26024096385542, "grad_norm": 6.251045227050781, "learning_rate": 2.583980582524272e-05, "loss": 0.3591, "step": 4977 }, { "epoch": 48.26987951807229, "grad_norm": 5.802585601806641, "learning_rate": 2.583495145631068e-05, "loss": 0.2907, "step": 4978 }, { "epoch": 48.27951807228916, "grad_norm": 5.393713474273682, "learning_rate": 2.5830097087378642e-05, "loss": 0.2284, "step": 4979 }, { "epoch": 48.28915662650602, "grad_norm": 3.821373462677002, "learning_rate": 2.5825242718446603e-05, "loss": 0.2164, "step": 4980 }, { "epoch": 48.29879518072289, "grad_norm": 4.778879642486572, "learning_rate": 2.5820388349514564e-05, "loss": 0.1232, "step": 4981 }, { "epoch": 48.30843373493976, "grad_norm": 5.191327095031738, "learning_rate": 2.5815533980582522e-05, "loss": 0.3022, "step": 4982 }, { "epoch": 48.318072289156625, "grad_norm": 4.830749988555908, "learning_rate": 2.5810679611650483e-05, "loss": 0.2663, "step": 4983 }, { "epoch": 48.327710843373495, "grad_norm": 7.343194961547852, "learning_rate": 2.5805825242718444e-05, "loss": 0.3357, "step": 4984 }, { "epoch": 48.33734939759036, "grad_norm": 5.429691791534424, "learning_rate": 2.5800970873786405e-05, "loss": 0.2269, "step": 4985 }, { "epoch": 48.34698795180723, "grad_norm": 4.156835556030273, "learning_rate": 2.5796116504854373e-05, "loss": 0.1831, "step": 4986 }, { "epoch": 48.3566265060241, "grad_norm": 10.156183242797852, "learning_rate": 2.5791262135922334e-05, "loss": 0.5128, "step": 4987 }, { "epoch": 48.36626506024096, "grad_norm": 4.802363872528076, "learning_rate": 2.5786407766990296e-05, "loss": 0.1585, "step": 4988 }, { "epoch": 48.37590361445783, "grad_norm": 8.216224670410156, "learning_rate": 2.5781553398058257e-05, "loss": 0.4137, "step": 4989 }, { "epoch": 48.3855421686747, "grad_norm": 4.3489861488342285, "learning_rate": 2.5776699029126218e-05, "loss": 0.2291, "step": 4990 }, { "epoch": 48.39518072289157, "grad_norm": 5.482563018798828, "learning_rate": 2.577184466019418e-05, "loss": 0.4089, "step": 4991 }, { "epoch": 48.40481927710843, "grad_norm": 3.9440793991088867, "learning_rate": 2.576699029126214e-05, "loss": 0.2396, "step": 4992 }, { "epoch": 48.4144578313253, "grad_norm": 3.4360623359680176, "learning_rate": 2.5762135922330098e-05, "loss": 0.2393, "step": 4993 }, { "epoch": 48.42409638554217, "grad_norm": 13.519271850585938, "learning_rate": 2.575728155339806e-05, "loss": 0.3459, "step": 4994 }, { "epoch": 48.433734939759034, "grad_norm": 3.3684518337249756, "learning_rate": 2.575242718446602e-05, "loss": 0.2139, "step": 4995 }, { "epoch": 48.443373493975905, "grad_norm": 2.749746561050415, "learning_rate": 2.574757281553398e-05, "loss": 0.2805, "step": 4996 }, { "epoch": 48.45301204819277, "grad_norm": 2.602335214614868, "learning_rate": 2.5742718446601943e-05, "loss": 0.2895, "step": 4997 }, { "epoch": 48.46265060240964, "grad_norm": 4.236668586730957, "learning_rate": 2.5737864077669904e-05, "loss": 0.3539, "step": 4998 }, { "epoch": 48.47228915662651, "grad_norm": 5.466032028198242, "learning_rate": 2.5733009708737865e-05, "loss": 0.3055, "step": 4999 }, { "epoch": 48.48192771084337, "grad_norm": 8.483505249023438, "learning_rate": 2.5728155339805826e-05, "loss": 0.5593, "step": 5000 }, { "epoch": 48.49156626506024, "grad_norm": 7.634701728820801, "learning_rate": 2.5723300970873787e-05, "loss": 0.2293, "step": 5001 }, { "epoch": 48.501204819277106, "grad_norm": 4.634397029876709, "learning_rate": 2.5718446601941748e-05, "loss": 0.2661, "step": 5002 }, { "epoch": 48.51084337349398, "grad_norm": 4.678524971008301, "learning_rate": 2.571359223300971e-05, "loss": 0.1529, "step": 5003 }, { "epoch": 48.52048192771084, "grad_norm": 7.290117263793945, "learning_rate": 2.570873786407767e-05, "loss": 0.2238, "step": 5004 }, { "epoch": 48.53012048192771, "grad_norm": 5.488028526306152, "learning_rate": 2.570388349514563e-05, "loss": 0.3406, "step": 5005 }, { "epoch": 48.53975903614458, "grad_norm": 14.211350440979004, "learning_rate": 2.5699029126213593e-05, "loss": 0.1321, "step": 5006 }, { "epoch": 48.549397590361444, "grad_norm": 6.164660930633545, "learning_rate": 2.5694174757281554e-05, "loss": 0.3079, "step": 5007 }, { "epoch": 48.559036144578315, "grad_norm": 6.722437858581543, "learning_rate": 2.5689320388349515e-05, "loss": 0.301, "step": 5008 }, { "epoch": 48.56867469879518, "grad_norm": 8.06154727935791, "learning_rate": 2.5684466019417476e-05, "loss": 0.3944, "step": 5009 }, { "epoch": 48.57831325301205, "grad_norm": 3.703782081604004, "learning_rate": 2.5679611650485434e-05, "loss": 0.2903, "step": 5010 }, { "epoch": 48.58795180722892, "grad_norm": 3.651097536087036, "learning_rate": 2.5674757281553395e-05, "loss": 0.1189, "step": 5011 }, { "epoch": 48.59759036144578, "grad_norm": 3.79666805267334, "learning_rate": 2.5669902912621363e-05, "loss": 0.2115, "step": 5012 }, { "epoch": 48.60722891566265, "grad_norm": 4.620194911956787, "learning_rate": 2.5665048543689324e-05, "loss": 0.2502, "step": 5013 }, { "epoch": 48.616867469879516, "grad_norm": 4.547916889190674, "learning_rate": 2.5660194174757285e-05, "loss": 0.2545, "step": 5014 }, { "epoch": 48.626506024096386, "grad_norm": 3.6155028343200684, "learning_rate": 2.5655339805825247e-05, "loss": 0.2473, "step": 5015 }, { "epoch": 48.63614457831325, "grad_norm": 5.682065963745117, "learning_rate": 2.5650485436893208e-05, "loss": 0.3861, "step": 5016 }, { "epoch": 48.64578313253012, "grad_norm": 3.4495654106140137, "learning_rate": 2.564563106796117e-05, "loss": 0.1196, "step": 5017 }, { "epoch": 48.65542168674699, "grad_norm": 4.828500270843506, "learning_rate": 2.564077669902913e-05, "loss": 0.4357, "step": 5018 }, { "epoch": 48.665060240963854, "grad_norm": 3.642563819885254, "learning_rate": 2.563592233009709e-05, "loss": 0.1864, "step": 5019 }, { "epoch": 48.674698795180724, "grad_norm": 5.963900566101074, "learning_rate": 2.5631067961165052e-05, "loss": 0.2776, "step": 5020 }, { "epoch": 48.68433734939759, "grad_norm": 4.266355037689209, "learning_rate": 2.562621359223301e-05, "loss": 0.1854, "step": 5021 }, { "epoch": 48.69397590361446, "grad_norm": 7.204751014709473, "learning_rate": 2.562135922330097e-05, "loss": 0.2513, "step": 5022 }, { "epoch": 48.70361445783133, "grad_norm": 8.807544708251953, "learning_rate": 2.5616504854368932e-05, "loss": 0.2518, "step": 5023 }, { "epoch": 48.71325301204819, "grad_norm": 3.7291738986968994, "learning_rate": 2.5611650485436893e-05, "loss": 0.123, "step": 5024 }, { "epoch": 48.72289156626506, "grad_norm": 19.51996421813965, "learning_rate": 2.5606796116504855e-05, "loss": 0.2165, "step": 5025 }, { "epoch": 48.732530120481925, "grad_norm": 3.375821828842163, "learning_rate": 2.5601941747572816e-05, "loss": 0.2628, "step": 5026 }, { "epoch": 48.742168674698796, "grad_norm": 6.02133846282959, "learning_rate": 2.5597087378640777e-05, "loss": 0.456, "step": 5027 }, { "epoch": 48.75180722891566, "grad_norm": 11.465376853942871, "learning_rate": 2.5592233009708738e-05, "loss": 0.2505, "step": 5028 }, { "epoch": 48.76144578313253, "grad_norm": 10.678309440612793, "learning_rate": 2.55873786407767e-05, "loss": 0.3748, "step": 5029 }, { "epoch": 48.7710843373494, "grad_norm": 4.257253646850586, "learning_rate": 2.558252427184466e-05, "loss": 0.3742, "step": 5030 }, { "epoch": 48.78072289156626, "grad_norm": 5.312784194946289, "learning_rate": 2.557766990291262e-05, "loss": 0.3479, "step": 5031 }, { "epoch": 48.790361445783134, "grad_norm": 5.265642166137695, "learning_rate": 2.5572815533980583e-05, "loss": 0.2371, "step": 5032 }, { "epoch": 48.8, "grad_norm": 3.2591264247894287, "learning_rate": 2.5567961165048544e-05, "loss": 0.1652, "step": 5033 }, { "epoch": 48.80963855421687, "grad_norm": 2.710796594619751, "learning_rate": 2.5563106796116505e-05, "loss": 0.1953, "step": 5034 }, { "epoch": 48.81927710843374, "grad_norm": 7.57290506362915, "learning_rate": 2.5558252427184466e-05, "loss": 0.3891, "step": 5035 }, { "epoch": 48.8289156626506, "grad_norm": 3.6720669269561768, "learning_rate": 2.5553398058252427e-05, "loss": 0.1668, "step": 5036 }, { "epoch": 48.83855421686747, "grad_norm": 4.227163791656494, "learning_rate": 2.5548543689320392e-05, "loss": 0.1806, "step": 5037 }, { "epoch": 48.848192771084335, "grad_norm": 3.2824196815490723, "learning_rate": 2.5543689320388353e-05, "loss": 0.2086, "step": 5038 }, { "epoch": 48.857831325301206, "grad_norm": 2.6645126342773438, "learning_rate": 2.5538834951456314e-05, "loss": 0.1636, "step": 5039 }, { "epoch": 48.86746987951807, "grad_norm": 3.2437584400177, "learning_rate": 2.5533980582524275e-05, "loss": 0.164, "step": 5040 }, { "epoch": 48.87710843373494, "grad_norm": 5.740001678466797, "learning_rate": 2.5529126213592236e-05, "loss": 0.2432, "step": 5041 }, { "epoch": 48.88674698795181, "grad_norm": 5.071084499359131, "learning_rate": 2.5524271844660197e-05, "loss": 0.4172, "step": 5042 }, { "epoch": 48.89638554216867, "grad_norm": 6.954521179199219, "learning_rate": 2.551941747572816e-05, "loss": 0.3914, "step": 5043 }, { "epoch": 48.90602409638554, "grad_norm": 2.1079816818237305, "learning_rate": 2.551456310679612e-05, "loss": 0.1189, "step": 5044 }, { "epoch": 48.91566265060241, "grad_norm": 7.224477767944336, "learning_rate": 2.550970873786408e-05, "loss": 0.2865, "step": 5045 }, { "epoch": 48.92530120481928, "grad_norm": 3.976748466491699, "learning_rate": 2.5504854368932042e-05, "loss": 0.251, "step": 5046 }, { "epoch": 48.93493975903615, "grad_norm": 5.288058757781982, "learning_rate": 2.5500000000000003e-05, "loss": 0.1564, "step": 5047 }, { "epoch": 48.94457831325301, "grad_norm": 4.072408676147461, "learning_rate": 2.549514563106796e-05, "loss": 0.1094, "step": 5048 }, { "epoch": 48.95421686746988, "grad_norm": 7.617861270904541, "learning_rate": 2.5490291262135922e-05, "loss": 0.3132, "step": 5049 }, { "epoch": 48.963855421686745, "grad_norm": 3.076545238494873, "learning_rate": 2.5485436893203883e-05, "loss": 0.1032, "step": 5050 }, { "epoch": 48.973493975903615, "grad_norm": 7.223437309265137, "learning_rate": 2.5480582524271844e-05, "loss": 0.1808, "step": 5051 }, { "epoch": 48.98313253012048, "grad_norm": 4.267085075378418, "learning_rate": 2.5475728155339806e-05, "loss": 0.3178, "step": 5052 }, { "epoch": 48.99277108433735, "grad_norm": 3.596379041671753, "learning_rate": 2.5470873786407767e-05, "loss": 0.1052, "step": 5053 }, { "epoch": 49.00843373493976, "grad_norm": 5.954527854919434, "learning_rate": 2.5466019417475728e-05, "loss": 0.2174, "step": 5054 }, { "epoch": 49.01807228915663, "grad_norm": 1.533413290977478, "learning_rate": 2.546116504854369e-05, "loss": 0.063, "step": 5055 }, { "epoch": 49.02771084337349, "grad_norm": 14.540621757507324, "learning_rate": 2.545631067961165e-05, "loss": 0.1222, "step": 5056 }, { "epoch": 49.03734939759036, "grad_norm": 2.5252444744110107, "learning_rate": 2.545145631067961e-05, "loss": 0.211, "step": 5057 }, { "epoch": 49.04698795180723, "grad_norm": 11.151843070983887, "learning_rate": 2.5446601941747572e-05, "loss": 0.3506, "step": 5058 }, { "epoch": 49.056626506024095, "grad_norm": 38.075984954833984, "learning_rate": 2.5441747572815534e-05, "loss": 0.4199, "step": 5059 }, { "epoch": 49.066265060240966, "grad_norm": 4.667537689208984, "learning_rate": 2.5436893203883495e-05, "loss": 0.2148, "step": 5060 }, { "epoch": 49.07590361445783, "grad_norm": 30.412960052490234, "learning_rate": 2.5432038834951456e-05, "loss": 0.3341, "step": 5061 }, { "epoch": 49.0855421686747, "grad_norm": 4.076159477233887, "learning_rate": 2.542718446601942e-05, "loss": 0.2883, "step": 5062 }, { "epoch": 49.09518072289157, "grad_norm": 15.525866508483887, "learning_rate": 2.542233009708738e-05, "loss": 0.415, "step": 5063 }, { "epoch": 49.10481927710843, "grad_norm": 14.255691528320312, "learning_rate": 2.5417475728155343e-05, "loss": 0.1773, "step": 5064 }, { "epoch": 49.1144578313253, "grad_norm": 10.785531997680664, "learning_rate": 2.5412621359223304e-05, "loss": 0.1376, "step": 5065 }, { "epoch": 49.12409638554217, "grad_norm": 1.6856210231781006, "learning_rate": 2.5407766990291265e-05, "loss": 0.26, "step": 5066 }, { "epoch": 49.13373493975904, "grad_norm": 4.6758198738098145, "learning_rate": 2.5402912621359226e-05, "loss": 0.1956, "step": 5067 }, { "epoch": 49.1433734939759, "grad_norm": 2.676856279373169, "learning_rate": 2.5398058252427187e-05, "loss": 0.2754, "step": 5068 }, { "epoch": 49.15301204819277, "grad_norm": 12.797760009765625, "learning_rate": 2.539320388349515e-05, "loss": 0.3627, "step": 5069 }, { "epoch": 49.16265060240964, "grad_norm": 1.6839489936828613, "learning_rate": 2.538834951456311e-05, "loss": 0.1181, "step": 5070 }, { "epoch": 49.172289156626505, "grad_norm": 24.829927444458008, "learning_rate": 2.538349514563107e-05, "loss": 0.3317, "step": 5071 }, { "epoch": 49.181927710843375, "grad_norm": 16.623350143432617, "learning_rate": 2.5378640776699032e-05, "loss": 0.1894, "step": 5072 }, { "epoch": 49.19156626506024, "grad_norm": 6.500393390655518, "learning_rate": 2.5373786407766993e-05, "loss": 0.1975, "step": 5073 }, { "epoch": 49.20120481927711, "grad_norm": 12.542038917541504, "learning_rate": 2.5368932038834954e-05, "loss": 0.2811, "step": 5074 }, { "epoch": 49.21084337349398, "grad_norm": 48.31657791137695, "learning_rate": 2.5364077669902915e-05, "loss": 0.2055, "step": 5075 }, { "epoch": 49.22048192771084, "grad_norm": 3.3312666416168213, "learning_rate": 2.5359223300970873e-05, "loss": 0.2011, "step": 5076 }, { "epoch": 49.23012048192771, "grad_norm": 2.5301594734191895, "learning_rate": 2.5354368932038834e-05, "loss": 0.36, "step": 5077 }, { "epoch": 49.23975903614458, "grad_norm": 7.081692218780518, "learning_rate": 2.5349514563106795e-05, "loss": 0.2634, "step": 5078 }, { "epoch": 49.24939759036145, "grad_norm": 14.144261360168457, "learning_rate": 2.5344660194174756e-05, "loss": 0.3875, "step": 5079 }, { "epoch": 49.25903614457831, "grad_norm": 6.8115153312683105, "learning_rate": 2.5339805825242718e-05, "loss": 0.259, "step": 5080 }, { "epoch": 49.26867469879518, "grad_norm": 2.303309679031372, "learning_rate": 2.533495145631068e-05, "loss": 0.3405, "step": 5081 }, { "epoch": 49.27831325301205, "grad_norm": 1.6043260097503662, "learning_rate": 2.533009708737864e-05, "loss": 0.0723, "step": 5082 }, { "epoch": 49.287951807228914, "grad_norm": 4.280880928039551, "learning_rate": 2.53252427184466e-05, "loss": 0.222, "step": 5083 }, { "epoch": 49.297590361445785, "grad_norm": 5.820611476898193, "learning_rate": 2.5320388349514562e-05, "loss": 0.2993, "step": 5084 }, { "epoch": 49.30722891566265, "grad_norm": 20.09237289428711, "learning_rate": 2.5315533980582523e-05, "loss": 0.3241, "step": 5085 }, { "epoch": 49.31686746987952, "grad_norm": 18.74172019958496, "learning_rate": 2.5310679611650484e-05, "loss": 0.3114, "step": 5086 }, { "epoch": 49.32650602409639, "grad_norm": 17.72019386291504, "learning_rate": 2.530582524271845e-05, "loss": 0.2366, "step": 5087 }, { "epoch": 49.33614457831325, "grad_norm": 4.893204212188721, "learning_rate": 2.530097087378641e-05, "loss": 0.2464, "step": 5088 }, { "epoch": 49.34578313253012, "grad_norm": 22.099533081054688, "learning_rate": 2.529611650485437e-05, "loss": 0.2937, "step": 5089 }, { "epoch": 49.355421686746986, "grad_norm": 9.17555046081543, "learning_rate": 2.5291262135922332e-05, "loss": 0.2909, "step": 5090 }, { "epoch": 49.36506024096386, "grad_norm": 7.307192325592041, "learning_rate": 2.5286407766990294e-05, "loss": 0.3099, "step": 5091 }, { "epoch": 49.37469879518072, "grad_norm": 36.09441375732422, "learning_rate": 2.5281553398058255e-05, "loss": 0.2009, "step": 5092 }, { "epoch": 49.38433734939759, "grad_norm": 3.943154811859131, "learning_rate": 2.5276699029126216e-05, "loss": 0.3307, "step": 5093 }, { "epoch": 49.39397590361446, "grad_norm": 3.3214242458343506, "learning_rate": 2.5271844660194177e-05, "loss": 0.2262, "step": 5094 }, { "epoch": 49.403614457831324, "grad_norm": 2.7647247314453125, "learning_rate": 2.5266990291262138e-05, "loss": 0.2955, "step": 5095 }, { "epoch": 49.413253012048195, "grad_norm": 24.110197067260742, "learning_rate": 2.52621359223301e-05, "loss": 0.3582, "step": 5096 }, { "epoch": 49.42289156626506, "grad_norm": 14.838876724243164, "learning_rate": 2.525728155339806e-05, "loss": 0.277, "step": 5097 }, { "epoch": 49.43253012048193, "grad_norm": 4.9838151931762695, "learning_rate": 2.525242718446602e-05, "loss": 0.3223, "step": 5098 }, { "epoch": 49.44216867469879, "grad_norm": 20.954748153686523, "learning_rate": 2.5247572815533983e-05, "loss": 0.3663, "step": 5099 }, { "epoch": 49.45180722891566, "grad_norm": 8.392020225524902, "learning_rate": 2.5242718446601944e-05, "loss": 0.1776, "step": 5100 }, { "epoch": 49.46144578313253, "grad_norm": 6.336808681488037, "learning_rate": 2.5237864077669905e-05, "loss": 0.1544, "step": 5101 }, { "epoch": 49.471084337349396, "grad_norm": 1.3604261875152588, "learning_rate": 2.5233009708737866e-05, "loss": 0.2133, "step": 5102 }, { "epoch": 49.480722891566266, "grad_norm": 2.36283540725708, "learning_rate": 2.5228155339805827e-05, "loss": 0.2898, "step": 5103 }, { "epoch": 49.49036144578313, "grad_norm": 3.314354658126831, "learning_rate": 2.5223300970873785e-05, "loss": 0.1955, "step": 5104 }, { "epoch": 49.5, "grad_norm": 3.1528825759887695, "learning_rate": 2.5218446601941746e-05, "loss": 0.3297, "step": 5105 }, { "epoch": 49.50963855421687, "grad_norm": 28.719449996948242, "learning_rate": 2.5213592233009707e-05, "loss": 0.3457, "step": 5106 }, { "epoch": 49.519277108433734, "grad_norm": 5.181882858276367, "learning_rate": 2.520873786407767e-05, "loss": 0.1098, "step": 5107 }, { "epoch": 49.528915662650604, "grad_norm": 8.38620376586914, "learning_rate": 2.520388349514563e-05, "loss": 0.3045, "step": 5108 }, { "epoch": 49.53855421686747, "grad_norm": 7.187387943267822, "learning_rate": 2.519902912621359e-05, "loss": 0.1425, "step": 5109 }, { "epoch": 49.54819277108434, "grad_norm": 13.664786338806152, "learning_rate": 2.5194174757281552e-05, "loss": 0.2312, "step": 5110 }, { "epoch": 49.55783132530121, "grad_norm": 16.216371536254883, "learning_rate": 2.5189320388349513e-05, "loss": 0.4344, "step": 5111 }, { "epoch": 49.56746987951807, "grad_norm": 20.87324333190918, "learning_rate": 2.5184466019417474e-05, "loss": 0.4978, "step": 5112 }, { "epoch": 49.57710843373494, "grad_norm": 14.864775657653809, "learning_rate": 2.5179611650485442e-05, "loss": 0.3341, "step": 5113 }, { "epoch": 49.586746987951805, "grad_norm": 17.674768447875977, "learning_rate": 2.51747572815534e-05, "loss": 0.2225, "step": 5114 }, { "epoch": 49.596385542168676, "grad_norm": 2.452301502227783, "learning_rate": 2.516990291262136e-05, "loss": 0.2627, "step": 5115 }, { "epoch": 49.60602409638554, "grad_norm": 17.31108283996582, "learning_rate": 2.5165048543689322e-05, "loss": 0.1399, "step": 5116 }, { "epoch": 49.61566265060241, "grad_norm": 15.260538101196289, "learning_rate": 2.5160194174757283e-05, "loss": 0.1969, "step": 5117 }, { "epoch": 49.62530120481928, "grad_norm": 10.777791023254395, "learning_rate": 2.5155339805825245e-05, "loss": 0.1012, "step": 5118 }, { "epoch": 49.63493975903614, "grad_norm": 10.114513397216797, "learning_rate": 2.5150485436893206e-05, "loss": 0.2762, "step": 5119 }, { "epoch": 49.644578313253014, "grad_norm": 1.8354265689849854, "learning_rate": 2.5145631067961167e-05, "loss": 0.2822, "step": 5120 }, { "epoch": 49.65421686746988, "grad_norm": 1.1232768297195435, "learning_rate": 2.5140776699029128e-05, "loss": 0.2048, "step": 5121 }, { "epoch": 49.66385542168675, "grad_norm": 0.8378978967666626, "learning_rate": 2.513592233009709e-05, "loss": 0.1413, "step": 5122 }, { "epoch": 49.67349397590361, "grad_norm": 20.019065856933594, "learning_rate": 2.513106796116505e-05, "loss": 0.2652, "step": 5123 }, { "epoch": 49.68313253012048, "grad_norm": 7.959091663360596, "learning_rate": 2.512621359223301e-05, "loss": 0.1451, "step": 5124 }, { "epoch": 49.69277108433735, "grad_norm": 13.88736343383789, "learning_rate": 2.5121359223300973e-05, "loss": 0.299, "step": 5125 }, { "epoch": 49.702409638554215, "grad_norm": 19.01999282836914, "learning_rate": 2.5116504854368934e-05, "loss": 0.2889, "step": 5126 }, { "epoch": 49.712048192771086, "grad_norm": 19.96080207824707, "learning_rate": 2.5111650485436895e-05, "loss": 0.2164, "step": 5127 }, { "epoch": 49.72168674698795, "grad_norm": 8.103915214538574, "learning_rate": 2.5106796116504856e-05, "loss": 0.5139, "step": 5128 }, { "epoch": 49.73132530120482, "grad_norm": 15.078468322753906, "learning_rate": 2.5101941747572817e-05, "loss": 0.2234, "step": 5129 }, { "epoch": 49.74096385542169, "grad_norm": 11.21373462677002, "learning_rate": 2.5097087378640778e-05, "loss": 0.1935, "step": 5130 }, { "epoch": 49.75060240963855, "grad_norm": 16.31570053100586, "learning_rate": 2.5092233009708736e-05, "loss": 0.2232, "step": 5131 }, { "epoch": 49.76024096385542, "grad_norm": 29.589059829711914, "learning_rate": 2.5087378640776697e-05, "loss": 0.2385, "step": 5132 }, { "epoch": 49.76987951807229, "grad_norm": 3.638805866241455, "learning_rate": 2.508252427184466e-05, "loss": 0.2723, "step": 5133 }, { "epoch": 49.77951807228916, "grad_norm": 25.106813430786133, "learning_rate": 2.507766990291262e-05, "loss": 0.443, "step": 5134 }, { "epoch": 49.78915662650603, "grad_norm": 9.55923843383789, "learning_rate": 2.507281553398058e-05, "loss": 0.3897, "step": 5135 }, { "epoch": 49.79879518072289, "grad_norm": 8.322687149047852, "learning_rate": 2.5067961165048542e-05, "loss": 0.3792, "step": 5136 }, { "epoch": 49.80843373493976, "grad_norm": 18.46726417541504, "learning_rate": 2.5063106796116503e-05, "loss": 0.3509, "step": 5137 }, { "epoch": 49.818072289156625, "grad_norm": 2.6527600288391113, "learning_rate": 2.505825242718447e-05, "loss": 0.2083, "step": 5138 }, { "epoch": 49.827710843373495, "grad_norm": 2.3841512203216553, "learning_rate": 2.5053398058252432e-05, "loss": 0.1903, "step": 5139 }, { "epoch": 49.83734939759036, "grad_norm": 22.115583419799805, "learning_rate": 2.5048543689320393e-05, "loss": 0.1364, "step": 5140 }, { "epoch": 49.84698795180723, "grad_norm": 12.37524700164795, "learning_rate": 2.5043689320388354e-05, "loss": 0.2934, "step": 5141 }, { "epoch": 49.8566265060241, "grad_norm": 2.6487555503845215, "learning_rate": 2.5038834951456312e-05, "loss": 0.1604, "step": 5142 }, { "epoch": 49.86626506024096, "grad_norm": 21.987350463867188, "learning_rate": 2.5033980582524273e-05, "loss": 0.2977, "step": 5143 }, { "epoch": 49.87590361445783, "grad_norm": 16.136564254760742, "learning_rate": 2.5029126213592234e-05, "loss": 0.2812, "step": 5144 }, { "epoch": 49.8855421686747, "grad_norm": 14.339835166931152, "learning_rate": 2.5024271844660195e-05, "loss": 0.2531, "step": 5145 }, { "epoch": 49.89518072289157, "grad_norm": 3.357142925262451, "learning_rate": 2.5019417475728157e-05, "loss": 0.3309, "step": 5146 }, { "epoch": 49.90481927710843, "grad_norm": 5.645907878875732, "learning_rate": 2.5014563106796118e-05, "loss": 0.1586, "step": 5147 }, { "epoch": 49.9144578313253, "grad_norm": 7.914966583251953, "learning_rate": 2.500970873786408e-05, "loss": 0.1543, "step": 5148 }, { "epoch": 49.92409638554217, "grad_norm": 1.9309697151184082, "learning_rate": 2.500485436893204e-05, "loss": 0.2538, "step": 5149 }, { "epoch": 49.933734939759034, "grad_norm": 4.199207305908203, "learning_rate": 2.5e-05, "loss": 0.2718, "step": 5150 }, { "epoch": 49.943373493975905, "grad_norm": 7.380332946777344, "learning_rate": 2.4995145631067962e-05, "loss": 0.2686, "step": 5151 }, { "epoch": 49.95301204819277, "grad_norm": 6.170539855957031, "learning_rate": 2.4990291262135923e-05, "loss": 0.2187, "step": 5152 }, { "epoch": 49.96265060240964, "grad_norm": 30.197397232055664, "learning_rate": 2.4985436893203885e-05, "loss": 0.2612, "step": 5153 }, { "epoch": 49.97228915662651, "grad_norm": 12.961771011352539, "learning_rate": 2.4980582524271846e-05, "loss": 0.2466, "step": 5154 }, { "epoch": 49.98192771084337, "grad_norm": 3.727837085723877, "learning_rate": 2.4975728155339807e-05, "loss": 0.0771, "step": 5155 }, { "epoch": 49.99156626506024, "grad_norm": 24.39361000061035, "learning_rate": 2.4970873786407768e-05, "loss": 0.4049, "step": 5156 }, { "epoch": 50.00722891566265, "grad_norm": 8.907328605651855, "learning_rate": 2.496601941747573e-05, "loss": 0.3122, "step": 5157 }, { "epoch": 50.01686746987952, "grad_norm": 2.522019386291504, "learning_rate": 2.496116504854369e-05, "loss": 0.2761, "step": 5158 }, { "epoch": 50.026506024096385, "grad_norm": 13.237955093383789, "learning_rate": 2.495631067961165e-05, "loss": 0.2741, "step": 5159 }, { "epoch": 50.036144578313255, "grad_norm": 6.281686782836914, "learning_rate": 2.4951456310679613e-05, "loss": 0.337, "step": 5160 }, { "epoch": 50.04578313253012, "grad_norm": 3.280390739440918, "learning_rate": 2.4946601941747574e-05, "loss": 0.3179, "step": 5161 }, { "epoch": 50.05542168674699, "grad_norm": 3.183603525161743, "learning_rate": 2.4941747572815535e-05, "loss": 0.3254, "step": 5162 }, { "epoch": 50.06506024096385, "grad_norm": 3.7009220123291016, "learning_rate": 2.4936893203883496e-05, "loss": 0.1959, "step": 5163 }, { "epoch": 50.07469879518072, "grad_norm": 5.224655628204346, "learning_rate": 2.4932038834951457e-05, "loss": 0.374, "step": 5164 }, { "epoch": 50.08433734939759, "grad_norm": 11.047991752624512, "learning_rate": 2.492718446601942e-05, "loss": 0.1797, "step": 5165 }, { "epoch": 50.09397590361446, "grad_norm": 15.851334571838379, "learning_rate": 2.492233009708738e-05, "loss": 0.2871, "step": 5166 }, { "epoch": 50.10361445783133, "grad_norm": 4.503855228424072, "learning_rate": 2.491747572815534e-05, "loss": 0.2483, "step": 5167 }, { "epoch": 50.11325301204819, "grad_norm": 9.447331428527832, "learning_rate": 2.4912621359223302e-05, "loss": 0.1813, "step": 5168 }, { "epoch": 50.12289156626506, "grad_norm": 12.914770126342773, "learning_rate": 2.4907766990291266e-05, "loss": 0.3872, "step": 5169 }, { "epoch": 50.13253012048193, "grad_norm": 5.878535747528076, "learning_rate": 2.4902912621359224e-05, "loss": 0.418, "step": 5170 }, { "epoch": 50.142168674698794, "grad_norm": 22.197799682617188, "learning_rate": 2.4898058252427185e-05, "loss": 0.2807, "step": 5171 }, { "epoch": 50.151807228915665, "grad_norm": 39.28121566772461, "learning_rate": 2.4893203883495146e-05, "loss": 0.297, "step": 5172 }, { "epoch": 50.16144578313253, "grad_norm": 2.5467443466186523, "learning_rate": 2.4888349514563108e-05, "loss": 0.3503, "step": 5173 }, { "epoch": 50.1710843373494, "grad_norm": 1.7560611963272095, "learning_rate": 2.488349514563107e-05, "loss": 0.18, "step": 5174 }, { "epoch": 50.18072289156626, "grad_norm": 5.737582683563232, "learning_rate": 2.487864077669903e-05, "loss": 0.3116, "step": 5175 }, { "epoch": 50.19036144578313, "grad_norm": 7.978929042816162, "learning_rate": 2.487378640776699e-05, "loss": 0.4075, "step": 5176 }, { "epoch": 50.2, "grad_norm": 3.8501205444335938, "learning_rate": 2.4868932038834952e-05, "loss": 0.3099, "step": 5177 }, { "epoch": 50.209638554216866, "grad_norm": 4.564516067504883, "learning_rate": 2.4864077669902913e-05, "loss": 0.299, "step": 5178 }, { "epoch": 50.21927710843374, "grad_norm": 2.4607748985290527, "learning_rate": 2.4859223300970874e-05, "loss": 0.2824, "step": 5179 }, { "epoch": 50.2289156626506, "grad_norm": 2.611238718032837, "learning_rate": 2.4854368932038836e-05, "loss": 0.1655, "step": 5180 }, { "epoch": 50.23855421686747, "grad_norm": 2.83522891998291, "learning_rate": 2.4849514563106797e-05, "loss": 0.33, "step": 5181 }, { "epoch": 50.24819277108434, "grad_norm": 8.188576698303223, "learning_rate": 2.484466019417476e-05, "loss": 0.4049, "step": 5182 }, { "epoch": 50.257831325301204, "grad_norm": 6.947574138641357, "learning_rate": 2.4839805825242722e-05, "loss": 0.4124, "step": 5183 }, { "epoch": 50.267469879518075, "grad_norm": 4.818913459777832, "learning_rate": 2.483495145631068e-05, "loss": 0.1474, "step": 5184 }, { "epoch": 50.27710843373494, "grad_norm": 11.602519035339355, "learning_rate": 2.483009708737864e-05, "loss": 0.2119, "step": 5185 }, { "epoch": 50.28674698795181, "grad_norm": 2.191000461578369, "learning_rate": 2.4825242718446602e-05, "loss": 0.1562, "step": 5186 }, { "epoch": 50.29638554216867, "grad_norm": 6.095247745513916, "learning_rate": 2.4820388349514564e-05, "loss": 0.2038, "step": 5187 }, { "epoch": 50.30602409638554, "grad_norm": 35.306739807128906, "learning_rate": 2.4815533980582525e-05, "loss": 0.3077, "step": 5188 }, { "epoch": 50.31566265060241, "grad_norm": 10.33797836303711, "learning_rate": 2.4810679611650486e-05, "loss": 0.2649, "step": 5189 }, { "epoch": 50.325301204819276, "grad_norm": 1.9121370315551758, "learning_rate": 2.4805825242718447e-05, "loss": 0.1886, "step": 5190 }, { "epoch": 50.334939759036146, "grad_norm": 5.503051280975342, "learning_rate": 2.4800970873786408e-05, "loss": 0.2441, "step": 5191 }, { "epoch": 50.34457831325301, "grad_norm": 6.682211875915527, "learning_rate": 2.479611650485437e-05, "loss": 0.3389, "step": 5192 }, { "epoch": 50.35421686746988, "grad_norm": 71.58735656738281, "learning_rate": 2.479126213592233e-05, "loss": 0.3855, "step": 5193 }, { "epoch": 50.36385542168675, "grad_norm": 10.800817489624023, "learning_rate": 2.478640776699029e-05, "loss": 0.4239, "step": 5194 }, { "epoch": 50.373493975903614, "grad_norm": 2.2574408054351807, "learning_rate": 2.4781553398058256e-05, "loss": 0.3193, "step": 5195 }, { "epoch": 50.383132530120484, "grad_norm": 16.40774917602539, "learning_rate": 2.4776699029126217e-05, "loss": 0.2295, "step": 5196 }, { "epoch": 50.39277108433735, "grad_norm": 24.766143798828125, "learning_rate": 2.4771844660194175e-05, "loss": 0.3366, "step": 5197 }, { "epoch": 50.40240963855422, "grad_norm": 10.722746849060059, "learning_rate": 2.4766990291262136e-05, "loss": 0.2701, "step": 5198 }, { "epoch": 50.41204819277108, "grad_norm": 7.623980522155762, "learning_rate": 2.4762135922330097e-05, "loss": 0.2793, "step": 5199 }, { "epoch": 50.42168674698795, "grad_norm": 15.5199613571167, "learning_rate": 2.475728155339806e-05, "loss": 0.2491, "step": 5200 }, { "epoch": 50.43132530120482, "grad_norm": 7.474836349487305, "learning_rate": 2.475242718446602e-05, "loss": 0.201, "step": 5201 }, { "epoch": 50.440963855421685, "grad_norm": 14.666597366333008, "learning_rate": 2.474757281553398e-05, "loss": 0.2815, "step": 5202 }, { "epoch": 50.450602409638556, "grad_norm": 17.938976287841797, "learning_rate": 2.4742718446601942e-05, "loss": 0.2349, "step": 5203 }, { "epoch": 50.46024096385542, "grad_norm": 19.623268127441406, "learning_rate": 2.4737864077669903e-05, "loss": 0.454, "step": 5204 }, { "epoch": 50.46987951807229, "grad_norm": 9.432114601135254, "learning_rate": 2.4733009708737864e-05, "loss": 0.2973, "step": 5205 }, { "epoch": 50.47951807228916, "grad_norm": 2.6941287517547607, "learning_rate": 2.4728155339805825e-05, "loss": 0.2985, "step": 5206 }, { "epoch": 50.48915662650602, "grad_norm": 29.247581481933594, "learning_rate": 2.472330097087379e-05, "loss": 0.4494, "step": 5207 }, { "epoch": 50.498795180722894, "grad_norm": 1.8712986707687378, "learning_rate": 2.471844660194175e-05, "loss": 0.291, "step": 5208 }, { "epoch": 50.50843373493976, "grad_norm": 2.465841293334961, "learning_rate": 2.4713592233009712e-05, "loss": 0.3525, "step": 5209 }, { "epoch": 50.51807228915663, "grad_norm": 7.829378604888916, "learning_rate": 2.4708737864077673e-05, "loss": 0.4063, "step": 5210 }, { "epoch": 50.52771084337349, "grad_norm": 57.7098274230957, "learning_rate": 2.470388349514563e-05, "loss": 0.3467, "step": 5211 }, { "epoch": 50.53734939759036, "grad_norm": 6.168209075927734, "learning_rate": 2.4699029126213592e-05, "loss": 0.3068, "step": 5212 }, { "epoch": 50.54698795180723, "grad_norm": 1.607252836227417, "learning_rate": 2.4694174757281553e-05, "loss": 0.113, "step": 5213 }, { "epoch": 50.556626506024095, "grad_norm": 4.620938777923584, "learning_rate": 2.4689320388349515e-05, "loss": 0.3718, "step": 5214 }, { "epoch": 50.566265060240966, "grad_norm": 12.107338905334473, "learning_rate": 2.4684466019417476e-05, "loss": 0.2228, "step": 5215 }, { "epoch": 50.57590361445783, "grad_norm": 16.311555862426758, "learning_rate": 2.4679611650485437e-05, "loss": 0.3555, "step": 5216 }, { "epoch": 50.5855421686747, "grad_norm": 2.7669122219085693, "learning_rate": 2.4674757281553398e-05, "loss": 0.4037, "step": 5217 }, { "epoch": 50.59518072289157, "grad_norm": 6.928861618041992, "learning_rate": 2.466990291262136e-05, "loss": 0.0871, "step": 5218 }, { "epoch": 50.60481927710843, "grad_norm": 4.718478202819824, "learning_rate": 2.466504854368932e-05, "loss": 0.2939, "step": 5219 }, { "epoch": 50.6144578313253, "grad_norm": 3.449922561645508, "learning_rate": 2.4660194174757285e-05, "loss": 0.3694, "step": 5220 }, { "epoch": 50.62409638554217, "grad_norm": 9.679689407348633, "learning_rate": 2.4655339805825246e-05, "loss": 0.3384, "step": 5221 }, { "epoch": 50.63373493975904, "grad_norm": 9.493908882141113, "learning_rate": 2.4650485436893207e-05, "loss": 0.1755, "step": 5222 }, { "epoch": 50.6433734939759, "grad_norm": 2.475945234298706, "learning_rate": 2.4645631067961168e-05, "loss": 0.2065, "step": 5223 }, { "epoch": 50.65301204819277, "grad_norm": 81.16978454589844, "learning_rate": 2.464077669902913e-05, "loss": 0.3455, "step": 5224 }, { "epoch": 50.66265060240964, "grad_norm": 4.583354473114014, "learning_rate": 2.4635922330097087e-05, "loss": 0.2574, "step": 5225 }, { "epoch": 50.672289156626505, "grad_norm": 3.631075382232666, "learning_rate": 2.4631067961165048e-05, "loss": 0.3594, "step": 5226 }, { "epoch": 50.681927710843375, "grad_norm": 3.436275005340576, "learning_rate": 2.462621359223301e-05, "loss": 0.2085, "step": 5227 }, { "epoch": 50.69156626506024, "grad_norm": 14.651714324951172, "learning_rate": 2.462135922330097e-05, "loss": 0.1395, "step": 5228 }, { "epoch": 50.70120481927711, "grad_norm": 61.79590606689453, "learning_rate": 2.4616504854368932e-05, "loss": 0.1957, "step": 5229 }, { "epoch": 50.71084337349397, "grad_norm": 6.213949680328369, "learning_rate": 2.4611650485436893e-05, "loss": 0.1775, "step": 5230 }, { "epoch": 50.72048192771084, "grad_norm": 2.83866286277771, "learning_rate": 2.4606796116504854e-05, "loss": 0.2314, "step": 5231 }, { "epoch": 50.73012048192771, "grad_norm": 1.630750060081482, "learning_rate": 2.4601941747572815e-05, "loss": 0.1404, "step": 5232 }, { "epoch": 50.73975903614458, "grad_norm": 9.083125114440918, "learning_rate": 2.459708737864078e-05, "loss": 0.1828, "step": 5233 }, { "epoch": 50.74939759036145, "grad_norm": 11.61108112335205, "learning_rate": 2.459223300970874e-05, "loss": 0.4825, "step": 5234 }, { "epoch": 50.75903614457831, "grad_norm": 1.8721948862075806, "learning_rate": 2.4587378640776702e-05, "loss": 0.2399, "step": 5235 }, { "epoch": 50.76867469879518, "grad_norm": 24.194040298461914, "learning_rate": 2.4582524271844663e-05, "loss": 0.3032, "step": 5236 }, { "epoch": 50.77831325301205, "grad_norm": 6.991902828216553, "learning_rate": 2.4577669902912624e-05, "loss": 0.2865, "step": 5237 }, { "epoch": 50.787951807228914, "grad_norm": 12.905768394470215, "learning_rate": 2.4572815533980585e-05, "loss": 0.3311, "step": 5238 }, { "epoch": 50.797590361445785, "grad_norm": 31.722579956054688, "learning_rate": 2.4567961165048543e-05, "loss": 0.2571, "step": 5239 }, { "epoch": 50.80722891566265, "grad_norm": 3.750081777572632, "learning_rate": 2.4563106796116504e-05, "loss": 0.3267, "step": 5240 }, { "epoch": 50.81686746987952, "grad_norm": 2.5085031986236572, "learning_rate": 2.4558252427184465e-05, "loss": 0.2948, "step": 5241 }, { "epoch": 50.82650602409639, "grad_norm": 2.788198709487915, "learning_rate": 2.4553398058252427e-05, "loss": 0.2966, "step": 5242 }, { "epoch": 50.83614457831325, "grad_norm": 3.463812828063965, "learning_rate": 2.4548543689320388e-05, "loss": 0.2265, "step": 5243 }, { "epoch": 50.84578313253012, "grad_norm": 20.287410736083984, "learning_rate": 2.454368932038835e-05, "loss": 0.4648, "step": 5244 }, { "epoch": 50.855421686746986, "grad_norm": 6.3066816329956055, "learning_rate": 2.4538834951456313e-05, "loss": 0.2867, "step": 5245 }, { "epoch": 50.86506024096386, "grad_norm": 26.108556747436523, "learning_rate": 2.4533980582524275e-05, "loss": 0.2164, "step": 5246 }, { "epoch": 50.87469879518072, "grad_norm": 12.99937629699707, "learning_rate": 2.4529126213592236e-05, "loss": 0.3918, "step": 5247 }, { "epoch": 50.88433734939759, "grad_norm": 10.793391227722168, "learning_rate": 2.4524271844660197e-05, "loss": 0.1995, "step": 5248 }, { "epoch": 50.89397590361446, "grad_norm": 5.585360527038574, "learning_rate": 2.4519417475728158e-05, "loss": 0.4606, "step": 5249 }, { "epoch": 50.903614457831324, "grad_norm": 33.508975982666016, "learning_rate": 2.451456310679612e-05, "loss": 0.3474, "step": 5250 }, { "epoch": 50.913253012048195, "grad_norm": 3.323014736175537, "learning_rate": 2.450970873786408e-05, "loss": 0.2539, "step": 5251 }, { "epoch": 50.92289156626506, "grad_norm": 9.868334770202637, "learning_rate": 2.450485436893204e-05, "loss": 0.242, "step": 5252 }, { "epoch": 50.93253012048193, "grad_norm": 7.96504545211792, "learning_rate": 2.45e-05, "loss": 0.1616, "step": 5253 }, { "epoch": 50.94216867469879, "grad_norm": 11.046442031860352, "learning_rate": 2.449514563106796e-05, "loss": 0.3135, "step": 5254 }, { "epoch": 50.95180722891566, "grad_norm": 4.932316780090332, "learning_rate": 2.449029126213592e-05, "loss": 0.1801, "step": 5255 }, { "epoch": 50.96144578313253, "grad_norm": 23.049121856689453, "learning_rate": 2.4485436893203883e-05, "loss": 0.1398, "step": 5256 }, { "epoch": 50.971084337349396, "grad_norm": 2.383540630340576, "learning_rate": 2.4480582524271844e-05, "loss": 0.2857, "step": 5257 }, { "epoch": 50.980722891566266, "grad_norm": 14.141098022460938, "learning_rate": 2.447572815533981e-05, "loss": 0.3385, "step": 5258 }, { "epoch": 50.99036144578313, "grad_norm": 6.203896522521973, "learning_rate": 2.447087378640777e-05, "loss": 0.2238, "step": 5259 }, { "epoch": 51.006024096385545, "grad_norm": 9.046186447143555, "learning_rate": 2.446601941747573e-05, "loss": 0.2813, "step": 5260 }, { "epoch": 51.01566265060241, "grad_norm": 9.389819145202637, "learning_rate": 2.4461165048543692e-05, "loss": 0.1399, "step": 5261 }, { "epoch": 51.02530120481928, "grad_norm": 7.233315467834473, "learning_rate": 2.4456310679611653e-05, "loss": 0.2656, "step": 5262 }, { "epoch": 51.03493975903614, "grad_norm": 4.236023426055908, "learning_rate": 2.4451456310679614e-05, "loss": 0.239, "step": 5263 }, { "epoch": 51.04457831325301, "grad_norm": 4.336773872375488, "learning_rate": 2.4446601941747575e-05, "loss": 0.276, "step": 5264 }, { "epoch": 51.05421686746988, "grad_norm": 17.090696334838867, "learning_rate": 2.4441747572815536e-05, "loss": 0.2966, "step": 5265 }, { "epoch": 51.063855421686746, "grad_norm": 2.6438162326812744, "learning_rate": 2.4436893203883494e-05, "loss": 0.1173, "step": 5266 }, { "epoch": 51.07349397590362, "grad_norm": 29.311283111572266, "learning_rate": 2.4432038834951455e-05, "loss": 0.2536, "step": 5267 }, { "epoch": 51.08313253012048, "grad_norm": 3.5264434814453125, "learning_rate": 2.4427184466019416e-05, "loss": 0.0805, "step": 5268 }, { "epoch": 51.09277108433735, "grad_norm": 9.544415473937988, "learning_rate": 2.4422330097087378e-05, "loss": 0.1964, "step": 5269 }, { "epoch": 51.102409638554214, "grad_norm": 7.330490589141846, "learning_rate": 2.4417475728155342e-05, "loss": 0.3904, "step": 5270 }, { "epoch": 51.112048192771084, "grad_norm": 9.76351547241211, "learning_rate": 2.4412621359223303e-05, "loss": 0.1411, "step": 5271 }, { "epoch": 51.121686746987955, "grad_norm": 2.3509433269500732, "learning_rate": 2.4407766990291264e-05, "loss": 0.2207, "step": 5272 }, { "epoch": 51.13132530120482, "grad_norm": 9.437243461608887, "learning_rate": 2.4402912621359226e-05, "loss": 0.3081, "step": 5273 }, { "epoch": 51.14096385542169, "grad_norm": 22.713537216186523, "learning_rate": 2.4398058252427187e-05, "loss": 0.2286, "step": 5274 }, { "epoch": 51.15060240963855, "grad_norm": 3.949253559112549, "learning_rate": 2.4393203883495148e-05, "loss": 0.2013, "step": 5275 }, { "epoch": 51.16024096385542, "grad_norm": 2.1908674240112305, "learning_rate": 2.438834951456311e-05, "loss": 0.2051, "step": 5276 }, { "epoch": 51.16987951807229, "grad_norm": 10.450827598571777, "learning_rate": 2.438349514563107e-05, "loss": 0.3379, "step": 5277 }, { "epoch": 51.179518072289156, "grad_norm": 4.129067897796631, "learning_rate": 2.437864077669903e-05, "loss": 0.1426, "step": 5278 }, { "epoch": 51.189156626506026, "grad_norm": 3.382817029953003, "learning_rate": 2.4373786407766992e-05, "loss": 0.2724, "step": 5279 }, { "epoch": 51.19879518072289, "grad_norm": 10.872232437133789, "learning_rate": 2.436893203883495e-05, "loss": 0.1394, "step": 5280 }, { "epoch": 51.20843373493976, "grad_norm": 7.96065092086792, "learning_rate": 2.436407766990291e-05, "loss": 0.2265, "step": 5281 }, { "epoch": 51.21807228915662, "grad_norm": 1.877575159072876, "learning_rate": 2.4359223300970872e-05, "loss": 0.2224, "step": 5282 }, { "epoch": 51.227710843373494, "grad_norm": 4.35508394241333, "learning_rate": 2.4354368932038837e-05, "loss": 0.3726, "step": 5283 }, { "epoch": 51.237349397590364, "grad_norm": 15.70058536529541, "learning_rate": 2.4349514563106798e-05, "loss": 0.2415, "step": 5284 }, { "epoch": 51.24698795180723, "grad_norm": 4.661725044250488, "learning_rate": 2.434466019417476e-05, "loss": 0.3976, "step": 5285 }, { "epoch": 51.2566265060241, "grad_norm": 22.126453399658203, "learning_rate": 2.433980582524272e-05, "loss": 0.2988, "step": 5286 }, { "epoch": 51.26626506024096, "grad_norm": 5.691509246826172, "learning_rate": 2.433495145631068e-05, "loss": 0.2461, "step": 5287 }, { "epoch": 51.27590361445783, "grad_norm": 6.417958736419678, "learning_rate": 2.4330097087378643e-05, "loss": 0.2852, "step": 5288 }, { "epoch": 51.2855421686747, "grad_norm": 8.957808494567871, "learning_rate": 2.4325242718446604e-05, "loss": 0.1914, "step": 5289 }, { "epoch": 51.295180722891565, "grad_norm": 31.570411682128906, "learning_rate": 2.4320388349514565e-05, "loss": 0.4522, "step": 5290 }, { "epoch": 51.304819277108436, "grad_norm": 8.409769058227539, "learning_rate": 2.4315533980582526e-05, "loss": 0.2266, "step": 5291 }, { "epoch": 51.3144578313253, "grad_norm": 2.507272243499756, "learning_rate": 2.4310679611650487e-05, "loss": 0.2167, "step": 5292 }, { "epoch": 51.32409638554217, "grad_norm": 12.592601776123047, "learning_rate": 2.430582524271845e-05, "loss": 0.3247, "step": 5293 }, { "epoch": 51.33373493975903, "grad_norm": 16.777034759521484, "learning_rate": 2.4300970873786406e-05, "loss": 0.3529, "step": 5294 }, { "epoch": 51.3433734939759, "grad_norm": 2.0017008781433105, "learning_rate": 2.4296116504854367e-05, "loss": 0.3281, "step": 5295 }, { "epoch": 51.353012048192774, "grad_norm": 9.51331615447998, "learning_rate": 2.4291262135922332e-05, "loss": 0.1988, "step": 5296 }, { "epoch": 51.36265060240964, "grad_norm": 1.9859185218811035, "learning_rate": 2.4286407766990293e-05, "loss": 0.1463, "step": 5297 }, { "epoch": 51.37228915662651, "grad_norm": 3.0515222549438477, "learning_rate": 2.4281553398058254e-05, "loss": 0.2384, "step": 5298 }, { "epoch": 51.38192771084337, "grad_norm": 8.927729606628418, "learning_rate": 2.4276699029126215e-05, "loss": 0.1875, "step": 5299 }, { "epoch": 51.39156626506024, "grad_norm": 7.054436206817627, "learning_rate": 2.4271844660194176e-05, "loss": 0.1175, "step": 5300 }, { "epoch": 51.40120481927711, "grad_norm": 3.200510025024414, "learning_rate": 2.4266990291262138e-05, "loss": 0.1387, "step": 5301 }, { "epoch": 51.410843373493975, "grad_norm": 4.32614803314209, "learning_rate": 2.42621359223301e-05, "loss": 0.1813, "step": 5302 }, { "epoch": 51.420481927710846, "grad_norm": 3.6913740634918213, "learning_rate": 2.425728155339806e-05, "loss": 0.3187, "step": 5303 }, { "epoch": 51.43012048192771, "grad_norm": 7.481873512268066, "learning_rate": 2.425242718446602e-05, "loss": 0.303, "step": 5304 }, { "epoch": 51.43975903614458, "grad_norm": 2.251633882522583, "learning_rate": 2.4247572815533982e-05, "loss": 0.1328, "step": 5305 }, { "epoch": 51.44939759036144, "grad_norm": 5.3600053787231445, "learning_rate": 2.4242718446601943e-05, "loss": 0.3419, "step": 5306 }, { "epoch": 51.45903614457831, "grad_norm": 2.7877025604248047, "learning_rate": 2.4237864077669904e-05, "loss": 0.3127, "step": 5307 }, { "epoch": 51.46867469879518, "grad_norm": 4.481311321258545, "learning_rate": 2.4233009708737866e-05, "loss": 0.1596, "step": 5308 }, { "epoch": 51.47831325301205, "grad_norm": 3.5606915950775146, "learning_rate": 2.4228155339805827e-05, "loss": 0.2298, "step": 5309 }, { "epoch": 51.48795180722892, "grad_norm": 6.211640357971191, "learning_rate": 2.4223300970873788e-05, "loss": 0.2038, "step": 5310 }, { "epoch": 51.49759036144578, "grad_norm": 4.3607258796691895, "learning_rate": 2.421844660194175e-05, "loss": 0.386, "step": 5311 }, { "epoch": 51.50722891566265, "grad_norm": 2.789780378341675, "learning_rate": 2.421359223300971e-05, "loss": 0.3323, "step": 5312 }, { "epoch": 51.51686746987952, "grad_norm": 3.1407577991485596, "learning_rate": 2.420873786407767e-05, "loss": 0.1544, "step": 5313 }, { "epoch": 51.526506024096385, "grad_norm": 11.81666374206543, "learning_rate": 2.4203883495145632e-05, "loss": 0.2199, "step": 5314 }, { "epoch": 51.536144578313255, "grad_norm": 3.4541351795196533, "learning_rate": 2.4199029126213594e-05, "loss": 0.2295, "step": 5315 }, { "epoch": 51.54578313253012, "grad_norm": 4.2406511306762695, "learning_rate": 2.4194174757281555e-05, "loss": 0.3497, "step": 5316 }, { "epoch": 51.55542168674699, "grad_norm": 4.950170516967773, "learning_rate": 2.4189320388349516e-05, "loss": 0.5065, "step": 5317 }, { "epoch": 51.56506024096385, "grad_norm": 4.890383243560791, "learning_rate": 2.4184466019417477e-05, "loss": 0.3146, "step": 5318 }, { "epoch": 51.57469879518072, "grad_norm": 8.978775024414062, "learning_rate": 2.4179611650485438e-05, "loss": 0.2051, "step": 5319 }, { "epoch": 51.58433734939759, "grad_norm": 39.22322463989258, "learning_rate": 2.41747572815534e-05, "loss": 0.3674, "step": 5320 }, { "epoch": 51.59397590361446, "grad_norm": 5.792113780975342, "learning_rate": 2.416990291262136e-05, "loss": 0.2939, "step": 5321 }, { "epoch": 51.60361445783133, "grad_norm": 7.266948699951172, "learning_rate": 2.416504854368932e-05, "loss": 0.3295, "step": 5322 }, { "epoch": 51.61325301204819, "grad_norm": 22.806447982788086, "learning_rate": 2.4160194174757283e-05, "loss": 0.1704, "step": 5323 }, { "epoch": 51.62289156626506, "grad_norm": 21.617597579956055, "learning_rate": 2.4155339805825244e-05, "loss": 0.2661, "step": 5324 }, { "epoch": 51.63253012048193, "grad_norm": 18.636003494262695, "learning_rate": 2.4150485436893205e-05, "loss": 0.4522, "step": 5325 }, { "epoch": 51.642168674698794, "grad_norm": 1.8693569898605347, "learning_rate": 2.4145631067961166e-05, "loss": 0.0675, "step": 5326 }, { "epoch": 51.651807228915665, "grad_norm": 11.401555061340332, "learning_rate": 2.4140776699029127e-05, "loss": 0.4617, "step": 5327 }, { "epoch": 51.66144578313253, "grad_norm": 5.319490432739258, "learning_rate": 2.413592233009709e-05, "loss": 0.2095, "step": 5328 }, { "epoch": 51.6710843373494, "grad_norm": 14.766127586364746, "learning_rate": 2.413106796116505e-05, "loss": 0.2574, "step": 5329 }, { "epoch": 51.68072289156626, "grad_norm": 8.632861137390137, "learning_rate": 2.412621359223301e-05, "loss": 0.2922, "step": 5330 }, { "epoch": 51.69036144578313, "grad_norm": 2.6830642223358154, "learning_rate": 2.4121359223300972e-05, "loss": 0.1679, "step": 5331 }, { "epoch": 51.7, "grad_norm": 13.308432579040527, "learning_rate": 2.4116504854368933e-05, "loss": 0.4849, "step": 5332 }, { "epoch": 51.709638554216866, "grad_norm": 2.2773807048797607, "learning_rate": 2.4111650485436894e-05, "loss": 0.3208, "step": 5333 }, { "epoch": 51.71927710843374, "grad_norm": 3.1918342113494873, "learning_rate": 2.4106796116504855e-05, "loss": 0.2039, "step": 5334 }, { "epoch": 51.7289156626506, "grad_norm": 3.5709824562072754, "learning_rate": 2.4101941747572817e-05, "loss": 0.3012, "step": 5335 }, { "epoch": 51.73855421686747, "grad_norm": 9.846823692321777, "learning_rate": 2.4097087378640778e-05, "loss": 0.2097, "step": 5336 }, { "epoch": 51.74819277108434, "grad_norm": 2.336000680923462, "learning_rate": 2.409223300970874e-05, "loss": 0.1669, "step": 5337 }, { "epoch": 51.757831325301204, "grad_norm": 20.608095169067383, "learning_rate": 2.40873786407767e-05, "loss": 0.5334, "step": 5338 }, { "epoch": 51.767469879518075, "grad_norm": 5.5574564933776855, "learning_rate": 2.408252427184466e-05, "loss": 0.1038, "step": 5339 }, { "epoch": 51.77710843373494, "grad_norm": 2.4621031284332275, "learning_rate": 2.4077669902912622e-05, "loss": 0.2752, "step": 5340 }, { "epoch": 51.78674698795181, "grad_norm": 8.53105354309082, "learning_rate": 2.4072815533980583e-05, "loss": 0.3447, "step": 5341 }, { "epoch": 51.79638554216867, "grad_norm": 2.9004528522491455, "learning_rate": 2.4067961165048545e-05, "loss": 0.1566, "step": 5342 }, { "epoch": 51.80602409638554, "grad_norm": 4.405013084411621, "learning_rate": 2.4063106796116506e-05, "loss": 0.2126, "step": 5343 }, { "epoch": 51.81566265060241, "grad_norm": 14.085939407348633, "learning_rate": 2.4058252427184467e-05, "loss": 0.0815, "step": 5344 }, { "epoch": 51.825301204819276, "grad_norm": 12.860100746154785, "learning_rate": 2.4053398058252428e-05, "loss": 0.2947, "step": 5345 }, { "epoch": 51.834939759036146, "grad_norm": 9.75637149810791, "learning_rate": 2.404854368932039e-05, "loss": 0.2667, "step": 5346 }, { "epoch": 51.84457831325301, "grad_norm": 3.4763965606689453, "learning_rate": 2.404368932038835e-05, "loss": 0.0995, "step": 5347 }, { "epoch": 51.85421686746988, "grad_norm": 5.462562084197998, "learning_rate": 2.403883495145631e-05, "loss": 0.2902, "step": 5348 }, { "epoch": 51.86385542168675, "grad_norm": 2.878401756286621, "learning_rate": 2.4033980582524273e-05, "loss": 0.0557, "step": 5349 }, { "epoch": 51.873493975903614, "grad_norm": 12.286775588989258, "learning_rate": 2.4029126213592234e-05, "loss": 0.2458, "step": 5350 }, { "epoch": 51.883132530120484, "grad_norm": 3.3984057903289795, "learning_rate": 2.4024271844660195e-05, "loss": 0.2432, "step": 5351 }, { "epoch": 51.89277108433735, "grad_norm": 3.617366075515747, "learning_rate": 2.4019417475728156e-05, "loss": 0.3651, "step": 5352 }, { "epoch": 51.90240963855422, "grad_norm": 5.103437900543213, "learning_rate": 2.4014563106796117e-05, "loss": 0.1312, "step": 5353 }, { "epoch": 51.91204819277108, "grad_norm": 9.429754257202148, "learning_rate": 2.400970873786408e-05, "loss": 0.2543, "step": 5354 }, { "epoch": 51.92168674698795, "grad_norm": 7.211076259613037, "learning_rate": 2.400485436893204e-05, "loss": 0.2979, "step": 5355 }, { "epoch": 51.93132530120482, "grad_norm": 14.992973327636719, "learning_rate": 2.4e-05, "loss": 0.2428, "step": 5356 }, { "epoch": 51.940963855421685, "grad_norm": 4.09279727935791, "learning_rate": 2.3995145631067962e-05, "loss": 0.2636, "step": 5357 }, { "epoch": 51.950602409638556, "grad_norm": 10.647156715393066, "learning_rate": 2.3990291262135923e-05, "loss": 0.474, "step": 5358 }, { "epoch": 51.96024096385542, "grad_norm": 6.215211391448975, "learning_rate": 2.3985436893203887e-05, "loss": 0.3666, "step": 5359 }, { "epoch": 51.96987951807229, "grad_norm": 7.304287433624268, "learning_rate": 2.3980582524271845e-05, "loss": 0.4994, "step": 5360 }, { "epoch": 51.97951807228916, "grad_norm": 8.944281578063965, "learning_rate": 2.3975728155339806e-05, "loss": 0.3889, "step": 5361 }, { "epoch": 51.98915662650602, "grad_norm": 5.695209503173828, "learning_rate": 2.3970873786407767e-05, "loss": 0.1434, "step": 5362 }, { "epoch": 52.00481927710843, "grad_norm": 11.106424331665039, "learning_rate": 2.396601941747573e-05, "loss": 0.2389, "step": 5363 }, { "epoch": 52.0144578313253, "grad_norm": 4.62044095993042, "learning_rate": 2.396116504854369e-05, "loss": 0.2074, "step": 5364 }, { "epoch": 52.024096385542165, "grad_norm": 4.594639301300049, "learning_rate": 2.395631067961165e-05, "loss": 0.2994, "step": 5365 }, { "epoch": 52.033734939759036, "grad_norm": 20.956256866455078, "learning_rate": 2.3951456310679612e-05, "loss": 0.2575, "step": 5366 }, { "epoch": 52.043373493975906, "grad_norm": 4.868535995483398, "learning_rate": 2.3946601941747573e-05, "loss": 0.2795, "step": 5367 }, { "epoch": 52.05301204819277, "grad_norm": 6.025539875030518, "learning_rate": 2.3941747572815534e-05, "loss": 0.2582, "step": 5368 }, { "epoch": 52.06265060240964, "grad_norm": 4.6775360107421875, "learning_rate": 2.3936893203883496e-05, "loss": 0.1293, "step": 5369 }, { "epoch": 52.0722891566265, "grad_norm": 13.439738273620605, "learning_rate": 2.3932038834951457e-05, "loss": 0.3888, "step": 5370 }, { "epoch": 52.081927710843374, "grad_norm": 2.067972183227539, "learning_rate": 2.392718446601942e-05, "loss": 0.1806, "step": 5371 }, { "epoch": 52.091566265060244, "grad_norm": 2.4817168712615967, "learning_rate": 2.3922330097087382e-05, "loss": 0.1576, "step": 5372 }, { "epoch": 52.10120481927711, "grad_norm": 14.618334770202637, "learning_rate": 2.3917475728155343e-05, "loss": 0.205, "step": 5373 }, { "epoch": 52.11084337349398, "grad_norm": 7.759176254272461, "learning_rate": 2.39126213592233e-05, "loss": 0.2627, "step": 5374 }, { "epoch": 52.12048192771084, "grad_norm": 4.856598854064941, "learning_rate": 2.3907766990291262e-05, "loss": 0.357, "step": 5375 }, { "epoch": 52.13012048192771, "grad_norm": 11.942549705505371, "learning_rate": 2.3902912621359224e-05, "loss": 0.3565, "step": 5376 }, { "epoch": 52.139759036144575, "grad_norm": 2.9651551246643066, "learning_rate": 2.3898058252427185e-05, "loss": 0.2598, "step": 5377 }, { "epoch": 52.149397590361446, "grad_norm": 6.0492658615112305, "learning_rate": 2.3893203883495146e-05, "loss": 0.3993, "step": 5378 }, { "epoch": 52.159036144578316, "grad_norm": 4.741756916046143, "learning_rate": 2.3888349514563107e-05, "loss": 0.2514, "step": 5379 }, { "epoch": 52.16867469879518, "grad_norm": 10.003270149230957, "learning_rate": 2.3883495145631068e-05, "loss": 0.2253, "step": 5380 }, { "epoch": 52.17831325301205, "grad_norm": 3.5083372592926025, "learning_rate": 2.387864077669903e-05, "loss": 0.1787, "step": 5381 }, { "epoch": 52.18795180722891, "grad_norm": 2.619474411010742, "learning_rate": 2.387378640776699e-05, "loss": 0.0925, "step": 5382 }, { "epoch": 52.19759036144578, "grad_norm": 10.649932861328125, "learning_rate": 2.386893203883495e-05, "loss": 0.2574, "step": 5383 }, { "epoch": 52.207228915662654, "grad_norm": 20.12256622314453, "learning_rate": 2.3864077669902916e-05, "loss": 0.362, "step": 5384 }, { "epoch": 52.21686746987952, "grad_norm": 55.439613342285156, "learning_rate": 2.3859223300970877e-05, "loss": 0.3182, "step": 5385 }, { "epoch": 52.22650602409639, "grad_norm": 3.823984146118164, "learning_rate": 2.385436893203884e-05, "loss": 0.3964, "step": 5386 }, { "epoch": 52.23614457831325, "grad_norm": 1.0426145792007446, "learning_rate": 2.38495145631068e-05, "loss": 0.0904, "step": 5387 }, { "epoch": 52.24578313253012, "grad_norm": 7.317599296569824, "learning_rate": 2.3844660194174757e-05, "loss": 0.3085, "step": 5388 }, { "epoch": 52.255421686746985, "grad_norm": 0.9535546898841858, "learning_rate": 2.383980582524272e-05, "loss": 0.0641, "step": 5389 }, { "epoch": 52.265060240963855, "grad_norm": 4.2968878746032715, "learning_rate": 2.383495145631068e-05, "loss": 0.3223, "step": 5390 }, { "epoch": 52.274698795180726, "grad_norm": 4.136333465576172, "learning_rate": 2.383009708737864e-05, "loss": 0.1864, "step": 5391 }, { "epoch": 52.28433734939759, "grad_norm": 13.838615417480469, "learning_rate": 2.3825242718446602e-05, "loss": 0.186, "step": 5392 }, { "epoch": 52.29397590361446, "grad_norm": 2.909851551055908, "learning_rate": 2.3820388349514563e-05, "loss": 0.2903, "step": 5393 }, { "epoch": 52.30361445783132, "grad_norm": 12.786643028259277, "learning_rate": 2.3815533980582524e-05, "loss": 0.2829, "step": 5394 }, { "epoch": 52.31325301204819, "grad_norm": 9.23893928527832, "learning_rate": 2.3810679611650485e-05, "loss": 0.1663, "step": 5395 }, { "epoch": 52.32289156626506, "grad_norm": 9.658585548400879, "learning_rate": 2.3805825242718446e-05, "loss": 0.2374, "step": 5396 }, { "epoch": 52.33253012048193, "grad_norm": 5.390546798706055, "learning_rate": 2.380097087378641e-05, "loss": 0.2204, "step": 5397 }, { "epoch": 52.3421686746988, "grad_norm": 3.5070745944976807, "learning_rate": 2.3796116504854372e-05, "loss": 0.1184, "step": 5398 }, { "epoch": 52.35180722891566, "grad_norm": 12.372920989990234, "learning_rate": 2.3791262135922333e-05, "loss": 0.1787, "step": 5399 }, { "epoch": 52.36144578313253, "grad_norm": 5.601449489593506, "learning_rate": 2.3786407766990294e-05, "loss": 0.3391, "step": 5400 }, { "epoch": 52.371084337349394, "grad_norm": 4.405644416809082, "learning_rate": 2.3781553398058256e-05, "loss": 0.2561, "step": 5401 }, { "epoch": 52.380722891566265, "grad_norm": 3.8572885990142822, "learning_rate": 2.3776699029126213e-05, "loss": 0.1971, "step": 5402 }, { "epoch": 52.390361445783135, "grad_norm": 5.393390655517578, "learning_rate": 2.3771844660194174e-05, "loss": 0.2584, "step": 5403 }, { "epoch": 52.4, "grad_norm": 9.612507820129395, "learning_rate": 2.3766990291262136e-05, "loss": 0.2933, "step": 5404 }, { "epoch": 52.40963855421687, "grad_norm": 11.346028327941895, "learning_rate": 2.3762135922330097e-05, "loss": 0.3139, "step": 5405 }, { "epoch": 52.41927710843373, "grad_norm": 2.67439866065979, "learning_rate": 2.3757281553398058e-05, "loss": 0.1779, "step": 5406 }, { "epoch": 52.4289156626506, "grad_norm": 16.589107513427734, "learning_rate": 2.375242718446602e-05, "loss": 0.2268, "step": 5407 }, { "epoch": 52.43855421686747, "grad_norm": 5.184287071228027, "learning_rate": 2.374757281553398e-05, "loss": 0.2495, "step": 5408 }, { "epoch": 52.44819277108434, "grad_norm": 2.404958486557007, "learning_rate": 2.3742718446601945e-05, "loss": 0.1391, "step": 5409 }, { "epoch": 52.45783132530121, "grad_norm": 15.327077865600586, "learning_rate": 2.3737864077669906e-05, "loss": 0.3618, "step": 5410 }, { "epoch": 52.46746987951807, "grad_norm": 3.757091999053955, "learning_rate": 2.3733009708737867e-05, "loss": 0.0912, "step": 5411 }, { "epoch": 52.47710843373494, "grad_norm": 13.916970252990723, "learning_rate": 2.3728155339805828e-05, "loss": 0.4012, "step": 5412 }, { "epoch": 52.486746987951804, "grad_norm": 6.279572486877441, "learning_rate": 2.372330097087379e-05, "loss": 0.0705, "step": 5413 }, { "epoch": 52.496385542168674, "grad_norm": 9.969646453857422, "learning_rate": 2.371844660194175e-05, "loss": 0.2897, "step": 5414 }, { "epoch": 52.506024096385545, "grad_norm": 2.9001009464263916, "learning_rate": 2.3713592233009708e-05, "loss": 0.3012, "step": 5415 }, { "epoch": 52.51566265060241, "grad_norm": 5.371957302093506, "learning_rate": 2.370873786407767e-05, "loss": 0.3971, "step": 5416 }, { "epoch": 52.52530120481928, "grad_norm": 8.030728340148926, "learning_rate": 2.370388349514563e-05, "loss": 0.5153, "step": 5417 }, { "epoch": 52.53493975903614, "grad_norm": 1.967759370803833, "learning_rate": 2.369902912621359e-05, "loss": 0.1532, "step": 5418 }, { "epoch": 52.54457831325301, "grad_norm": 11.54360294342041, "learning_rate": 2.3694174757281553e-05, "loss": 0.4393, "step": 5419 }, { "epoch": 52.55421686746988, "grad_norm": 7.915136814117432, "learning_rate": 2.3689320388349514e-05, "loss": 0.3805, "step": 5420 }, { "epoch": 52.563855421686746, "grad_norm": 2.9840850830078125, "learning_rate": 2.3684466019417475e-05, "loss": 0.2269, "step": 5421 }, { "epoch": 52.57349397590362, "grad_norm": 7.825817108154297, "learning_rate": 2.367961165048544e-05, "loss": 0.1495, "step": 5422 }, { "epoch": 52.58313253012048, "grad_norm": 5.804856777191162, "learning_rate": 2.36747572815534e-05, "loss": 0.2778, "step": 5423 }, { "epoch": 52.59277108433735, "grad_norm": 2.9331929683685303, "learning_rate": 2.3669902912621362e-05, "loss": 0.1513, "step": 5424 }, { "epoch": 52.602409638554214, "grad_norm": 2.425748348236084, "learning_rate": 2.3665048543689323e-05, "loss": 0.3075, "step": 5425 }, { "epoch": 52.612048192771084, "grad_norm": 6.208572864532471, "learning_rate": 2.3660194174757284e-05, "loss": 0.2624, "step": 5426 }, { "epoch": 52.621686746987955, "grad_norm": 3.6956629753112793, "learning_rate": 2.3655339805825245e-05, "loss": 0.3717, "step": 5427 }, { "epoch": 52.63132530120482, "grad_norm": 8.774861335754395, "learning_rate": 2.3650485436893206e-05, "loss": 0.4851, "step": 5428 }, { "epoch": 52.64096385542169, "grad_norm": 6.025881767272949, "learning_rate": 2.3645631067961164e-05, "loss": 0.2274, "step": 5429 }, { "epoch": 52.65060240963855, "grad_norm": 3.067829132080078, "learning_rate": 2.3640776699029125e-05, "loss": 0.1902, "step": 5430 }, { "epoch": 52.66024096385542, "grad_norm": 22.654420852661133, "learning_rate": 2.3635922330097087e-05, "loss": 0.185, "step": 5431 }, { "epoch": 52.66987951807229, "grad_norm": 8.933442115783691, "learning_rate": 2.3631067961165048e-05, "loss": 0.1824, "step": 5432 }, { "epoch": 52.679518072289156, "grad_norm": 6.400386810302734, "learning_rate": 2.362621359223301e-05, "loss": 0.2513, "step": 5433 }, { "epoch": 52.689156626506026, "grad_norm": 8.637577056884766, "learning_rate": 2.362135922330097e-05, "loss": 0.3529, "step": 5434 }, { "epoch": 52.69879518072289, "grad_norm": 7.902117729187012, "learning_rate": 2.3616504854368935e-05, "loss": 0.1837, "step": 5435 }, { "epoch": 52.70843373493976, "grad_norm": 15.225760459899902, "learning_rate": 2.3611650485436896e-05, "loss": 0.2973, "step": 5436 }, { "epoch": 52.71807228915662, "grad_norm": 0.885994553565979, "learning_rate": 2.3606796116504857e-05, "loss": 0.0978, "step": 5437 }, { "epoch": 52.727710843373494, "grad_norm": 9.036835670471191, "learning_rate": 2.3601941747572818e-05, "loss": 0.4282, "step": 5438 }, { "epoch": 52.737349397590364, "grad_norm": 7.524320125579834, "learning_rate": 2.359708737864078e-05, "loss": 0.3075, "step": 5439 }, { "epoch": 52.74698795180723, "grad_norm": 7.655864238739014, "learning_rate": 2.359223300970874e-05, "loss": 0.1896, "step": 5440 }, { "epoch": 52.7566265060241, "grad_norm": 8.457209587097168, "learning_rate": 2.35873786407767e-05, "loss": 0.2217, "step": 5441 }, { "epoch": 52.76626506024096, "grad_norm": 11.022900581359863, "learning_rate": 2.3582524271844663e-05, "loss": 0.2765, "step": 5442 }, { "epoch": 52.77590361445783, "grad_norm": 3.1935229301452637, "learning_rate": 2.357766990291262e-05, "loss": 0.3447, "step": 5443 }, { "epoch": 52.7855421686747, "grad_norm": 7.120029926300049, "learning_rate": 2.357281553398058e-05, "loss": 0.1798, "step": 5444 }, { "epoch": 52.795180722891565, "grad_norm": 16.781095504760742, "learning_rate": 2.3567961165048543e-05, "loss": 0.1523, "step": 5445 }, { "epoch": 52.804819277108436, "grad_norm": 14.470193862915039, "learning_rate": 2.3563106796116504e-05, "loss": 0.2702, "step": 5446 }, { "epoch": 52.8144578313253, "grad_norm": 23.165802001953125, "learning_rate": 2.3558252427184468e-05, "loss": 0.2477, "step": 5447 }, { "epoch": 52.82409638554217, "grad_norm": 4.874135494232178, "learning_rate": 2.355339805825243e-05, "loss": 0.171, "step": 5448 }, { "epoch": 52.83373493975903, "grad_norm": 5.256649017333984, "learning_rate": 2.354854368932039e-05, "loss": 0.1247, "step": 5449 }, { "epoch": 52.8433734939759, "grad_norm": 9.305502891540527, "learning_rate": 2.3543689320388352e-05, "loss": 0.369, "step": 5450 }, { "epoch": 52.853012048192774, "grad_norm": 19.8901424407959, "learning_rate": 2.3538834951456313e-05, "loss": 0.347, "step": 5451 }, { "epoch": 52.86265060240964, "grad_norm": 7.168848991394043, "learning_rate": 2.3533980582524274e-05, "loss": 0.205, "step": 5452 }, { "epoch": 52.87228915662651, "grad_norm": 6.812000751495361, "learning_rate": 2.3529126213592235e-05, "loss": 0.3632, "step": 5453 }, { "epoch": 52.88192771084337, "grad_norm": 33.900386810302734, "learning_rate": 2.3524271844660196e-05, "loss": 0.3443, "step": 5454 }, { "epoch": 52.89156626506024, "grad_norm": 9.278144836425781, "learning_rate": 2.3519417475728157e-05, "loss": 0.178, "step": 5455 }, { "epoch": 52.90120481927711, "grad_norm": 20.451732635498047, "learning_rate": 2.351456310679612e-05, "loss": 0.192, "step": 5456 }, { "epoch": 52.910843373493975, "grad_norm": 5.317741870880127, "learning_rate": 2.3509708737864076e-05, "loss": 0.3313, "step": 5457 }, { "epoch": 52.920481927710846, "grad_norm": 5.795550346374512, "learning_rate": 2.3504854368932037e-05, "loss": 0.3981, "step": 5458 }, { "epoch": 52.93012048192771, "grad_norm": 6.101212024688721, "learning_rate": 2.35e-05, "loss": 0.2939, "step": 5459 }, { "epoch": 52.93975903614458, "grad_norm": 6.757400035858154, "learning_rate": 2.3495145631067963e-05, "loss": 0.3621, "step": 5460 }, { "epoch": 52.94939759036144, "grad_norm": 7.8545002937316895, "learning_rate": 2.3490291262135924e-05, "loss": 0.3821, "step": 5461 }, { "epoch": 52.95903614457831, "grad_norm": 3.2364070415496826, "learning_rate": 2.3485436893203885e-05, "loss": 0.1952, "step": 5462 }, { "epoch": 52.96867469879518, "grad_norm": 3.2848989963531494, "learning_rate": 2.3480582524271847e-05, "loss": 0.0987, "step": 5463 }, { "epoch": 52.97831325301205, "grad_norm": 12.172615051269531, "learning_rate": 2.3475728155339808e-05, "loss": 0.1604, "step": 5464 }, { "epoch": 52.98795180722892, "grad_norm": 17.81689453125, "learning_rate": 2.347087378640777e-05, "loss": 0.3041, "step": 5465 }, { "epoch": 53.003614457831326, "grad_norm": 6.778156757354736, "learning_rate": 2.346601941747573e-05, "loss": 0.1483, "step": 5466 }, { "epoch": 53.013253012048196, "grad_norm": 6.423877239227295, "learning_rate": 2.346116504854369e-05, "loss": 0.2359, "step": 5467 }, { "epoch": 53.02289156626506, "grad_norm": 5.705238342285156, "learning_rate": 2.3456310679611652e-05, "loss": 0.2113, "step": 5468 }, { "epoch": 53.03253012048193, "grad_norm": 0.9317858815193176, "learning_rate": 2.3451456310679613e-05, "loss": 0.1238, "step": 5469 }, { "epoch": 53.04216867469879, "grad_norm": 9.50816535949707, "learning_rate": 2.3446601941747575e-05, "loss": 0.2978, "step": 5470 }, { "epoch": 53.05180722891566, "grad_norm": 2.7393083572387695, "learning_rate": 2.3441747572815532e-05, "loss": 0.3249, "step": 5471 }, { "epoch": 53.06144578313253, "grad_norm": 5.529094219207764, "learning_rate": 2.3436893203883497e-05, "loss": 0.284, "step": 5472 }, { "epoch": 53.0710843373494, "grad_norm": 6.5403056144714355, "learning_rate": 2.3432038834951458e-05, "loss": 0.2891, "step": 5473 }, { "epoch": 53.08072289156627, "grad_norm": 3.4216933250427246, "learning_rate": 2.342718446601942e-05, "loss": 0.1209, "step": 5474 }, { "epoch": 53.09036144578313, "grad_norm": 16.747323989868164, "learning_rate": 2.342233009708738e-05, "loss": 0.2063, "step": 5475 }, { "epoch": 53.1, "grad_norm": 5.413741111755371, "learning_rate": 2.341747572815534e-05, "loss": 0.3699, "step": 5476 }, { "epoch": 53.109638554216865, "grad_norm": 2.7047009468078613, "learning_rate": 2.3412621359223303e-05, "loss": 0.2162, "step": 5477 }, { "epoch": 53.119277108433735, "grad_norm": 4.500445365905762, "learning_rate": 2.3407766990291264e-05, "loss": 0.1955, "step": 5478 }, { "epoch": 53.128915662650606, "grad_norm": 14.489871978759766, "learning_rate": 2.3402912621359225e-05, "loss": 0.1532, "step": 5479 }, { "epoch": 53.13855421686747, "grad_norm": 2.0149002075195312, "learning_rate": 2.3398058252427186e-05, "loss": 0.1897, "step": 5480 }, { "epoch": 53.14819277108434, "grad_norm": 7.333072185516357, "learning_rate": 2.3393203883495147e-05, "loss": 0.2606, "step": 5481 }, { "epoch": 53.1578313253012, "grad_norm": 2.8556501865386963, "learning_rate": 2.338834951456311e-05, "loss": 0.1708, "step": 5482 }, { "epoch": 53.16746987951807, "grad_norm": 2.4599337577819824, "learning_rate": 2.338349514563107e-05, "loss": 0.3056, "step": 5483 }, { "epoch": 53.17710843373494, "grad_norm": 5.611461162567139, "learning_rate": 2.337864077669903e-05, "loss": 0.3869, "step": 5484 }, { "epoch": 53.18674698795181, "grad_norm": 8.583463668823242, "learning_rate": 2.3373786407766992e-05, "loss": 0.3368, "step": 5485 }, { "epoch": 53.19638554216868, "grad_norm": 3.096550703048706, "learning_rate": 2.3368932038834953e-05, "loss": 0.1826, "step": 5486 }, { "epoch": 53.20602409638554, "grad_norm": 3.513617515563965, "learning_rate": 2.3364077669902914e-05, "loss": 0.2054, "step": 5487 }, { "epoch": 53.21566265060241, "grad_norm": 6.6704864501953125, "learning_rate": 2.3359223300970875e-05, "loss": 0.1077, "step": 5488 }, { "epoch": 53.225301204819274, "grad_norm": 3.745044469833374, "learning_rate": 2.3354368932038836e-05, "loss": 0.3627, "step": 5489 }, { "epoch": 53.234939759036145, "grad_norm": 4.334421157836914, "learning_rate": 2.3349514563106798e-05, "loss": 0.1721, "step": 5490 }, { "epoch": 53.244578313253015, "grad_norm": 18.913278579711914, "learning_rate": 2.334466019417476e-05, "loss": 0.2326, "step": 5491 }, { "epoch": 53.25421686746988, "grad_norm": 17.73507308959961, "learning_rate": 2.333980582524272e-05, "loss": 0.4006, "step": 5492 }, { "epoch": 53.26385542168675, "grad_norm": 2.826394557952881, "learning_rate": 2.333495145631068e-05, "loss": 0.1979, "step": 5493 }, { "epoch": 53.27349397590361, "grad_norm": 5.658834457397461, "learning_rate": 2.3330097087378642e-05, "loss": 0.3773, "step": 5494 }, { "epoch": 53.28313253012048, "grad_norm": 4.7345685958862305, "learning_rate": 2.3325242718446603e-05, "loss": 0.2481, "step": 5495 }, { "epoch": 53.292771084337346, "grad_norm": 7.968931674957275, "learning_rate": 2.3320388349514564e-05, "loss": 0.4726, "step": 5496 }, { "epoch": 53.30240963855422, "grad_norm": 5.687824726104736, "learning_rate": 2.3315533980582526e-05, "loss": 0.2406, "step": 5497 }, { "epoch": 53.31204819277109, "grad_norm": 13.61972427368164, "learning_rate": 2.3310679611650487e-05, "loss": 0.3898, "step": 5498 }, { "epoch": 53.32168674698795, "grad_norm": 2.437744379043579, "learning_rate": 2.3305825242718448e-05, "loss": 0.2275, "step": 5499 }, { "epoch": 53.33132530120482, "grad_norm": 5.646040439605713, "learning_rate": 2.330097087378641e-05, "loss": 0.3746, "step": 5500 }, { "epoch": 53.340963855421684, "grad_norm": 4.6341328620910645, "learning_rate": 2.329611650485437e-05, "loss": 0.3085, "step": 5501 }, { "epoch": 53.350602409638554, "grad_norm": 8.71480941772461, "learning_rate": 2.329126213592233e-05, "loss": 0.1956, "step": 5502 }, { "epoch": 53.360240963855425, "grad_norm": 2.9187493324279785, "learning_rate": 2.3286407766990292e-05, "loss": 0.2998, "step": 5503 }, { "epoch": 53.36987951807229, "grad_norm": 17.011259078979492, "learning_rate": 2.3281553398058254e-05, "loss": 0.1827, "step": 5504 }, { "epoch": 53.37951807228916, "grad_norm": 2.5973660945892334, "learning_rate": 2.3276699029126215e-05, "loss": 0.3098, "step": 5505 }, { "epoch": 53.38915662650602, "grad_norm": 4.6510467529296875, "learning_rate": 2.3271844660194176e-05, "loss": 0.3547, "step": 5506 }, { "epoch": 53.39879518072289, "grad_norm": 4.07442045211792, "learning_rate": 2.3266990291262137e-05, "loss": 0.3301, "step": 5507 }, { "epoch": 53.408433734939756, "grad_norm": 9.365259170532227, "learning_rate": 2.3262135922330098e-05, "loss": 0.2575, "step": 5508 }, { "epoch": 53.418072289156626, "grad_norm": 4.291959762573242, "learning_rate": 2.325728155339806e-05, "loss": 0.1772, "step": 5509 }, { "epoch": 53.4277108433735, "grad_norm": 20.473716735839844, "learning_rate": 2.325242718446602e-05, "loss": 0.1121, "step": 5510 }, { "epoch": 53.43734939759036, "grad_norm": 5.115424633026123, "learning_rate": 2.324757281553398e-05, "loss": 0.3036, "step": 5511 }, { "epoch": 53.44698795180723, "grad_norm": 5.369470119476318, "learning_rate": 2.3242718446601943e-05, "loss": 0.3514, "step": 5512 }, { "epoch": 53.456626506024094, "grad_norm": 4.555479049682617, "learning_rate": 2.3237864077669904e-05, "loss": 0.2231, "step": 5513 }, { "epoch": 53.466265060240964, "grad_norm": 3.1875030994415283, "learning_rate": 2.3233009708737865e-05, "loss": 0.2674, "step": 5514 }, { "epoch": 53.475903614457835, "grad_norm": 11.513036727905273, "learning_rate": 2.3228155339805826e-05, "loss": 0.371, "step": 5515 }, { "epoch": 53.4855421686747, "grad_norm": 2.5932705402374268, "learning_rate": 2.3223300970873787e-05, "loss": 0.1411, "step": 5516 }, { "epoch": 53.49518072289157, "grad_norm": 3.9145867824554443, "learning_rate": 2.321844660194175e-05, "loss": 0.1994, "step": 5517 }, { "epoch": 53.50481927710843, "grad_norm": 2.7709453105926514, "learning_rate": 2.321359223300971e-05, "loss": 0.1188, "step": 5518 }, { "epoch": 53.5144578313253, "grad_norm": 3.853254795074463, "learning_rate": 2.320873786407767e-05, "loss": 0.1319, "step": 5519 }, { "epoch": 53.524096385542165, "grad_norm": 4.17651891708374, "learning_rate": 2.3203883495145632e-05, "loss": 0.2917, "step": 5520 }, { "epoch": 53.533734939759036, "grad_norm": 6.2126545906066895, "learning_rate": 2.3199029126213593e-05, "loss": 0.1985, "step": 5521 }, { "epoch": 53.543373493975906, "grad_norm": 6.255090236663818, "learning_rate": 2.3194174757281554e-05, "loss": 0.1605, "step": 5522 }, { "epoch": 53.55301204819277, "grad_norm": 5.321601390838623, "learning_rate": 2.3189320388349515e-05, "loss": 0.1178, "step": 5523 }, { "epoch": 53.56265060240964, "grad_norm": 10.618165969848633, "learning_rate": 2.3184466019417476e-05, "loss": 0.3473, "step": 5524 }, { "epoch": 53.5722891566265, "grad_norm": 2.8693180084228516, "learning_rate": 2.3179611650485438e-05, "loss": 0.2673, "step": 5525 }, { "epoch": 53.581927710843374, "grad_norm": 16.120555877685547, "learning_rate": 2.31747572815534e-05, "loss": 0.4991, "step": 5526 }, { "epoch": 53.591566265060244, "grad_norm": 4.422208786010742, "learning_rate": 2.316990291262136e-05, "loss": 0.1164, "step": 5527 }, { "epoch": 53.60120481927711, "grad_norm": 2.7924771308898926, "learning_rate": 2.316504854368932e-05, "loss": 0.2713, "step": 5528 }, { "epoch": 53.61084337349398, "grad_norm": 0.8885562419891357, "learning_rate": 2.3160194174757282e-05, "loss": 0.0614, "step": 5529 }, { "epoch": 53.62048192771084, "grad_norm": 3.8675525188446045, "learning_rate": 2.3155339805825243e-05, "loss": 0.2894, "step": 5530 }, { "epoch": 53.63012048192771, "grad_norm": 1.1022577285766602, "learning_rate": 2.3150485436893205e-05, "loss": 0.2366, "step": 5531 }, { "epoch": 53.639759036144575, "grad_norm": 6.477520942687988, "learning_rate": 2.3145631067961166e-05, "loss": 0.3877, "step": 5532 }, { "epoch": 53.649397590361446, "grad_norm": 7.854410648345947, "learning_rate": 2.3140776699029127e-05, "loss": 0.2053, "step": 5533 }, { "epoch": 53.659036144578316, "grad_norm": 6.725521564483643, "learning_rate": 2.3135922330097088e-05, "loss": 0.3516, "step": 5534 }, { "epoch": 53.66867469879518, "grad_norm": 6.441816329956055, "learning_rate": 2.3131067961165052e-05, "loss": 0.2405, "step": 5535 }, { "epoch": 53.67831325301205, "grad_norm": 2.857706308364868, "learning_rate": 2.3126213592233014e-05, "loss": 0.3965, "step": 5536 }, { "epoch": 53.68795180722891, "grad_norm": 4.67800235748291, "learning_rate": 2.312135922330097e-05, "loss": 0.1662, "step": 5537 }, { "epoch": 53.69759036144578, "grad_norm": 18.0670166015625, "learning_rate": 2.3116504854368933e-05, "loss": 0.2303, "step": 5538 }, { "epoch": 53.707228915662654, "grad_norm": 6.979790210723877, "learning_rate": 2.3111650485436894e-05, "loss": 0.2165, "step": 5539 }, { "epoch": 53.71686746987952, "grad_norm": 9.189973831176758, "learning_rate": 2.3106796116504855e-05, "loss": 0.327, "step": 5540 }, { "epoch": 53.72650602409639, "grad_norm": 27.235715866088867, "learning_rate": 2.3101941747572816e-05, "loss": 0.519, "step": 5541 }, { "epoch": 53.73614457831325, "grad_norm": 12.245851516723633, "learning_rate": 2.3097087378640777e-05, "loss": 0.2472, "step": 5542 }, { "epoch": 53.74578313253012, "grad_norm": 8.250648498535156, "learning_rate": 2.3092233009708738e-05, "loss": 0.2821, "step": 5543 }, { "epoch": 53.755421686746985, "grad_norm": 26.80681037902832, "learning_rate": 2.30873786407767e-05, "loss": 0.3185, "step": 5544 }, { "epoch": 53.765060240963855, "grad_norm": 4.334231376647949, "learning_rate": 2.308252427184466e-05, "loss": 0.2023, "step": 5545 }, { "epoch": 53.774698795180726, "grad_norm": 4.874115943908691, "learning_rate": 2.307766990291262e-05, "loss": 0.3238, "step": 5546 }, { "epoch": 53.78433734939759, "grad_norm": 2.259296178817749, "learning_rate": 2.3072815533980583e-05, "loss": 0.1923, "step": 5547 }, { "epoch": 53.79397590361446, "grad_norm": 16.315845489501953, "learning_rate": 2.3067961165048547e-05, "loss": 0.243, "step": 5548 }, { "epoch": 53.80361445783132, "grad_norm": 3.585378646850586, "learning_rate": 2.306310679611651e-05, "loss": 0.1148, "step": 5549 }, { "epoch": 53.81325301204819, "grad_norm": 5.026650905609131, "learning_rate": 2.305825242718447e-05, "loss": 0.2303, "step": 5550 }, { "epoch": 53.82289156626506, "grad_norm": 13.182318687438965, "learning_rate": 2.3053398058252427e-05, "loss": 0.349, "step": 5551 }, { "epoch": 53.83253012048193, "grad_norm": 3.372854232788086, "learning_rate": 2.304854368932039e-05, "loss": 0.1916, "step": 5552 }, { "epoch": 53.8421686746988, "grad_norm": 4.302036762237549, "learning_rate": 2.304368932038835e-05, "loss": 0.2466, "step": 5553 }, { "epoch": 53.85180722891566, "grad_norm": 3.685239315032959, "learning_rate": 2.303883495145631e-05, "loss": 0.2505, "step": 5554 }, { "epoch": 53.86144578313253, "grad_norm": 3.135511875152588, "learning_rate": 2.3033980582524272e-05, "loss": 0.166, "step": 5555 }, { "epoch": 53.871084337349394, "grad_norm": 6.0662312507629395, "learning_rate": 2.3029126213592233e-05, "loss": 0.2434, "step": 5556 }, { "epoch": 53.880722891566265, "grad_norm": 4.2363786697387695, "learning_rate": 2.3024271844660194e-05, "loss": 0.2584, "step": 5557 }, { "epoch": 53.890361445783135, "grad_norm": 5.2897844314575195, "learning_rate": 2.3019417475728155e-05, "loss": 0.2872, "step": 5558 }, { "epoch": 53.9, "grad_norm": 4.227365016937256, "learning_rate": 2.3014563106796117e-05, "loss": 0.1381, "step": 5559 }, { "epoch": 53.90963855421687, "grad_norm": 12.020668983459473, "learning_rate": 2.3009708737864078e-05, "loss": 0.3968, "step": 5560 }, { "epoch": 53.91927710843373, "grad_norm": 4.409341812133789, "learning_rate": 2.3004854368932042e-05, "loss": 0.2063, "step": 5561 }, { "epoch": 53.9289156626506, "grad_norm": 4.650484085083008, "learning_rate": 2.3000000000000003e-05, "loss": 0.2927, "step": 5562 }, { "epoch": 53.93855421686747, "grad_norm": 17.31369400024414, "learning_rate": 2.2995145631067965e-05, "loss": 0.1713, "step": 5563 }, { "epoch": 53.94819277108434, "grad_norm": 4.277168273925781, "learning_rate": 2.2990291262135922e-05, "loss": 0.2873, "step": 5564 }, { "epoch": 53.95783132530121, "grad_norm": 2.2365550994873047, "learning_rate": 2.2985436893203883e-05, "loss": 0.1064, "step": 5565 }, { "epoch": 53.96746987951807, "grad_norm": 4.5370192527771, "learning_rate": 2.2980582524271845e-05, "loss": 0.2228, "step": 5566 }, { "epoch": 53.97710843373494, "grad_norm": 12.243633270263672, "learning_rate": 2.2975728155339806e-05, "loss": 0.3212, "step": 5567 }, { "epoch": 53.986746987951804, "grad_norm": 4.688271999359131, "learning_rate": 2.2970873786407767e-05, "loss": 0.3328, "step": 5568 }, { "epoch": 54.00240963855422, "grad_norm": 4.924164295196533, "learning_rate": 2.2966019417475728e-05, "loss": 0.202, "step": 5569 }, { "epoch": 54.01204819277108, "grad_norm": 4.108496189117432, "learning_rate": 2.296116504854369e-05, "loss": 0.2548, "step": 5570 }, { "epoch": 54.02168674698795, "grad_norm": 12.179654121398926, "learning_rate": 2.295631067961165e-05, "loss": 0.398, "step": 5571 }, { "epoch": 54.03132530120482, "grad_norm": 6.4367756843566895, "learning_rate": 2.295145631067961e-05, "loss": 0.3102, "step": 5572 }, { "epoch": 54.04096385542169, "grad_norm": 5.392534255981445, "learning_rate": 2.2946601941747576e-05, "loss": 0.4616, "step": 5573 }, { "epoch": 54.05060240963856, "grad_norm": 5.097750186920166, "learning_rate": 2.2941747572815537e-05, "loss": 0.1465, "step": 5574 }, { "epoch": 54.06024096385542, "grad_norm": 6.282725811004639, "learning_rate": 2.2936893203883498e-05, "loss": 0.3044, "step": 5575 }, { "epoch": 54.06987951807229, "grad_norm": 4.908473014831543, "learning_rate": 2.293203883495146e-05, "loss": 0.2055, "step": 5576 }, { "epoch": 54.079518072289154, "grad_norm": 10.489745140075684, "learning_rate": 2.292718446601942e-05, "loss": 0.2677, "step": 5577 }, { "epoch": 54.089156626506025, "grad_norm": 3.2493090629577637, "learning_rate": 2.292233009708738e-05, "loss": 0.2202, "step": 5578 }, { "epoch": 54.09879518072289, "grad_norm": 4.241769790649414, "learning_rate": 2.291747572815534e-05, "loss": 0.194, "step": 5579 }, { "epoch": 54.10843373493976, "grad_norm": 9.371991157531738, "learning_rate": 2.29126213592233e-05, "loss": 0.2314, "step": 5580 }, { "epoch": 54.11807228915663, "grad_norm": 2.8229455947875977, "learning_rate": 2.2907766990291262e-05, "loss": 0.2155, "step": 5581 }, { "epoch": 54.12771084337349, "grad_norm": 5.833423137664795, "learning_rate": 2.2902912621359223e-05, "loss": 0.1795, "step": 5582 }, { "epoch": 54.13734939759036, "grad_norm": 10.75684928894043, "learning_rate": 2.2898058252427184e-05, "loss": 0.3371, "step": 5583 }, { "epoch": 54.146987951807226, "grad_norm": 5.956971168518066, "learning_rate": 2.2893203883495145e-05, "loss": 0.3676, "step": 5584 }, { "epoch": 54.1566265060241, "grad_norm": 4.189235210418701, "learning_rate": 2.2888349514563106e-05, "loss": 0.2058, "step": 5585 }, { "epoch": 54.16626506024097, "grad_norm": 5.090167045593262, "learning_rate": 2.288349514563107e-05, "loss": 0.2712, "step": 5586 }, { "epoch": 54.17590361445783, "grad_norm": 6.289417743682861, "learning_rate": 2.2878640776699032e-05, "loss": 0.2985, "step": 5587 }, { "epoch": 54.1855421686747, "grad_norm": 3.291260004043579, "learning_rate": 2.2873786407766993e-05, "loss": 0.0833, "step": 5588 }, { "epoch": 54.195180722891564, "grad_norm": 3.8504066467285156, "learning_rate": 2.2868932038834954e-05, "loss": 0.2394, "step": 5589 }, { "epoch": 54.204819277108435, "grad_norm": 5.4989914894104, "learning_rate": 2.2864077669902915e-05, "loss": 0.3254, "step": 5590 }, { "epoch": 54.2144578313253, "grad_norm": 5.303149223327637, "learning_rate": 2.2859223300970877e-05, "loss": 0.2942, "step": 5591 }, { "epoch": 54.22409638554217, "grad_norm": 1.2534527778625488, "learning_rate": 2.2854368932038834e-05, "loss": 0.1628, "step": 5592 }, { "epoch": 54.23373493975904, "grad_norm": 3.4342260360717773, "learning_rate": 2.2849514563106796e-05, "loss": 0.1433, "step": 5593 }, { "epoch": 54.2433734939759, "grad_norm": 12.355120658874512, "learning_rate": 2.2844660194174757e-05, "loss": 0.2779, "step": 5594 }, { "epoch": 54.25301204819277, "grad_norm": 4.533487319946289, "learning_rate": 2.2839805825242718e-05, "loss": 0.2943, "step": 5595 }, { "epoch": 54.262650602409636, "grad_norm": 4.0026326179504395, "learning_rate": 2.283495145631068e-05, "loss": 0.1825, "step": 5596 }, { "epoch": 54.272289156626506, "grad_norm": 7.935513496398926, "learning_rate": 2.283009708737864e-05, "loss": 0.3631, "step": 5597 }, { "epoch": 54.28192771084338, "grad_norm": 1.6842514276504517, "learning_rate": 2.28252427184466e-05, "loss": 0.1912, "step": 5598 }, { "epoch": 54.29156626506024, "grad_norm": 2.942446231842041, "learning_rate": 2.2820388349514566e-05, "loss": 0.1393, "step": 5599 }, { "epoch": 54.30120481927711, "grad_norm": 2.616389274597168, "learning_rate": 2.2815533980582527e-05, "loss": 0.0908, "step": 5600 }, { "epoch": 54.310843373493974, "grad_norm": 4.8052978515625, "learning_rate": 2.2810679611650488e-05, "loss": 0.2611, "step": 5601 }, { "epoch": 54.320481927710844, "grad_norm": 16.172496795654297, "learning_rate": 2.280582524271845e-05, "loss": 0.2955, "step": 5602 }, { "epoch": 54.33012048192771, "grad_norm": 6.728939533233643, "learning_rate": 2.280097087378641e-05, "loss": 0.4596, "step": 5603 }, { "epoch": 54.33975903614458, "grad_norm": 7.55000638961792, "learning_rate": 2.279611650485437e-05, "loss": 0.4361, "step": 5604 }, { "epoch": 54.34939759036145, "grad_norm": 5.444432258605957, "learning_rate": 2.2791262135922333e-05, "loss": 0.1986, "step": 5605 }, { "epoch": 54.35903614457831, "grad_norm": 1.7287808656692505, "learning_rate": 2.278640776699029e-05, "loss": 0.1199, "step": 5606 }, { "epoch": 54.36867469879518, "grad_norm": 3.853917360305786, "learning_rate": 2.278155339805825e-05, "loss": 0.0805, "step": 5607 }, { "epoch": 54.378313253012045, "grad_norm": 2.8250598907470703, "learning_rate": 2.2776699029126213e-05, "loss": 0.2283, "step": 5608 }, { "epoch": 54.387951807228916, "grad_norm": 2.884711503982544, "learning_rate": 2.2771844660194174e-05, "loss": 0.1369, "step": 5609 }, { "epoch": 54.397590361445786, "grad_norm": 12.116527557373047, "learning_rate": 2.2766990291262135e-05, "loss": 0.3674, "step": 5610 }, { "epoch": 54.40722891566265, "grad_norm": 1.883861780166626, "learning_rate": 2.27621359223301e-05, "loss": 0.1008, "step": 5611 }, { "epoch": 54.41686746987952, "grad_norm": 2.4837284088134766, "learning_rate": 2.275728155339806e-05, "loss": 0.1239, "step": 5612 }, { "epoch": 54.42650602409638, "grad_norm": 4.687939167022705, "learning_rate": 2.2752427184466022e-05, "loss": 0.0957, "step": 5613 }, { "epoch": 54.436144578313254, "grad_norm": 4.001872539520264, "learning_rate": 2.2747572815533983e-05, "loss": 0.2264, "step": 5614 }, { "epoch": 54.44578313253012, "grad_norm": 3.9497666358947754, "learning_rate": 2.2742718446601944e-05, "loss": 0.2658, "step": 5615 }, { "epoch": 54.45542168674699, "grad_norm": 3.7489001750946045, "learning_rate": 2.2737864077669905e-05, "loss": 0.2878, "step": 5616 }, { "epoch": 54.46506024096386, "grad_norm": 2.5993881225585938, "learning_rate": 2.2733009708737866e-05, "loss": 0.1917, "step": 5617 }, { "epoch": 54.47469879518072, "grad_norm": 3.34987211227417, "learning_rate": 2.2728155339805828e-05, "loss": 0.1895, "step": 5618 }, { "epoch": 54.48433734939759, "grad_norm": 12.341931343078613, "learning_rate": 2.272330097087379e-05, "loss": 0.366, "step": 5619 }, { "epoch": 54.493975903614455, "grad_norm": 1.349043607711792, "learning_rate": 2.2718446601941746e-05, "loss": 0.1499, "step": 5620 }, { "epoch": 54.503614457831326, "grad_norm": 4.589273452758789, "learning_rate": 2.2713592233009708e-05, "loss": 0.3743, "step": 5621 }, { "epoch": 54.513253012048196, "grad_norm": 4.070921421051025, "learning_rate": 2.270873786407767e-05, "loss": 0.2528, "step": 5622 }, { "epoch": 54.52289156626506, "grad_norm": 3.1329994201660156, "learning_rate": 2.270388349514563e-05, "loss": 0.4102, "step": 5623 }, { "epoch": 54.53253012048193, "grad_norm": 6.283277988433838, "learning_rate": 2.2699029126213594e-05, "loss": 0.2112, "step": 5624 }, { "epoch": 54.54216867469879, "grad_norm": 4.06771993637085, "learning_rate": 2.2694174757281556e-05, "loss": 0.2731, "step": 5625 }, { "epoch": 54.55180722891566, "grad_norm": 4.346475601196289, "learning_rate": 2.2689320388349517e-05, "loss": 0.1827, "step": 5626 }, { "epoch": 54.56144578313253, "grad_norm": 5.794171333312988, "learning_rate": 2.2684466019417478e-05, "loss": 0.3788, "step": 5627 }, { "epoch": 54.5710843373494, "grad_norm": 5.308287620544434, "learning_rate": 2.267961165048544e-05, "loss": 0.0928, "step": 5628 }, { "epoch": 54.58072289156627, "grad_norm": 6.412263870239258, "learning_rate": 2.26747572815534e-05, "loss": 0.2513, "step": 5629 }, { "epoch": 54.59036144578313, "grad_norm": 5.3738932609558105, "learning_rate": 2.266990291262136e-05, "loss": 0.4125, "step": 5630 }, { "epoch": 54.6, "grad_norm": 3.2835915088653564, "learning_rate": 2.2665048543689322e-05, "loss": 0.379, "step": 5631 }, { "epoch": 54.609638554216865, "grad_norm": 3.3483681678771973, "learning_rate": 2.2660194174757284e-05, "loss": 0.2241, "step": 5632 }, { "epoch": 54.619277108433735, "grad_norm": 6.3096089363098145, "learning_rate": 2.2655339805825245e-05, "loss": 0.2466, "step": 5633 }, { "epoch": 54.628915662650606, "grad_norm": 3.810109853744507, "learning_rate": 2.2650485436893203e-05, "loss": 0.1779, "step": 5634 }, { "epoch": 54.63855421686747, "grad_norm": 4.787976264953613, "learning_rate": 2.2645631067961164e-05, "loss": 0.2809, "step": 5635 }, { "epoch": 54.64819277108434, "grad_norm": 3.45512056350708, "learning_rate": 2.2640776699029128e-05, "loss": 0.1685, "step": 5636 }, { "epoch": 54.6578313253012, "grad_norm": 14.104994773864746, "learning_rate": 2.263592233009709e-05, "loss": 0.4289, "step": 5637 }, { "epoch": 54.66746987951807, "grad_norm": 9.523648262023926, "learning_rate": 2.263106796116505e-05, "loss": 0.2463, "step": 5638 }, { "epoch": 54.67710843373494, "grad_norm": 3.5743415355682373, "learning_rate": 2.262621359223301e-05, "loss": 0.1384, "step": 5639 }, { "epoch": 54.68674698795181, "grad_norm": 7.556832313537598, "learning_rate": 2.2621359223300973e-05, "loss": 0.1828, "step": 5640 }, { "epoch": 54.69638554216868, "grad_norm": 2.036055326461792, "learning_rate": 2.2616504854368934e-05, "loss": 0.141, "step": 5641 }, { "epoch": 54.70602409638554, "grad_norm": 22.328460693359375, "learning_rate": 2.2611650485436895e-05, "loss": 0.1946, "step": 5642 }, { "epoch": 54.71566265060241, "grad_norm": 3.3345184326171875, "learning_rate": 2.2606796116504856e-05, "loss": 0.2891, "step": 5643 }, { "epoch": 54.725301204819274, "grad_norm": 8.252508163452148, "learning_rate": 2.2601941747572817e-05, "loss": 0.367, "step": 5644 }, { "epoch": 54.734939759036145, "grad_norm": 7.276041030883789, "learning_rate": 2.259708737864078e-05, "loss": 0.3274, "step": 5645 }, { "epoch": 54.744578313253015, "grad_norm": 10.49498462677002, "learning_rate": 2.259223300970874e-05, "loss": 0.2701, "step": 5646 }, { "epoch": 54.75421686746988, "grad_norm": 5.396079063415527, "learning_rate": 2.2587378640776697e-05, "loss": 0.3684, "step": 5647 }, { "epoch": 54.76385542168675, "grad_norm": 2.502063035964966, "learning_rate": 2.258252427184466e-05, "loss": 0.1932, "step": 5648 }, { "epoch": 54.77349397590361, "grad_norm": 2.1831655502319336, "learning_rate": 2.2577669902912623e-05, "loss": 0.2451, "step": 5649 }, { "epoch": 54.78313253012048, "grad_norm": 5.460685729980469, "learning_rate": 2.2572815533980584e-05, "loss": 0.3216, "step": 5650 }, { "epoch": 54.792771084337346, "grad_norm": 9.565932273864746, "learning_rate": 2.2567961165048545e-05, "loss": 0.4144, "step": 5651 }, { "epoch": 54.80240963855422, "grad_norm": 3.144564628601074, "learning_rate": 2.2563106796116507e-05, "loss": 0.2146, "step": 5652 }, { "epoch": 54.81204819277109, "grad_norm": 3.5488333702087402, "learning_rate": 2.2558252427184468e-05, "loss": 0.2152, "step": 5653 }, { "epoch": 54.82168674698795, "grad_norm": 10.423502922058105, "learning_rate": 2.255339805825243e-05, "loss": 0.429, "step": 5654 }, { "epoch": 54.83132530120482, "grad_norm": 8.569419860839844, "learning_rate": 2.254854368932039e-05, "loss": 0.2428, "step": 5655 }, { "epoch": 54.840963855421684, "grad_norm": 2.968836545944214, "learning_rate": 2.254368932038835e-05, "loss": 0.1841, "step": 5656 }, { "epoch": 54.850602409638554, "grad_norm": 5.171032905578613, "learning_rate": 2.2538834951456312e-05, "loss": 0.2725, "step": 5657 }, { "epoch": 54.860240963855425, "grad_norm": 9.308469772338867, "learning_rate": 2.2533980582524273e-05, "loss": 0.209, "step": 5658 }, { "epoch": 54.86987951807229, "grad_norm": 5.431859970092773, "learning_rate": 2.2529126213592235e-05, "loss": 0.4123, "step": 5659 }, { "epoch": 54.87951807228916, "grad_norm": 5.288876533508301, "learning_rate": 2.2524271844660196e-05, "loss": 0.1406, "step": 5660 }, { "epoch": 54.88915662650602, "grad_norm": 2.403928756713867, "learning_rate": 2.2519417475728153e-05, "loss": 0.1512, "step": 5661 }, { "epoch": 54.89879518072289, "grad_norm": 2.944852113723755, "learning_rate": 2.2514563106796118e-05, "loss": 0.1414, "step": 5662 }, { "epoch": 54.908433734939756, "grad_norm": 6.070284366607666, "learning_rate": 2.250970873786408e-05, "loss": 0.3486, "step": 5663 }, { "epoch": 54.918072289156626, "grad_norm": 2.489288330078125, "learning_rate": 2.250485436893204e-05, "loss": 0.1906, "step": 5664 }, { "epoch": 54.9277108433735, "grad_norm": 4.202097415924072, "learning_rate": 2.25e-05, "loss": 0.2432, "step": 5665 }, { "epoch": 54.93734939759036, "grad_norm": 3.7778844833374023, "learning_rate": 2.2495145631067963e-05, "loss": 0.2521, "step": 5666 }, { "epoch": 54.94698795180723, "grad_norm": 9.980147361755371, "learning_rate": 2.2490291262135924e-05, "loss": 0.2129, "step": 5667 }, { "epoch": 54.956626506024094, "grad_norm": 4.891349792480469, "learning_rate": 2.2485436893203885e-05, "loss": 0.4486, "step": 5668 }, { "epoch": 54.966265060240964, "grad_norm": 6.559269428253174, "learning_rate": 2.2480582524271846e-05, "loss": 0.1612, "step": 5669 }, { "epoch": 54.975903614457835, "grad_norm": 4.194281101226807, "learning_rate": 2.2475728155339807e-05, "loss": 0.3918, "step": 5670 }, { "epoch": 54.9855421686747, "grad_norm": 6.133951187133789, "learning_rate": 2.2470873786407768e-05, "loss": 0.2591, "step": 5671 }, { "epoch": 55.001204819277106, "grad_norm": 7.58281946182251, "learning_rate": 2.246601941747573e-05, "loss": 0.2347, "step": 5672 }, { "epoch": 55.01084337349398, "grad_norm": 6.623717784881592, "learning_rate": 2.246116504854369e-05, "loss": 0.3543, "step": 5673 }, { "epoch": 55.02048192771084, "grad_norm": 3.9425899982452393, "learning_rate": 2.2456310679611652e-05, "loss": 0.1935, "step": 5674 }, { "epoch": 55.03012048192771, "grad_norm": 6.581514358520508, "learning_rate": 2.2451456310679613e-05, "loss": 0.4674, "step": 5675 }, { "epoch": 55.03975903614458, "grad_norm": 4.257002353668213, "learning_rate": 2.2446601941747574e-05, "loss": 0.327, "step": 5676 }, { "epoch": 55.049397590361444, "grad_norm": 4.547857761383057, "learning_rate": 2.2441747572815535e-05, "loss": 0.2046, "step": 5677 }, { "epoch": 55.059036144578315, "grad_norm": 13.3339204788208, "learning_rate": 2.2436893203883496e-05, "loss": 0.1845, "step": 5678 }, { "epoch": 55.06867469879518, "grad_norm": 2.9931371212005615, "learning_rate": 2.2432038834951457e-05, "loss": 0.1972, "step": 5679 }, { "epoch": 55.07831325301205, "grad_norm": 3.379887580871582, "learning_rate": 2.242718446601942e-05, "loss": 0.1381, "step": 5680 }, { "epoch": 55.08795180722892, "grad_norm": 3.6180996894836426, "learning_rate": 2.242233009708738e-05, "loss": 0.1199, "step": 5681 }, { "epoch": 55.09759036144578, "grad_norm": 7.754327774047852, "learning_rate": 2.241747572815534e-05, "loss": 0.4479, "step": 5682 }, { "epoch": 55.10722891566265, "grad_norm": 4.831622123718262, "learning_rate": 2.2412621359223302e-05, "loss": 0.2613, "step": 5683 }, { "epoch": 55.116867469879516, "grad_norm": 1.7438362836837769, "learning_rate": 2.2407766990291263e-05, "loss": 0.0872, "step": 5684 }, { "epoch": 55.126506024096386, "grad_norm": 7.8906755447387695, "learning_rate": 2.2402912621359224e-05, "loss": 0.4351, "step": 5685 }, { "epoch": 55.13614457831325, "grad_norm": 6.908262252807617, "learning_rate": 2.2398058252427185e-05, "loss": 0.2613, "step": 5686 }, { "epoch": 55.14578313253012, "grad_norm": 3.765026330947876, "learning_rate": 2.2393203883495147e-05, "loss": 0.1775, "step": 5687 }, { "epoch": 55.15542168674699, "grad_norm": 2.6414570808410645, "learning_rate": 2.2388349514563108e-05, "loss": 0.1979, "step": 5688 }, { "epoch": 55.165060240963854, "grad_norm": 3.890772581100464, "learning_rate": 2.238349514563107e-05, "loss": 0.2599, "step": 5689 }, { "epoch": 55.174698795180724, "grad_norm": 5.23063325881958, "learning_rate": 2.237864077669903e-05, "loss": 0.1712, "step": 5690 }, { "epoch": 55.18433734939759, "grad_norm": 29.552608489990234, "learning_rate": 2.237378640776699e-05, "loss": 0.2621, "step": 5691 }, { "epoch": 55.19397590361446, "grad_norm": 3.3473992347717285, "learning_rate": 2.2368932038834952e-05, "loss": 0.2479, "step": 5692 }, { "epoch": 55.20361445783133, "grad_norm": 2.0375497341156006, "learning_rate": 2.2364077669902913e-05, "loss": 0.1427, "step": 5693 }, { "epoch": 55.21325301204819, "grad_norm": 11.184996604919434, "learning_rate": 2.2359223300970875e-05, "loss": 0.1918, "step": 5694 }, { "epoch": 55.22289156626506, "grad_norm": 4.350978851318359, "learning_rate": 2.2354368932038836e-05, "loss": 0.1569, "step": 5695 }, { "epoch": 55.232530120481925, "grad_norm": 4.626218795776367, "learning_rate": 2.2349514563106797e-05, "loss": 0.185, "step": 5696 }, { "epoch": 55.242168674698796, "grad_norm": 4.456257343292236, "learning_rate": 2.2344660194174758e-05, "loss": 0.2372, "step": 5697 }, { "epoch": 55.25180722891566, "grad_norm": 4.332968711853027, "learning_rate": 2.233980582524272e-05, "loss": 0.2723, "step": 5698 }, { "epoch": 55.26144578313253, "grad_norm": 7.495181083679199, "learning_rate": 2.233495145631068e-05, "loss": 0.3151, "step": 5699 }, { "epoch": 55.2710843373494, "grad_norm": 7.833316802978516, "learning_rate": 2.233009708737864e-05, "loss": 0.2325, "step": 5700 }, { "epoch": 55.28072289156626, "grad_norm": 5.552583694458008, "learning_rate": 2.2325242718446603e-05, "loss": 0.2025, "step": 5701 }, { "epoch": 55.290361445783134, "grad_norm": 3.8234825134277344, "learning_rate": 2.2320388349514564e-05, "loss": 0.1875, "step": 5702 }, { "epoch": 55.3, "grad_norm": 3.006284475326538, "learning_rate": 2.2315533980582525e-05, "loss": 0.2424, "step": 5703 }, { "epoch": 55.30963855421687, "grad_norm": 6.366328716278076, "learning_rate": 2.2310679611650486e-05, "loss": 0.3199, "step": 5704 }, { "epoch": 55.31927710843374, "grad_norm": 4.5601606369018555, "learning_rate": 2.2305825242718447e-05, "loss": 0.2283, "step": 5705 }, { "epoch": 55.3289156626506, "grad_norm": 9.029167175292969, "learning_rate": 2.230097087378641e-05, "loss": 0.1488, "step": 5706 }, { "epoch": 55.33855421686747, "grad_norm": 8.790006637573242, "learning_rate": 2.229611650485437e-05, "loss": 0.1864, "step": 5707 }, { "epoch": 55.348192771084335, "grad_norm": 6.532261848449707, "learning_rate": 2.229126213592233e-05, "loss": 0.3428, "step": 5708 }, { "epoch": 55.357831325301206, "grad_norm": 3.242220163345337, "learning_rate": 2.2286407766990292e-05, "loss": 0.2469, "step": 5709 }, { "epoch": 55.36746987951807, "grad_norm": 6.252491474151611, "learning_rate": 2.2281553398058253e-05, "loss": 0.2749, "step": 5710 }, { "epoch": 55.37710843373494, "grad_norm": 11.94841194152832, "learning_rate": 2.2276699029126214e-05, "loss": 0.2239, "step": 5711 }, { "epoch": 55.38674698795181, "grad_norm": 8.511874198913574, "learning_rate": 2.227184466019418e-05, "loss": 0.2543, "step": 5712 }, { "epoch": 55.39638554216867, "grad_norm": 7.930388450622559, "learning_rate": 2.2266990291262136e-05, "loss": 0.3916, "step": 5713 }, { "epoch": 55.40602409638554, "grad_norm": 12.074884414672852, "learning_rate": 2.2262135922330098e-05, "loss": 0.2938, "step": 5714 }, { "epoch": 55.41566265060241, "grad_norm": 3.492452383041382, "learning_rate": 2.225728155339806e-05, "loss": 0.1986, "step": 5715 }, { "epoch": 55.42530120481928, "grad_norm": 4.6670708656311035, "learning_rate": 2.225242718446602e-05, "loss": 0.2176, "step": 5716 }, { "epoch": 55.43493975903615, "grad_norm": 9.561995506286621, "learning_rate": 2.224757281553398e-05, "loss": 0.2663, "step": 5717 }, { "epoch": 55.44457831325301, "grad_norm": 3.92775559425354, "learning_rate": 2.2242718446601942e-05, "loss": 0.2849, "step": 5718 }, { "epoch": 55.45421686746988, "grad_norm": 8.071988105773926, "learning_rate": 2.2237864077669903e-05, "loss": 0.4255, "step": 5719 }, { "epoch": 55.463855421686745, "grad_norm": 4.272641658782959, "learning_rate": 2.2233009708737864e-05, "loss": 0.1775, "step": 5720 }, { "epoch": 55.473493975903615, "grad_norm": 4.251200199127197, "learning_rate": 2.2228155339805826e-05, "loss": 0.0779, "step": 5721 }, { "epoch": 55.48313253012048, "grad_norm": 6.670562744140625, "learning_rate": 2.2223300970873787e-05, "loss": 0.4905, "step": 5722 }, { "epoch": 55.49277108433735, "grad_norm": 3.9404852390289307, "learning_rate": 2.2218446601941748e-05, "loss": 0.1479, "step": 5723 }, { "epoch": 55.50240963855422, "grad_norm": 6.5663886070251465, "learning_rate": 2.221359223300971e-05, "loss": 0.1973, "step": 5724 }, { "epoch": 55.51204819277108, "grad_norm": 4.140435218811035, "learning_rate": 2.2208737864077674e-05, "loss": 0.2052, "step": 5725 }, { "epoch": 55.52168674698795, "grad_norm": 9.17310619354248, "learning_rate": 2.2203883495145635e-05, "loss": 0.2235, "step": 5726 }, { "epoch": 55.53132530120482, "grad_norm": 7.297004222869873, "learning_rate": 2.2199029126213592e-05, "loss": 0.324, "step": 5727 }, { "epoch": 55.54096385542169, "grad_norm": 2.1241049766540527, "learning_rate": 2.2194174757281554e-05, "loss": 0.1996, "step": 5728 }, { "epoch": 55.55060240963856, "grad_norm": 5.880243301391602, "learning_rate": 2.2189320388349515e-05, "loss": 0.4394, "step": 5729 }, { "epoch": 55.56024096385542, "grad_norm": 7.690224647521973, "learning_rate": 2.2184466019417476e-05, "loss": 0.5082, "step": 5730 }, { "epoch": 55.56987951807229, "grad_norm": 6.586236000061035, "learning_rate": 2.2179611650485437e-05, "loss": 0.4781, "step": 5731 }, { "epoch": 55.579518072289154, "grad_norm": 5.689458847045898, "learning_rate": 2.2174757281553398e-05, "loss": 0.3065, "step": 5732 }, { "epoch": 55.589156626506025, "grad_norm": 3.880502939224243, "learning_rate": 2.216990291262136e-05, "loss": 0.1997, "step": 5733 }, { "epoch": 55.59879518072289, "grad_norm": 5.367125034332275, "learning_rate": 2.216504854368932e-05, "loss": 0.2351, "step": 5734 }, { "epoch": 55.60843373493976, "grad_norm": 6.134960651397705, "learning_rate": 2.216019417475728e-05, "loss": 0.2414, "step": 5735 }, { "epoch": 55.61807228915663, "grad_norm": 15.356483459472656, "learning_rate": 2.2155339805825243e-05, "loss": 0.2337, "step": 5736 }, { "epoch": 55.62771084337349, "grad_norm": 5.9272332191467285, "learning_rate": 2.2150485436893207e-05, "loss": 0.2898, "step": 5737 }, { "epoch": 55.63734939759036, "grad_norm": 7.642618179321289, "learning_rate": 2.214563106796117e-05, "loss": 0.2187, "step": 5738 }, { "epoch": 55.646987951807226, "grad_norm": 11.109512329101562, "learning_rate": 2.214077669902913e-05, "loss": 0.3831, "step": 5739 }, { "epoch": 55.6566265060241, "grad_norm": 4.055840015411377, "learning_rate": 2.213592233009709e-05, "loss": 0.2168, "step": 5740 }, { "epoch": 55.66626506024097, "grad_norm": 3.5907740592956543, "learning_rate": 2.213106796116505e-05, "loss": 0.1296, "step": 5741 }, { "epoch": 55.67590361445783, "grad_norm": 4.869705677032471, "learning_rate": 2.212621359223301e-05, "loss": 0.247, "step": 5742 }, { "epoch": 55.6855421686747, "grad_norm": 6.55357551574707, "learning_rate": 2.212135922330097e-05, "loss": 0.1959, "step": 5743 }, { "epoch": 55.695180722891564, "grad_norm": 3.730722188949585, "learning_rate": 2.2116504854368932e-05, "loss": 0.3454, "step": 5744 }, { "epoch": 55.704819277108435, "grad_norm": 4.4150261878967285, "learning_rate": 2.2111650485436893e-05, "loss": 0.2715, "step": 5745 }, { "epoch": 55.7144578313253, "grad_norm": 2.9028186798095703, "learning_rate": 2.2106796116504854e-05, "loss": 0.1623, "step": 5746 }, { "epoch": 55.72409638554217, "grad_norm": 7.1979570388793945, "learning_rate": 2.2101941747572815e-05, "loss": 0.1564, "step": 5747 }, { "epoch": 55.73373493975904, "grad_norm": 5.557929515838623, "learning_rate": 2.2097087378640777e-05, "loss": 0.3899, "step": 5748 }, { "epoch": 55.7433734939759, "grad_norm": 3.581517219543457, "learning_rate": 2.2092233009708738e-05, "loss": 0.2618, "step": 5749 }, { "epoch": 55.75301204819277, "grad_norm": 3.643500804901123, "learning_rate": 2.2087378640776702e-05, "loss": 0.2853, "step": 5750 }, { "epoch": 55.762650602409636, "grad_norm": 5.9846367835998535, "learning_rate": 2.2082524271844663e-05, "loss": 0.2443, "step": 5751 }, { "epoch": 55.772289156626506, "grad_norm": 5.9172821044921875, "learning_rate": 2.2077669902912624e-05, "loss": 0.2125, "step": 5752 }, { "epoch": 55.78192771084338, "grad_norm": 4.411606788635254, "learning_rate": 2.2072815533980586e-05, "loss": 0.1223, "step": 5753 }, { "epoch": 55.79156626506024, "grad_norm": 6.4214606285095215, "learning_rate": 2.2067961165048547e-05, "loss": 0.2352, "step": 5754 }, { "epoch": 55.80120481927711, "grad_norm": 8.289748191833496, "learning_rate": 2.2063106796116505e-05, "loss": 0.3772, "step": 5755 }, { "epoch": 55.810843373493974, "grad_norm": 9.218714714050293, "learning_rate": 2.2058252427184466e-05, "loss": 0.2403, "step": 5756 }, { "epoch": 55.820481927710844, "grad_norm": 3.3388357162475586, "learning_rate": 2.2053398058252427e-05, "loss": 0.1328, "step": 5757 }, { "epoch": 55.83012048192771, "grad_norm": 4.764873504638672, "learning_rate": 2.2048543689320388e-05, "loss": 0.2412, "step": 5758 }, { "epoch": 55.83975903614458, "grad_norm": 6.281487941741943, "learning_rate": 2.204368932038835e-05, "loss": 0.2398, "step": 5759 }, { "epoch": 55.84939759036145, "grad_norm": 7.764011383056641, "learning_rate": 2.203883495145631e-05, "loss": 0.3019, "step": 5760 }, { "epoch": 55.85903614457831, "grad_norm": 8.194562911987305, "learning_rate": 2.203398058252427e-05, "loss": 0.3087, "step": 5761 }, { "epoch": 55.86867469879518, "grad_norm": 7.547508716583252, "learning_rate": 2.2029126213592233e-05, "loss": 0.1113, "step": 5762 }, { "epoch": 55.878313253012045, "grad_norm": 19.836933135986328, "learning_rate": 2.2024271844660197e-05, "loss": 0.3466, "step": 5763 }, { "epoch": 55.887951807228916, "grad_norm": 8.292682647705078, "learning_rate": 2.2019417475728158e-05, "loss": 0.4571, "step": 5764 }, { "epoch": 55.897590361445786, "grad_norm": 6.9896626472473145, "learning_rate": 2.201456310679612e-05, "loss": 0.1359, "step": 5765 }, { "epoch": 55.90722891566265, "grad_norm": 6.105642318725586, "learning_rate": 2.200970873786408e-05, "loss": 0.3335, "step": 5766 }, { "epoch": 55.91686746987952, "grad_norm": 1.4343208074569702, "learning_rate": 2.200485436893204e-05, "loss": 0.0695, "step": 5767 }, { "epoch": 55.92650602409638, "grad_norm": 5.994871616363525, "learning_rate": 2.2000000000000003e-05, "loss": 0.2276, "step": 5768 }, { "epoch": 55.936144578313254, "grad_norm": 2.812220811843872, "learning_rate": 2.199514563106796e-05, "loss": 0.0986, "step": 5769 }, { "epoch": 55.94578313253012, "grad_norm": 4.0255608558654785, "learning_rate": 2.1990291262135922e-05, "loss": 0.3537, "step": 5770 }, { "epoch": 55.95542168674699, "grad_norm": 17.324241638183594, "learning_rate": 2.1985436893203883e-05, "loss": 0.2312, "step": 5771 }, { "epoch": 55.96506024096386, "grad_norm": 6.011123180389404, "learning_rate": 2.1980582524271844e-05, "loss": 0.3036, "step": 5772 }, { "epoch": 55.97469879518072, "grad_norm": 2.4999141693115234, "learning_rate": 2.1975728155339805e-05, "loss": 0.2542, "step": 5773 }, { "epoch": 55.98433734939759, "grad_norm": 4.619062423706055, "learning_rate": 2.1970873786407766e-05, "loss": 0.207, "step": 5774 }, { "epoch": 55.993975903614455, "grad_norm": 7.21535587310791, "learning_rate": 2.196601941747573e-05, "loss": 0.3411, "step": 5775 }, { "epoch": 56.00963855421687, "grad_norm": 2.750403881072998, "learning_rate": 2.1961165048543692e-05, "loss": 0.1931, "step": 5776 }, { "epoch": 56.019277108433734, "grad_norm": 3.717790126800537, "learning_rate": 2.1956310679611653e-05, "loss": 0.3049, "step": 5777 }, { "epoch": 56.028915662650604, "grad_norm": 9.189784049987793, "learning_rate": 2.1951456310679614e-05, "loss": 0.4532, "step": 5778 }, { "epoch": 56.03855421686747, "grad_norm": 3.307419776916504, "learning_rate": 2.1946601941747575e-05, "loss": 0.2452, "step": 5779 }, { "epoch": 56.04819277108434, "grad_norm": 3.1884164810180664, "learning_rate": 2.1941747572815537e-05, "loss": 0.2625, "step": 5780 }, { "epoch": 56.0578313253012, "grad_norm": 8.909733772277832, "learning_rate": 2.1936893203883498e-05, "loss": 0.2326, "step": 5781 }, { "epoch": 56.06746987951807, "grad_norm": 4.1544318199157715, "learning_rate": 2.193203883495146e-05, "loss": 0.1118, "step": 5782 }, { "epoch": 56.07710843373494, "grad_norm": 4.389026641845703, "learning_rate": 2.1927184466019417e-05, "loss": 0.2576, "step": 5783 }, { "epoch": 56.086746987951805, "grad_norm": 9.867205619812012, "learning_rate": 2.1922330097087378e-05, "loss": 0.3435, "step": 5784 }, { "epoch": 56.096385542168676, "grad_norm": 10.434611320495605, "learning_rate": 2.191747572815534e-05, "loss": 0.1176, "step": 5785 }, { "epoch": 56.10602409638554, "grad_norm": 4.976791858673096, "learning_rate": 2.19126213592233e-05, "loss": 0.1791, "step": 5786 }, { "epoch": 56.11566265060241, "grad_norm": 4.506697654724121, "learning_rate": 2.190776699029126e-05, "loss": 0.16, "step": 5787 }, { "epoch": 56.12530120481928, "grad_norm": 5.894115924835205, "learning_rate": 2.1902912621359226e-05, "loss": 0.2386, "step": 5788 }, { "epoch": 56.13493975903614, "grad_norm": 2.939157247543335, "learning_rate": 2.1898058252427187e-05, "loss": 0.1982, "step": 5789 }, { "epoch": 56.144578313253014, "grad_norm": 8.553075790405273, "learning_rate": 2.1893203883495148e-05, "loss": 0.2439, "step": 5790 }, { "epoch": 56.15421686746988, "grad_norm": 3.9680964946746826, "learning_rate": 2.188834951456311e-05, "loss": 0.4104, "step": 5791 }, { "epoch": 56.16385542168675, "grad_norm": 3.1082346439361572, "learning_rate": 2.188349514563107e-05, "loss": 0.1536, "step": 5792 }, { "epoch": 56.17349397590361, "grad_norm": 6.462829113006592, "learning_rate": 2.187864077669903e-05, "loss": 0.1811, "step": 5793 }, { "epoch": 56.18313253012048, "grad_norm": 4.440794467926025, "learning_rate": 2.1873786407766993e-05, "loss": 0.359, "step": 5794 }, { "epoch": 56.19277108433735, "grad_norm": 5.645678520202637, "learning_rate": 2.1868932038834954e-05, "loss": 0.2845, "step": 5795 }, { "epoch": 56.202409638554215, "grad_norm": 2.792971611022949, "learning_rate": 2.186407766990291e-05, "loss": 0.1824, "step": 5796 }, { "epoch": 56.212048192771086, "grad_norm": 5.198076248168945, "learning_rate": 2.1859223300970873e-05, "loss": 0.3221, "step": 5797 }, { "epoch": 56.22168674698795, "grad_norm": 3.681932210922241, "learning_rate": 2.1854368932038834e-05, "loss": 0.1622, "step": 5798 }, { "epoch": 56.23132530120482, "grad_norm": 4.3679609298706055, "learning_rate": 2.1849514563106795e-05, "loss": 0.1625, "step": 5799 }, { "epoch": 56.24096385542169, "grad_norm": 3.4256644248962402, "learning_rate": 2.1844660194174756e-05, "loss": 0.2414, "step": 5800 }, { "epoch": 56.25060240963855, "grad_norm": 5.185581684112549, "learning_rate": 2.183980582524272e-05, "loss": 0.3242, "step": 5801 }, { "epoch": 56.26024096385542, "grad_norm": 5.102097511291504, "learning_rate": 2.1834951456310682e-05, "loss": 0.2116, "step": 5802 }, { "epoch": 56.26987951807229, "grad_norm": 6.009120464324951, "learning_rate": 2.1830097087378643e-05, "loss": 0.1789, "step": 5803 }, { "epoch": 56.27951807228916, "grad_norm": 9.152305603027344, "learning_rate": 2.1825242718446604e-05, "loss": 0.3202, "step": 5804 }, { "epoch": 56.28915662650602, "grad_norm": 3.993398904800415, "learning_rate": 2.1820388349514565e-05, "loss": 0.2214, "step": 5805 }, { "epoch": 56.29879518072289, "grad_norm": 5.142245769500732, "learning_rate": 2.1815533980582526e-05, "loss": 0.3022, "step": 5806 }, { "epoch": 56.30843373493976, "grad_norm": 11.96977710723877, "learning_rate": 2.1810679611650487e-05, "loss": 0.1849, "step": 5807 }, { "epoch": 56.318072289156625, "grad_norm": 3.6644816398620605, "learning_rate": 2.180582524271845e-05, "loss": 0.1369, "step": 5808 }, { "epoch": 56.327710843373495, "grad_norm": 4.0794830322265625, "learning_rate": 2.180097087378641e-05, "loss": 0.2818, "step": 5809 }, { "epoch": 56.33734939759036, "grad_norm": 5.746973991394043, "learning_rate": 2.1796116504854368e-05, "loss": 0.2718, "step": 5810 }, { "epoch": 56.34698795180723, "grad_norm": 3.8217902183532715, "learning_rate": 2.179126213592233e-05, "loss": 0.1992, "step": 5811 }, { "epoch": 56.3566265060241, "grad_norm": 4.304956436157227, "learning_rate": 2.178640776699029e-05, "loss": 0.2794, "step": 5812 }, { "epoch": 56.36626506024096, "grad_norm": 4.540401458740234, "learning_rate": 2.1781553398058254e-05, "loss": 0.2575, "step": 5813 }, { "epoch": 56.37590361445783, "grad_norm": 9.085604667663574, "learning_rate": 2.1776699029126216e-05, "loss": 0.4728, "step": 5814 }, { "epoch": 56.3855421686747, "grad_norm": 3.551286458969116, "learning_rate": 2.1771844660194177e-05, "loss": 0.1065, "step": 5815 }, { "epoch": 56.39518072289157, "grad_norm": 4.937958240509033, "learning_rate": 2.1766990291262138e-05, "loss": 0.2454, "step": 5816 }, { "epoch": 56.40481927710843, "grad_norm": 3.555473566055298, "learning_rate": 2.17621359223301e-05, "loss": 0.1199, "step": 5817 }, { "epoch": 56.4144578313253, "grad_norm": 5.553986072540283, "learning_rate": 2.175728155339806e-05, "loss": 0.3872, "step": 5818 }, { "epoch": 56.42409638554217, "grad_norm": 3.0559606552124023, "learning_rate": 2.175242718446602e-05, "loss": 0.2173, "step": 5819 }, { "epoch": 56.433734939759034, "grad_norm": 8.832612991333008, "learning_rate": 2.1747572815533982e-05, "loss": 0.1436, "step": 5820 }, { "epoch": 56.443373493975905, "grad_norm": 7.815285682678223, "learning_rate": 2.1742718446601944e-05, "loss": 0.3506, "step": 5821 }, { "epoch": 56.45301204819277, "grad_norm": 12.52436637878418, "learning_rate": 2.1737864077669905e-05, "loss": 0.4963, "step": 5822 }, { "epoch": 56.46265060240964, "grad_norm": 18.8331298828125, "learning_rate": 2.1733009708737866e-05, "loss": 0.2395, "step": 5823 }, { "epoch": 56.47228915662651, "grad_norm": 6.417325973510742, "learning_rate": 2.1728155339805824e-05, "loss": 0.2419, "step": 5824 }, { "epoch": 56.48192771084337, "grad_norm": 3.8645517826080322, "learning_rate": 2.1723300970873785e-05, "loss": 0.3217, "step": 5825 }, { "epoch": 56.49156626506024, "grad_norm": 2.0730748176574707, "learning_rate": 2.171844660194175e-05, "loss": 0.1442, "step": 5826 }, { "epoch": 56.501204819277106, "grad_norm": 3.9653241634368896, "learning_rate": 2.171359223300971e-05, "loss": 0.2722, "step": 5827 }, { "epoch": 56.51084337349398, "grad_norm": 4.729495048522949, "learning_rate": 2.170873786407767e-05, "loss": 0.2051, "step": 5828 }, { "epoch": 56.52048192771084, "grad_norm": 4.8583502769470215, "learning_rate": 2.1703883495145633e-05, "loss": 0.1386, "step": 5829 }, { "epoch": 56.53012048192771, "grad_norm": 7.982233047485352, "learning_rate": 2.1699029126213594e-05, "loss": 0.4925, "step": 5830 }, { "epoch": 56.53975903614458, "grad_norm": 9.16120433807373, "learning_rate": 2.1694174757281555e-05, "loss": 0.1937, "step": 5831 }, { "epoch": 56.549397590361444, "grad_norm": 6.875736236572266, "learning_rate": 2.1689320388349516e-05, "loss": 0.3623, "step": 5832 }, { "epoch": 56.559036144578315, "grad_norm": 4.153230667114258, "learning_rate": 2.1684466019417477e-05, "loss": 0.1723, "step": 5833 }, { "epoch": 56.56867469879518, "grad_norm": 3.2710418701171875, "learning_rate": 2.167961165048544e-05, "loss": 0.1958, "step": 5834 }, { "epoch": 56.57831325301205, "grad_norm": 2.736401319503784, "learning_rate": 2.16747572815534e-05, "loss": 0.2316, "step": 5835 }, { "epoch": 56.58795180722892, "grad_norm": 5.398691177368164, "learning_rate": 2.166990291262136e-05, "loss": 0.265, "step": 5836 }, { "epoch": 56.59759036144578, "grad_norm": 35.49637985229492, "learning_rate": 2.1665048543689322e-05, "loss": 0.3655, "step": 5837 }, { "epoch": 56.60722891566265, "grad_norm": 7.84057092666626, "learning_rate": 2.1660194174757283e-05, "loss": 0.0747, "step": 5838 }, { "epoch": 56.616867469879516, "grad_norm": 4.920572280883789, "learning_rate": 2.1655339805825244e-05, "loss": 0.1835, "step": 5839 }, { "epoch": 56.626506024096386, "grad_norm": 3.170553684234619, "learning_rate": 2.1650485436893205e-05, "loss": 0.1615, "step": 5840 }, { "epoch": 56.63614457831325, "grad_norm": 4.75973653793335, "learning_rate": 2.1645631067961166e-05, "loss": 0.249, "step": 5841 }, { "epoch": 56.64578313253012, "grad_norm": 7.434493064880371, "learning_rate": 2.1640776699029128e-05, "loss": 0.3385, "step": 5842 }, { "epoch": 56.65542168674699, "grad_norm": 3.229844808578491, "learning_rate": 2.163592233009709e-05, "loss": 0.2852, "step": 5843 }, { "epoch": 56.665060240963854, "grad_norm": 4.546138763427734, "learning_rate": 2.163106796116505e-05, "loss": 0.2885, "step": 5844 }, { "epoch": 56.674698795180724, "grad_norm": 4.562071323394775, "learning_rate": 2.162621359223301e-05, "loss": 0.1274, "step": 5845 }, { "epoch": 56.68433734939759, "grad_norm": 5.8765764236450195, "learning_rate": 2.1621359223300972e-05, "loss": 0.2899, "step": 5846 }, { "epoch": 56.69397590361446, "grad_norm": 4.1740241050720215, "learning_rate": 2.1616504854368933e-05, "loss": 0.1825, "step": 5847 }, { "epoch": 56.70361445783133, "grad_norm": 7.536796569824219, "learning_rate": 2.1611650485436894e-05, "loss": 0.3575, "step": 5848 }, { "epoch": 56.71325301204819, "grad_norm": 3.6604955196380615, "learning_rate": 2.1606796116504856e-05, "loss": 0.2134, "step": 5849 }, { "epoch": 56.72289156626506, "grad_norm": 4.222436428070068, "learning_rate": 2.1601941747572817e-05, "loss": 0.2959, "step": 5850 }, { "epoch": 56.732530120481925, "grad_norm": 7.029292106628418, "learning_rate": 2.1597087378640778e-05, "loss": 0.2233, "step": 5851 }, { "epoch": 56.742168674698796, "grad_norm": 11.282459259033203, "learning_rate": 2.159223300970874e-05, "loss": 0.6118, "step": 5852 }, { "epoch": 56.75180722891566, "grad_norm": 6.997131824493408, "learning_rate": 2.15873786407767e-05, "loss": 0.2349, "step": 5853 }, { "epoch": 56.76144578313253, "grad_norm": 3.937102794647217, "learning_rate": 2.158252427184466e-05, "loss": 0.3144, "step": 5854 }, { "epoch": 56.7710843373494, "grad_norm": 16.474040985107422, "learning_rate": 2.1577669902912622e-05, "loss": 0.1999, "step": 5855 }, { "epoch": 56.78072289156626, "grad_norm": 4.588617324829102, "learning_rate": 2.1572815533980584e-05, "loss": 0.1274, "step": 5856 }, { "epoch": 56.790361445783134, "grad_norm": 4.553621768951416, "learning_rate": 2.1567961165048545e-05, "loss": 0.1815, "step": 5857 }, { "epoch": 56.8, "grad_norm": 3.70225191116333, "learning_rate": 2.1563106796116506e-05, "loss": 0.1707, "step": 5858 }, { "epoch": 56.80963855421687, "grad_norm": 5.80915641784668, "learning_rate": 2.1558252427184467e-05, "loss": 0.2127, "step": 5859 }, { "epoch": 56.81927710843374, "grad_norm": 6.211380481719971, "learning_rate": 2.1553398058252428e-05, "loss": 0.275, "step": 5860 }, { "epoch": 56.8289156626506, "grad_norm": 4.728788375854492, "learning_rate": 2.154854368932039e-05, "loss": 0.0949, "step": 5861 }, { "epoch": 56.83855421686747, "grad_norm": 6.086292743682861, "learning_rate": 2.154368932038835e-05, "loss": 0.2617, "step": 5862 }, { "epoch": 56.848192771084335, "grad_norm": 2.984172821044922, "learning_rate": 2.153883495145631e-05, "loss": 0.0912, "step": 5863 }, { "epoch": 56.857831325301206, "grad_norm": 4.266563892364502, "learning_rate": 2.1533980582524273e-05, "loss": 0.1894, "step": 5864 }, { "epoch": 56.86746987951807, "grad_norm": 8.615265846252441, "learning_rate": 2.1529126213592234e-05, "loss": 0.1926, "step": 5865 }, { "epoch": 56.87710843373494, "grad_norm": 3.6090447902679443, "learning_rate": 2.1524271844660195e-05, "loss": 0.1393, "step": 5866 }, { "epoch": 56.88674698795181, "grad_norm": 7.364493370056152, "learning_rate": 2.1519417475728156e-05, "loss": 0.2457, "step": 5867 }, { "epoch": 56.89638554216867, "grad_norm": 5.503935813903809, "learning_rate": 2.1514563106796117e-05, "loss": 0.2329, "step": 5868 }, { "epoch": 56.90602409638554, "grad_norm": 5.317118167877197, "learning_rate": 2.150970873786408e-05, "loss": 0.4081, "step": 5869 }, { "epoch": 56.91566265060241, "grad_norm": 3.3767971992492676, "learning_rate": 2.150485436893204e-05, "loss": 0.1068, "step": 5870 }, { "epoch": 56.92530120481928, "grad_norm": 4.4843316078186035, "learning_rate": 2.15e-05, "loss": 0.217, "step": 5871 }, { "epoch": 56.93493975903615, "grad_norm": 7.4138617515563965, "learning_rate": 2.1495145631067962e-05, "loss": 0.2317, "step": 5872 }, { "epoch": 56.94457831325301, "grad_norm": 4.709306716918945, "learning_rate": 2.1490291262135923e-05, "loss": 0.2637, "step": 5873 }, { "epoch": 56.95421686746988, "grad_norm": 12.512312889099121, "learning_rate": 2.1485436893203884e-05, "loss": 0.4362, "step": 5874 }, { "epoch": 56.963855421686745, "grad_norm": 8.968624114990234, "learning_rate": 2.1480582524271845e-05, "loss": 0.5403, "step": 5875 }, { "epoch": 56.973493975903615, "grad_norm": 2.009556770324707, "learning_rate": 2.1475728155339807e-05, "loss": 0.1484, "step": 5876 }, { "epoch": 56.98313253012048, "grad_norm": 6.755132675170898, "learning_rate": 2.1470873786407768e-05, "loss": 0.3134, "step": 5877 }, { "epoch": 56.99277108433735, "grad_norm": 3.403388738632202, "learning_rate": 2.146601941747573e-05, "loss": 0.315, "step": 5878 }, { "epoch": 57.00843373493976, "grad_norm": 16.26483154296875, "learning_rate": 2.146116504854369e-05, "loss": 0.3935, "step": 5879 }, { "epoch": 57.01807228915663, "grad_norm": 10.533905982971191, "learning_rate": 2.145631067961165e-05, "loss": 0.2316, "step": 5880 }, { "epoch": 57.02771084337349, "grad_norm": 4.8025898933410645, "learning_rate": 2.1451456310679612e-05, "loss": 0.2301, "step": 5881 }, { "epoch": 57.03734939759036, "grad_norm": 5.6015214920043945, "learning_rate": 2.1446601941747573e-05, "loss": 0.1399, "step": 5882 }, { "epoch": 57.04698795180723, "grad_norm": 21.211347579956055, "learning_rate": 2.1441747572815535e-05, "loss": 0.2425, "step": 5883 }, { "epoch": 57.056626506024095, "grad_norm": 3.622798204421997, "learning_rate": 2.1436893203883496e-05, "loss": 0.5146, "step": 5884 }, { "epoch": 57.066265060240966, "grad_norm": 18.258440017700195, "learning_rate": 2.1432038834951457e-05, "loss": 0.1534, "step": 5885 }, { "epoch": 57.07590361445783, "grad_norm": 10.233071327209473, "learning_rate": 2.1427184466019418e-05, "loss": 0.2315, "step": 5886 }, { "epoch": 57.0855421686747, "grad_norm": 7.474612236022949, "learning_rate": 2.142233009708738e-05, "loss": 0.149, "step": 5887 }, { "epoch": 57.09518072289157, "grad_norm": 34.59977340698242, "learning_rate": 2.141747572815534e-05, "loss": 0.4374, "step": 5888 }, { "epoch": 57.10481927710843, "grad_norm": 6.581010341644287, "learning_rate": 2.1412621359223305e-05, "loss": 0.201, "step": 5889 }, { "epoch": 57.1144578313253, "grad_norm": 4.277044296264648, "learning_rate": 2.1407766990291263e-05, "loss": 0.2928, "step": 5890 }, { "epoch": 57.12409638554217, "grad_norm": 3.7778635025024414, "learning_rate": 2.1402912621359224e-05, "loss": 0.3836, "step": 5891 }, { "epoch": 57.13373493975904, "grad_norm": 10.114303588867188, "learning_rate": 2.1398058252427185e-05, "loss": 0.2115, "step": 5892 }, { "epoch": 57.1433734939759, "grad_norm": 1.6051005125045776, "learning_rate": 2.1393203883495146e-05, "loss": 0.1812, "step": 5893 }, { "epoch": 57.15301204819277, "grad_norm": 13.639455795288086, "learning_rate": 2.1388349514563107e-05, "loss": 0.2677, "step": 5894 }, { "epoch": 57.16265060240964, "grad_norm": 12.232962608337402, "learning_rate": 2.138349514563107e-05, "loss": 0.3563, "step": 5895 }, { "epoch": 57.172289156626505, "grad_norm": 3.154888153076172, "learning_rate": 2.137864077669903e-05, "loss": 0.2657, "step": 5896 }, { "epoch": 57.181927710843375, "grad_norm": 18.925657272338867, "learning_rate": 2.137378640776699e-05, "loss": 0.2507, "step": 5897 }, { "epoch": 57.19156626506024, "grad_norm": 16.744609832763672, "learning_rate": 2.1368932038834952e-05, "loss": 0.2233, "step": 5898 }, { "epoch": 57.20120481927711, "grad_norm": 2.8373093605041504, "learning_rate": 2.1364077669902913e-05, "loss": 0.3282, "step": 5899 }, { "epoch": 57.21084337349398, "grad_norm": 10.484814643859863, "learning_rate": 2.1359223300970874e-05, "loss": 0.1608, "step": 5900 }, { "epoch": 57.22048192771084, "grad_norm": 2.096803665161133, "learning_rate": 2.1354368932038835e-05, "loss": 0.1539, "step": 5901 }, { "epoch": 57.23012048192771, "grad_norm": 17.383913040161133, "learning_rate": 2.13495145631068e-05, "loss": 0.2051, "step": 5902 }, { "epoch": 57.23975903614458, "grad_norm": 16.65846824645996, "learning_rate": 2.134466019417476e-05, "loss": 0.2326, "step": 5903 }, { "epoch": 57.24939759036145, "grad_norm": 11.879277229309082, "learning_rate": 2.133980582524272e-05, "loss": 0.3375, "step": 5904 }, { "epoch": 57.25903614457831, "grad_norm": 1.800352931022644, "learning_rate": 2.133495145631068e-05, "loss": 0.179, "step": 5905 }, { "epoch": 57.26867469879518, "grad_norm": 3.7806169986724854, "learning_rate": 2.133009708737864e-05, "loss": 0.1542, "step": 5906 }, { "epoch": 57.27831325301205, "grad_norm": 1.6633315086364746, "learning_rate": 2.1325242718446602e-05, "loss": 0.1963, "step": 5907 }, { "epoch": 57.287951807228914, "grad_norm": 2.470315456390381, "learning_rate": 2.1320388349514563e-05, "loss": 0.1832, "step": 5908 }, { "epoch": 57.297590361445785, "grad_norm": 4.549215793609619, "learning_rate": 2.1315533980582524e-05, "loss": 0.0852, "step": 5909 }, { "epoch": 57.30722891566265, "grad_norm": 9.029528617858887, "learning_rate": 2.1310679611650486e-05, "loss": 0.2015, "step": 5910 }, { "epoch": 57.31686746987952, "grad_norm": 2.6653637886047363, "learning_rate": 2.1305825242718447e-05, "loss": 0.2704, "step": 5911 }, { "epoch": 57.32650602409639, "grad_norm": 16.15947723388672, "learning_rate": 2.1300970873786408e-05, "loss": 0.172, "step": 5912 }, { "epoch": 57.33614457831325, "grad_norm": 7.129370212554932, "learning_rate": 2.129611650485437e-05, "loss": 0.1788, "step": 5913 }, { "epoch": 57.34578313253012, "grad_norm": 8.580219268798828, "learning_rate": 2.1291262135922333e-05, "loss": 0.1434, "step": 5914 }, { "epoch": 57.355421686746986, "grad_norm": 3.500227212905884, "learning_rate": 2.1286407766990295e-05, "loss": 0.1614, "step": 5915 }, { "epoch": 57.36506024096386, "grad_norm": 0.8267112970352173, "learning_rate": 2.1281553398058256e-05, "loss": 0.4866, "step": 5916 }, { "epoch": 57.37469879518072, "grad_norm": 1.6760319471359253, "learning_rate": 2.1276699029126217e-05, "loss": 0.1563, "step": 5917 }, { "epoch": 57.38433734939759, "grad_norm": 17.53912925720215, "learning_rate": 2.1271844660194175e-05, "loss": 0.2314, "step": 5918 }, { "epoch": 57.39397590361446, "grad_norm": 11.679086685180664, "learning_rate": 2.1266990291262136e-05, "loss": 0.3805, "step": 5919 }, { "epoch": 57.403614457831324, "grad_norm": 5.578109264373779, "learning_rate": 2.1262135922330097e-05, "loss": 0.2082, "step": 5920 }, { "epoch": 57.413253012048195, "grad_norm": 21.6810359954834, "learning_rate": 2.1257281553398058e-05, "loss": 0.2585, "step": 5921 }, { "epoch": 57.42289156626506, "grad_norm": 28.39111328125, "learning_rate": 2.125242718446602e-05, "loss": 0.354, "step": 5922 }, { "epoch": 57.43253012048193, "grad_norm": 16.436664581298828, "learning_rate": 2.124757281553398e-05, "loss": 0.1966, "step": 5923 }, { "epoch": 57.44216867469879, "grad_norm": 25.056610107421875, "learning_rate": 2.124271844660194e-05, "loss": 0.3579, "step": 5924 }, { "epoch": 57.45180722891566, "grad_norm": 14.539295196533203, "learning_rate": 2.1237864077669903e-05, "loss": 0.4009, "step": 5925 }, { "epoch": 57.46144578313253, "grad_norm": 11.715115547180176, "learning_rate": 2.1233009708737864e-05, "loss": 0.2966, "step": 5926 }, { "epoch": 57.471084337349396, "grad_norm": 6.210849761962891, "learning_rate": 2.122815533980583e-05, "loss": 0.2119, "step": 5927 }, { "epoch": 57.480722891566266, "grad_norm": 3.701822519302368, "learning_rate": 2.122330097087379e-05, "loss": 0.2042, "step": 5928 }, { "epoch": 57.49036144578313, "grad_norm": 24.977243423461914, "learning_rate": 2.121844660194175e-05, "loss": 0.1869, "step": 5929 }, { "epoch": 57.5, "grad_norm": 9.317892074584961, "learning_rate": 2.1213592233009712e-05, "loss": 0.4802, "step": 5930 }, { "epoch": 57.50963855421687, "grad_norm": 10.16365909576416, "learning_rate": 2.1208737864077673e-05, "loss": 0.1284, "step": 5931 }, { "epoch": 57.519277108433734, "grad_norm": 1.5584263801574707, "learning_rate": 2.120388349514563e-05, "loss": 0.2207, "step": 5932 }, { "epoch": 57.528915662650604, "grad_norm": 20.805612564086914, "learning_rate": 2.1199029126213592e-05, "loss": 0.2552, "step": 5933 }, { "epoch": 57.53855421686747, "grad_norm": 9.817665100097656, "learning_rate": 2.1194174757281553e-05, "loss": 0.3051, "step": 5934 }, { "epoch": 57.54819277108434, "grad_norm": 1.535173773765564, "learning_rate": 2.1189320388349514e-05, "loss": 0.3251, "step": 5935 }, { "epoch": 57.55783132530121, "grad_norm": 3.1079390048980713, "learning_rate": 2.1184466019417475e-05, "loss": 0.1213, "step": 5936 }, { "epoch": 57.56746987951807, "grad_norm": 6.001056671142578, "learning_rate": 2.1179611650485436e-05, "loss": 0.2714, "step": 5937 }, { "epoch": 57.57710843373494, "grad_norm": 39.76083755493164, "learning_rate": 2.1174757281553398e-05, "loss": 0.3183, "step": 5938 }, { "epoch": 57.586746987951805, "grad_norm": 1.3327019214630127, "learning_rate": 2.1169902912621362e-05, "loss": 0.0995, "step": 5939 }, { "epoch": 57.596385542168676, "grad_norm": 19.857444763183594, "learning_rate": 2.1165048543689323e-05, "loss": 0.4195, "step": 5940 }, { "epoch": 57.60602409638554, "grad_norm": 10.366683006286621, "learning_rate": 2.1160194174757284e-05, "loss": 0.1622, "step": 5941 }, { "epoch": 57.61566265060241, "grad_norm": 2.512465476989746, "learning_rate": 2.1155339805825246e-05, "loss": 0.3783, "step": 5942 }, { "epoch": 57.62530120481928, "grad_norm": 23.274837493896484, "learning_rate": 2.1150485436893207e-05, "loss": 0.2258, "step": 5943 }, { "epoch": 57.63493975903614, "grad_norm": 6.464802265167236, "learning_rate": 2.1145631067961168e-05, "loss": 0.1748, "step": 5944 }, { "epoch": 57.644578313253014, "grad_norm": 16.904109954833984, "learning_rate": 2.1140776699029126e-05, "loss": 0.3567, "step": 5945 }, { "epoch": 57.65421686746988, "grad_norm": 7.5785231590271, "learning_rate": 2.1135922330097087e-05, "loss": 0.2812, "step": 5946 }, { "epoch": 57.66385542168675, "grad_norm": 50.417449951171875, "learning_rate": 2.1131067961165048e-05, "loss": 0.3477, "step": 5947 }, { "epoch": 57.67349397590361, "grad_norm": 3.6839659214019775, "learning_rate": 2.112621359223301e-05, "loss": 0.2729, "step": 5948 }, { "epoch": 57.68313253012048, "grad_norm": 6.892948150634766, "learning_rate": 2.112135922330097e-05, "loss": 0.2488, "step": 5949 }, { "epoch": 57.69277108433735, "grad_norm": 4.93496561050415, "learning_rate": 2.111650485436893e-05, "loss": 0.208, "step": 5950 }, { "epoch": 57.702409638554215, "grad_norm": 17.51980972290039, "learning_rate": 2.1111650485436892e-05, "loss": 0.1794, "step": 5951 }, { "epoch": 57.712048192771086, "grad_norm": 0.9573351740837097, "learning_rate": 2.1106796116504857e-05, "loss": 0.223, "step": 5952 }, { "epoch": 57.72168674698795, "grad_norm": 5.0851874351501465, "learning_rate": 2.1101941747572818e-05, "loss": 0.2788, "step": 5953 }, { "epoch": 57.73132530120482, "grad_norm": 11.77540397644043, "learning_rate": 2.109708737864078e-05, "loss": 0.3801, "step": 5954 }, { "epoch": 57.74096385542169, "grad_norm": 1.4844615459442139, "learning_rate": 2.109223300970874e-05, "loss": 0.1692, "step": 5955 }, { "epoch": 57.75060240963855, "grad_norm": 7.7122416496276855, "learning_rate": 2.10873786407767e-05, "loss": 0.1857, "step": 5956 }, { "epoch": 57.76024096385542, "grad_norm": 2.3792338371276855, "learning_rate": 2.1082524271844663e-05, "loss": 0.1844, "step": 5957 }, { "epoch": 57.76987951807229, "grad_norm": 28.031158447265625, "learning_rate": 2.1077669902912624e-05, "loss": 0.4263, "step": 5958 }, { "epoch": 57.77951807228916, "grad_norm": 8.763328552246094, "learning_rate": 2.107281553398058e-05, "loss": 0.3372, "step": 5959 }, { "epoch": 57.78915662650603, "grad_norm": 15.914319038391113, "learning_rate": 2.1067961165048543e-05, "loss": 0.2956, "step": 5960 }, { "epoch": 57.79879518072289, "grad_norm": 3.1259877681732178, "learning_rate": 2.1063106796116504e-05, "loss": 0.218, "step": 5961 }, { "epoch": 57.80843373493976, "grad_norm": 1.5489726066589355, "learning_rate": 2.1058252427184465e-05, "loss": 0.1213, "step": 5962 }, { "epoch": 57.818072289156625, "grad_norm": 11.093467712402344, "learning_rate": 2.1053398058252426e-05, "loss": 0.4576, "step": 5963 }, { "epoch": 57.827710843373495, "grad_norm": 4.7743964195251465, "learning_rate": 2.1048543689320387e-05, "loss": 0.1502, "step": 5964 }, { "epoch": 57.83734939759036, "grad_norm": 4.731731414794922, "learning_rate": 2.1043689320388352e-05, "loss": 0.237, "step": 5965 }, { "epoch": 57.84698795180723, "grad_norm": 6.491014003753662, "learning_rate": 2.1038834951456313e-05, "loss": 0.2814, "step": 5966 }, { "epoch": 57.8566265060241, "grad_norm": 14.36970043182373, "learning_rate": 2.1033980582524274e-05, "loss": 0.2746, "step": 5967 }, { "epoch": 57.86626506024096, "grad_norm": 19.324047088623047, "learning_rate": 2.1029126213592235e-05, "loss": 0.1962, "step": 5968 }, { "epoch": 57.87590361445783, "grad_norm": 10.901752471923828, "learning_rate": 2.1024271844660196e-05, "loss": 0.352, "step": 5969 }, { "epoch": 57.8855421686747, "grad_norm": 3.47170352935791, "learning_rate": 2.1019417475728158e-05, "loss": 0.2944, "step": 5970 }, { "epoch": 57.89518072289157, "grad_norm": 8.185713768005371, "learning_rate": 2.101456310679612e-05, "loss": 0.1949, "step": 5971 }, { "epoch": 57.90481927710843, "grad_norm": 15.663561820983887, "learning_rate": 2.100970873786408e-05, "loss": 0.1558, "step": 5972 }, { "epoch": 57.9144578313253, "grad_norm": 4.411247730255127, "learning_rate": 2.1004854368932038e-05, "loss": 0.1332, "step": 5973 }, { "epoch": 57.92409638554217, "grad_norm": 3.3611807823181152, "learning_rate": 2.1e-05, "loss": 0.295, "step": 5974 }, { "epoch": 57.933734939759034, "grad_norm": 2.583418369293213, "learning_rate": 2.099514563106796e-05, "loss": 0.2779, "step": 5975 }, { "epoch": 57.943373493975905, "grad_norm": 6.387343406677246, "learning_rate": 2.099029126213592e-05, "loss": 0.3014, "step": 5976 }, { "epoch": 57.95301204819277, "grad_norm": 14.2142972946167, "learning_rate": 2.0985436893203886e-05, "loss": 0.1852, "step": 5977 }, { "epoch": 57.96265060240964, "grad_norm": 29.965953826904297, "learning_rate": 2.0980582524271847e-05, "loss": 0.1965, "step": 5978 }, { "epoch": 57.97228915662651, "grad_norm": 6.803399562835693, "learning_rate": 2.0975728155339808e-05, "loss": 0.2026, "step": 5979 }, { "epoch": 57.98192771084337, "grad_norm": 4.01730489730835, "learning_rate": 2.097087378640777e-05, "loss": 0.1356, "step": 5980 }, { "epoch": 57.99156626506024, "grad_norm": 7.629676818847656, "learning_rate": 2.096601941747573e-05, "loss": 0.2423, "step": 5981 }, { "epoch": 58.00722891566265, "grad_norm": 2.933034658432007, "learning_rate": 2.096116504854369e-05, "loss": 0.155, "step": 5982 }, { "epoch": 58.01686746987952, "grad_norm": 19.045825958251953, "learning_rate": 2.0956310679611653e-05, "loss": 0.149, "step": 5983 }, { "epoch": 58.026506024096385, "grad_norm": 7.948464870452881, "learning_rate": 2.0951456310679614e-05, "loss": 0.1738, "step": 5984 }, { "epoch": 58.036144578313255, "grad_norm": 9.187908172607422, "learning_rate": 2.0946601941747575e-05, "loss": 0.2613, "step": 5985 }, { "epoch": 58.04578313253012, "grad_norm": 4.939719200134277, "learning_rate": 2.0941747572815536e-05, "loss": 0.2424, "step": 5986 }, { "epoch": 58.05542168674699, "grad_norm": 2.362952947616577, "learning_rate": 2.0936893203883494e-05, "loss": 0.2391, "step": 5987 }, { "epoch": 58.06506024096385, "grad_norm": 2.0391173362731934, "learning_rate": 2.0932038834951455e-05, "loss": 0.1572, "step": 5988 }, { "epoch": 58.07469879518072, "grad_norm": 2.5558013916015625, "learning_rate": 2.0927184466019416e-05, "loss": 0.1518, "step": 5989 }, { "epoch": 58.08433734939759, "grad_norm": 6.913815975189209, "learning_rate": 2.092233009708738e-05, "loss": 0.2397, "step": 5990 }, { "epoch": 58.09397590361446, "grad_norm": 5.213879585266113, "learning_rate": 2.0917475728155342e-05, "loss": 0.1615, "step": 5991 }, { "epoch": 58.10361445783133, "grad_norm": 3.20542049407959, "learning_rate": 2.0912621359223303e-05, "loss": 0.4079, "step": 5992 }, { "epoch": 58.11325301204819, "grad_norm": 13.475276947021484, "learning_rate": 2.0907766990291264e-05, "loss": 0.1851, "step": 5993 }, { "epoch": 58.12289156626506, "grad_norm": 2.4174654483795166, "learning_rate": 2.0902912621359225e-05, "loss": 0.221, "step": 5994 }, { "epoch": 58.13253012048193, "grad_norm": 3.51467227935791, "learning_rate": 2.0898058252427186e-05, "loss": 0.2463, "step": 5995 }, { "epoch": 58.142168674698794, "grad_norm": 16.472482681274414, "learning_rate": 2.0893203883495147e-05, "loss": 0.2145, "step": 5996 }, { "epoch": 58.151807228915665, "grad_norm": 7.569138050079346, "learning_rate": 2.088834951456311e-05, "loss": 0.3078, "step": 5997 }, { "epoch": 58.16144578313253, "grad_norm": 2.681283712387085, "learning_rate": 2.088349514563107e-05, "loss": 0.3772, "step": 5998 }, { "epoch": 58.1710843373494, "grad_norm": 2.66361665725708, "learning_rate": 2.087864077669903e-05, "loss": 0.1539, "step": 5999 }, { "epoch": 58.18072289156626, "grad_norm": 3.4446756839752197, "learning_rate": 2.0873786407766992e-05, "loss": 0.2028, "step": 6000 }, { "epoch": 58.19036144578313, "grad_norm": 4.959689140319824, "learning_rate": 2.086893203883495e-05, "loss": 0.1455, "step": 6001 }, { "epoch": 58.2, "grad_norm": 6.677829265594482, "learning_rate": 2.086407766990291e-05, "loss": 0.2204, "step": 6002 }, { "epoch": 58.209638554216866, "grad_norm": 4.122500896453857, "learning_rate": 2.0859223300970875e-05, "loss": 0.2583, "step": 6003 }, { "epoch": 58.21927710843374, "grad_norm": 2.424272060394287, "learning_rate": 2.0854368932038837e-05, "loss": 0.1902, "step": 6004 }, { "epoch": 58.2289156626506, "grad_norm": 9.010444641113281, "learning_rate": 2.0849514563106798e-05, "loss": 0.2765, "step": 6005 }, { "epoch": 58.23855421686747, "grad_norm": 21.99677085876465, "learning_rate": 2.084466019417476e-05, "loss": 0.1962, "step": 6006 }, { "epoch": 58.24819277108434, "grad_norm": 6.609952926635742, "learning_rate": 2.083980582524272e-05, "loss": 0.1656, "step": 6007 }, { "epoch": 58.257831325301204, "grad_norm": 6.487585544586182, "learning_rate": 2.083495145631068e-05, "loss": 0.1994, "step": 6008 }, { "epoch": 58.267469879518075, "grad_norm": 3.628756046295166, "learning_rate": 2.0830097087378642e-05, "loss": 0.2651, "step": 6009 }, { "epoch": 58.27710843373494, "grad_norm": 1.262576937675476, "learning_rate": 2.0825242718446603e-05, "loss": 0.2463, "step": 6010 }, { "epoch": 58.28674698795181, "grad_norm": 6.496148109436035, "learning_rate": 2.0820388349514565e-05, "loss": 0.3865, "step": 6011 }, { "epoch": 58.29638554216867, "grad_norm": 5.009007453918457, "learning_rate": 2.0815533980582526e-05, "loss": 0.1753, "step": 6012 }, { "epoch": 58.30602409638554, "grad_norm": 8.48260498046875, "learning_rate": 2.0810679611650487e-05, "loss": 0.3148, "step": 6013 }, { "epoch": 58.31566265060241, "grad_norm": 8.34933853149414, "learning_rate": 2.0805825242718445e-05, "loss": 0.3229, "step": 6014 }, { "epoch": 58.325301204819276, "grad_norm": 7.70054817199707, "learning_rate": 2.080097087378641e-05, "loss": 0.1508, "step": 6015 }, { "epoch": 58.334939759036146, "grad_norm": 5.126481056213379, "learning_rate": 2.079611650485437e-05, "loss": 0.2106, "step": 6016 }, { "epoch": 58.34457831325301, "grad_norm": 2.301090955734253, "learning_rate": 2.079126213592233e-05, "loss": 0.232, "step": 6017 }, { "epoch": 58.35421686746988, "grad_norm": 12.221123695373535, "learning_rate": 2.0786407766990293e-05, "loss": 0.1836, "step": 6018 }, { "epoch": 58.36385542168675, "grad_norm": 6.939754486083984, "learning_rate": 2.0781553398058254e-05, "loss": 0.2427, "step": 6019 }, { "epoch": 58.373493975903614, "grad_norm": 4.399256229400635, "learning_rate": 2.0776699029126215e-05, "loss": 0.1823, "step": 6020 }, { "epoch": 58.383132530120484, "grad_norm": 15.923842430114746, "learning_rate": 2.0771844660194176e-05, "loss": 0.2796, "step": 6021 }, { "epoch": 58.39277108433735, "grad_norm": 26.22252655029297, "learning_rate": 2.0766990291262137e-05, "loss": 0.2753, "step": 6022 }, { "epoch": 58.40240963855422, "grad_norm": 5.20791482925415, "learning_rate": 2.07621359223301e-05, "loss": 0.2826, "step": 6023 }, { "epoch": 58.41204819277108, "grad_norm": 6.420454502105713, "learning_rate": 2.075728155339806e-05, "loss": 0.2328, "step": 6024 }, { "epoch": 58.42168674698795, "grad_norm": 1.703110694885254, "learning_rate": 2.075242718446602e-05, "loss": 0.3512, "step": 6025 }, { "epoch": 58.43132530120482, "grad_norm": 7.103026390075684, "learning_rate": 2.0747572815533982e-05, "loss": 0.2932, "step": 6026 }, { "epoch": 58.440963855421685, "grad_norm": 16.545581817626953, "learning_rate": 2.0742718446601943e-05, "loss": 0.1401, "step": 6027 }, { "epoch": 58.450602409638556, "grad_norm": 10.526590347290039, "learning_rate": 2.0737864077669904e-05, "loss": 0.1158, "step": 6028 }, { "epoch": 58.46024096385542, "grad_norm": 4.393289089202881, "learning_rate": 2.0733009708737865e-05, "loss": 0.2579, "step": 6029 }, { "epoch": 58.46987951807229, "grad_norm": 3.047555685043335, "learning_rate": 2.0728155339805826e-05, "loss": 0.1748, "step": 6030 }, { "epoch": 58.47951807228916, "grad_norm": 11.853759765625, "learning_rate": 2.0723300970873788e-05, "loss": 0.256, "step": 6031 }, { "epoch": 58.48915662650602, "grad_norm": 10.672048568725586, "learning_rate": 2.071844660194175e-05, "loss": 0.2536, "step": 6032 }, { "epoch": 58.498795180722894, "grad_norm": 6.405778884887695, "learning_rate": 2.071359223300971e-05, "loss": 0.3387, "step": 6033 }, { "epoch": 58.50843373493976, "grad_norm": 19.152616500854492, "learning_rate": 2.070873786407767e-05, "loss": 0.1463, "step": 6034 }, { "epoch": 58.51807228915663, "grad_norm": 14.327242851257324, "learning_rate": 2.0703883495145632e-05, "loss": 0.2161, "step": 6035 }, { "epoch": 58.52771084337349, "grad_norm": 7.003933429718018, "learning_rate": 2.0699029126213593e-05, "loss": 0.3112, "step": 6036 }, { "epoch": 58.53734939759036, "grad_norm": 38.902565002441406, "learning_rate": 2.0694174757281554e-05, "loss": 0.1584, "step": 6037 }, { "epoch": 58.54698795180723, "grad_norm": 7.407019138336182, "learning_rate": 2.0689320388349516e-05, "loss": 0.6421, "step": 6038 }, { "epoch": 58.556626506024095, "grad_norm": 11.733356475830078, "learning_rate": 2.0684466019417477e-05, "loss": 0.2883, "step": 6039 }, { "epoch": 58.566265060240966, "grad_norm": 17.976980209350586, "learning_rate": 2.0679611650485438e-05, "loss": 0.3434, "step": 6040 }, { "epoch": 58.57590361445783, "grad_norm": 3.157468557357788, "learning_rate": 2.06747572815534e-05, "loss": 0.1474, "step": 6041 }, { "epoch": 58.5855421686747, "grad_norm": 2.4202136993408203, "learning_rate": 2.066990291262136e-05, "loss": 0.0614, "step": 6042 }, { "epoch": 58.59518072289157, "grad_norm": 7.1895904541015625, "learning_rate": 2.066504854368932e-05, "loss": 0.2316, "step": 6043 }, { "epoch": 58.60481927710843, "grad_norm": 28.6302433013916, "learning_rate": 2.0660194174757282e-05, "loss": 0.1762, "step": 6044 }, { "epoch": 58.6144578313253, "grad_norm": 3.816744089126587, "learning_rate": 2.0655339805825244e-05, "loss": 0.1507, "step": 6045 }, { "epoch": 58.62409638554217, "grad_norm": 10.970535278320312, "learning_rate": 2.0650485436893205e-05, "loss": 0.217, "step": 6046 }, { "epoch": 58.63373493975904, "grad_norm": 55.31444549560547, "learning_rate": 2.0645631067961166e-05, "loss": 0.1727, "step": 6047 }, { "epoch": 58.6433734939759, "grad_norm": 1.6716032028198242, "learning_rate": 2.0640776699029127e-05, "loss": 0.1154, "step": 6048 }, { "epoch": 58.65301204819277, "grad_norm": 2.97714900970459, "learning_rate": 2.0635922330097088e-05, "loss": 0.1427, "step": 6049 }, { "epoch": 58.66265060240964, "grad_norm": 5.292898654937744, "learning_rate": 2.063106796116505e-05, "loss": 0.2068, "step": 6050 }, { "epoch": 58.672289156626505, "grad_norm": 2.5490283966064453, "learning_rate": 2.062621359223301e-05, "loss": 0.2308, "step": 6051 }, { "epoch": 58.681927710843375, "grad_norm": 9.024880409240723, "learning_rate": 2.062135922330097e-05, "loss": 0.3422, "step": 6052 }, { "epoch": 58.69156626506024, "grad_norm": 22.788135528564453, "learning_rate": 2.0616504854368933e-05, "loss": 0.4996, "step": 6053 }, { "epoch": 58.70120481927711, "grad_norm": 3.9747769832611084, "learning_rate": 2.0611650485436894e-05, "loss": 0.2027, "step": 6054 }, { "epoch": 58.71084337349397, "grad_norm": 10.369989395141602, "learning_rate": 2.0606796116504855e-05, "loss": 0.2423, "step": 6055 }, { "epoch": 58.72048192771084, "grad_norm": 10.129273414611816, "learning_rate": 2.0601941747572816e-05, "loss": 0.2093, "step": 6056 }, { "epoch": 58.73012048192771, "grad_norm": 9.061002731323242, "learning_rate": 2.0597087378640777e-05, "loss": 0.2625, "step": 6057 }, { "epoch": 58.73975903614458, "grad_norm": 6.173739910125732, "learning_rate": 2.059223300970874e-05, "loss": 0.2348, "step": 6058 }, { "epoch": 58.74939759036145, "grad_norm": 19.80194664001465, "learning_rate": 2.05873786407767e-05, "loss": 0.1812, "step": 6059 }, { "epoch": 58.75903614457831, "grad_norm": 1.0102437734603882, "learning_rate": 2.058252427184466e-05, "loss": 0.1453, "step": 6060 }, { "epoch": 58.76867469879518, "grad_norm": 7.668771743774414, "learning_rate": 2.0577669902912622e-05, "loss": 0.2596, "step": 6061 }, { "epoch": 58.77831325301205, "grad_norm": 2.9356417655944824, "learning_rate": 2.0572815533980583e-05, "loss": 0.1586, "step": 6062 }, { "epoch": 58.787951807228914, "grad_norm": 7.112295150756836, "learning_rate": 2.0567961165048544e-05, "loss": 0.2675, "step": 6063 }, { "epoch": 58.797590361445785, "grad_norm": 20.55185890197754, "learning_rate": 2.0563106796116505e-05, "loss": 0.1856, "step": 6064 }, { "epoch": 58.80722891566265, "grad_norm": 8.197195053100586, "learning_rate": 2.0558252427184466e-05, "loss": 0.2787, "step": 6065 }, { "epoch": 58.81686746987952, "grad_norm": 3.5055527687072754, "learning_rate": 2.055339805825243e-05, "loss": 0.1053, "step": 6066 }, { "epoch": 58.82650602409639, "grad_norm": 12.82098388671875, "learning_rate": 2.054854368932039e-05, "loss": 0.2809, "step": 6067 }, { "epoch": 58.83614457831325, "grad_norm": 4.059922695159912, "learning_rate": 2.054368932038835e-05, "loss": 0.2051, "step": 6068 }, { "epoch": 58.84578313253012, "grad_norm": 8.218073844909668, "learning_rate": 2.053883495145631e-05, "loss": 0.1853, "step": 6069 }, { "epoch": 58.855421686746986, "grad_norm": 1.2297621965408325, "learning_rate": 2.0533980582524272e-05, "loss": 0.1069, "step": 6070 }, { "epoch": 58.86506024096386, "grad_norm": 13.555768966674805, "learning_rate": 2.0529126213592233e-05, "loss": 0.2215, "step": 6071 }, { "epoch": 58.87469879518072, "grad_norm": 3.030702829360962, "learning_rate": 2.0524271844660195e-05, "loss": 0.1285, "step": 6072 }, { "epoch": 58.88433734939759, "grad_norm": 4.744422435760498, "learning_rate": 2.0519417475728156e-05, "loss": 0.3288, "step": 6073 }, { "epoch": 58.89397590361446, "grad_norm": 13.585548400878906, "learning_rate": 2.0514563106796117e-05, "loss": 0.4719, "step": 6074 }, { "epoch": 58.903614457831324, "grad_norm": 5.090409278869629, "learning_rate": 2.0509708737864078e-05, "loss": 0.3267, "step": 6075 }, { "epoch": 58.913253012048195, "grad_norm": 13.880730628967285, "learning_rate": 2.050485436893204e-05, "loss": 0.3254, "step": 6076 }, { "epoch": 58.92289156626506, "grad_norm": 17.399433135986328, "learning_rate": 2.05e-05, "loss": 0.2262, "step": 6077 }, { "epoch": 58.93253012048193, "grad_norm": 6.098264217376709, "learning_rate": 2.0495145631067965e-05, "loss": 0.2942, "step": 6078 }, { "epoch": 58.94216867469879, "grad_norm": 23.197935104370117, "learning_rate": 2.0490291262135926e-05, "loss": 0.1364, "step": 6079 }, { "epoch": 58.95180722891566, "grad_norm": 15.515167236328125, "learning_rate": 2.0485436893203887e-05, "loss": 0.1681, "step": 6080 }, { "epoch": 58.96144578313253, "grad_norm": 6.699717998504639, "learning_rate": 2.0480582524271845e-05, "loss": 0.1318, "step": 6081 }, { "epoch": 58.971084337349396, "grad_norm": 1.807668924331665, "learning_rate": 2.0475728155339806e-05, "loss": 0.2462, "step": 6082 }, { "epoch": 58.980722891566266, "grad_norm": 5.631141185760498, "learning_rate": 2.0470873786407767e-05, "loss": 0.3145, "step": 6083 }, { "epoch": 58.99036144578313, "grad_norm": 4.552858352661133, "learning_rate": 2.0466019417475728e-05, "loss": 0.183, "step": 6084 }, { "epoch": 59.006024096385545, "grad_norm": 3.2634546756744385, "learning_rate": 2.046116504854369e-05, "loss": 0.1309, "step": 6085 }, { "epoch": 59.01566265060241, "grad_norm": 3.0914158821105957, "learning_rate": 2.045631067961165e-05, "loss": 0.417, "step": 6086 }, { "epoch": 59.02530120481928, "grad_norm": 32.61355209350586, "learning_rate": 2.0451456310679612e-05, "loss": 0.1571, "step": 6087 }, { "epoch": 59.03493975903614, "grad_norm": 6.4040679931640625, "learning_rate": 2.0446601941747573e-05, "loss": 0.378, "step": 6088 }, { "epoch": 59.04457831325301, "grad_norm": 3.1721951961517334, "learning_rate": 2.0441747572815534e-05, "loss": 0.3554, "step": 6089 }, { "epoch": 59.05421686746988, "grad_norm": 3.522763967514038, "learning_rate": 2.0436893203883495e-05, "loss": 0.3576, "step": 6090 }, { "epoch": 59.063855421686746, "grad_norm": 4.922585487365723, "learning_rate": 2.043203883495146e-05, "loss": 0.2378, "step": 6091 }, { "epoch": 59.07349397590362, "grad_norm": 4.114671230316162, "learning_rate": 2.042718446601942e-05, "loss": 0.1027, "step": 6092 }, { "epoch": 59.08313253012048, "grad_norm": 2.529006242752075, "learning_rate": 2.0422330097087382e-05, "loss": 0.1819, "step": 6093 }, { "epoch": 59.09277108433735, "grad_norm": 16.371540069580078, "learning_rate": 2.041747572815534e-05, "loss": 0.2285, "step": 6094 }, { "epoch": 59.102409638554214, "grad_norm": 5.468705177307129, "learning_rate": 2.04126213592233e-05, "loss": 0.2323, "step": 6095 }, { "epoch": 59.112048192771084, "grad_norm": 3.829195022583008, "learning_rate": 2.0407766990291262e-05, "loss": 0.2711, "step": 6096 }, { "epoch": 59.121686746987955, "grad_norm": 5.738965034484863, "learning_rate": 2.0402912621359223e-05, "loss": 0.1247, "step": 6097 }, { "epoch": 59.13132530120482, "grad_norm": 2.835015058517456, "learning_rate": 2.0398058252427184e-05, "loss": 0.1686, "step": 6098 }, { "epoch": 59.14096385542169, "grad_norm": 2.7342686653137207, "learning_rate": 2.0393203883495145e-05, "loss": 0.23, "step": 6099 }, { "epoch": 59.15060240963855, "grad_norm": 11.09571647644043, "learning_rate": 2.0388349514563107e-05, "loss": 0.2885, "step": 6100 }, { "epoch": 59.16024096385542, "grad_norm": 15.87723445892334, "learning_rate": 2.0383495145631068e-05, "loss": 0.2199, "step": 6101 }, { "epoch": 59.16987951807229, "grad_norm": 11.093008995056152, "learning_rate": 2.037864077669903e-05, "loss": 0.1191, "step": 6102 }, { "epoch": 59.179518072289156, "grad_norm": 7.812281131744385, "learning_rate": 2.037378640776699e-05, "loss": 0.1502, "step": 6103 }, { "epoch": 59.189156626506026, "grad_norm": 2.058619260787964, "learning_rate": 2.0368932038834955e-05, "loss": 0.119, "step": 6104 }, { "epoch": 59.19879518072289, "grad_norm": 5.54361629486084, "learning_rate": 2.0364077669902916e-05, "loss": 0.1988, "step": 6105 }, { "epoch": 59.20843373493976, "grad_norm": 2.5062761306762695, "learning_rate": 2.0359223300970877e-05, "loss": 0.1086, "step": 6106 }, { "epoch": 59.21807228915662, "grad_norm": 5.147695541381836, "learning_rate": 2.0354368932038838e-05, "loss": 0.2864, "step": 6107 }, { "epoch": 59.227710843373494, "grad_norm": 1.6402692794799805, "learning_rate": 2.0349514563106796e-05, "loss": 0.201, "step": 6108 }, { "epoch": 59.237349397590364, "grad_norm": 17.59823989868164, "learning_rate": 2.0344660194174757e-05, "loss": 0.4008, "step": 6109 }, { "epoch": 59.24698795180723, "grad_norm": 13.958749771118164, "learning_rate": 2.0339805825242718e-05, "loss": 0.3624, "step": 6110 }, { "epoch": 59.2566265060241, "grad_norm": 7.032955646514893, "learning_rate": 2.033495145631068e-05, "loss": 0.1993, "step": 6111 }, { "epoch": 59.26626506024096, "grad_norm": 7.957627773284912, "learning_rate": 2.033009708737864e-05, "loss": 0.2458, "step": 6112 }, { "epoch": 59.27590361445783, "grad_norm": 2.7734994888305664, "learning_rate": 2.03252427184466e-05, "loss": 0.197, "step": 6113 }, { "epoch": 59.2855421686747, "grad_norm": 13.514097213745117, "learning_rate": 2.0320388349514563e-05, "loss": 0.3915, "step": 6114 }, { "epoch": 59.295180722891565, "grad_norm": 6.896015167236328, "learning_rate": 2.0315533980582524e-05, "loss": 0.2602, "step": 6115 }, { "epoch": 59.304819277108436, "grad_norm": 11.579547882080078, "learning_rate": 2.031067961165049e-05, "loss": 0.301, "step": 6116 }, { "epoch": 59.3144578313253, "grad_norm": 14.516131401062012, "learning_rate": 2.030582524271845e-05, "loss": 0.2729, "step": 6117 }, { "epoch": 59.32409638554217, "grad_norm": 2.713132381439209, "learning_rate": 2.030097087378641e-05, "loss": 0.2244, "step": 6118 }, { "epoch": 59.33373493975903, "grad_norm": 6.820803165435791, "learning_rate": 2.0296116504854372e-05, "loss": 0.3305, "step": 6119 }, { "epoch": 59.3433734939759, "grad_norm": 2.8592519760131836, "learning_rate": 2.0291262135922333e-05, "loss": 0.2897, "step": 6120 }, { "epoch": 59.353012048192774, "grad_norm": 18.661848068237305, "learning_rate": 2.0286407766990294e-05, "loss": 0.4959, "step": 6121 }, { "epoch": 59.36265060240964, "grad_norm": 5.080601692199707, "learning_rate": 2.0281553398058252e-05, "loss": 0.1341, "step": 6122 }, { "epoch": 59.37228915662651, "grad_norm": 3.526942729949951, "learning_rate": 2.0276699029126213e-05, "loss": 0.2017, "step": 6123 }, { "epoch": 59.38192771084337, "grad_norm": 18.87091064453125, "learning_rate": 2.0271844660194174e-05, "loss": 0.2088, "step": 6124 }, { "epoch": 59.39156626506024, "grad_norm": 4.456615924835205, "learning_rate": 2.0266990291262135e-05, "loss": 0.2651, "step": 6125 }, { "epoch": 59.40120481927711, "grad_norm": 0.8194609880447388, "learning_rate": 2.0262135922330096e-05, "loss": 0.0394, "step": 6126 }, { "epoch": 59.410843373493975, "grad_norm": 3.440284252166748, "learning_rate": 2.0257281553398058e-05, "loss": 0.0673, "step": 6127 }, { "epoch": 59.420481927710846, "grad_norm": 2.8210041522979736, "learning_rate": 2.025242718446602e-05, "loss": 0.2169, "step": 6128 }, { "epoch": 59.43012048192771, "grad_norm": 2.1570727825164795, "learning_rate": 2.0247572815533983e-05, "loss": 0.238, "step": 6129 }, { "epoch": 59.43975903614458, "grad_norm": 6.441417694091797, "learning_rate": 2.0242718446601944e-05, "loss": 0.2376, "step": 6130 }, { "epoch": 59.44939759036144, "grad_norm": 30.06867790222168, "learning_rate": 2.0237864077669905e-05, "loss": 0.2231, "step": 6131 }, { "epoch": 59.45903614457831, "grad_norm": 1.7506908178329468, "learning_rate": 2.0233009708737867e-05, "loss": 0.1746, "step": 6132 }, { "epoch": 59.46867469879518, "grad_norm": 3.4962141513824463, "learning_rate": 2.0228155339805828e-05, "loss": 0.2691, "step": 6133 }, { "epoch": 59.47831325301205, "grad_norm": 3.0460333824157715, "learning_rate": 2.022330097087379e-05, "loss": 0.2712, "step": 6134 }, { "epoch": 59.48795180722892, "grad_norm": 2.073822021484375, "learning_rate": 2.021844660194175e-05, "loss": 0.2657, "step": 6135 }, { "epoch": 59.49759036144578, "grad_norm": 7.807738304138184, "learning_rate": 2.0213592233009708e-05, "loss": 0.2564, "step": 6136 }, { "epoch": 59.50722891566265, "grad_norm": 18.881044387817383, "learning_rate": 2.020873786407767e-05, "loss": 0.2935, "step": 6137 }, { "epoch": 59.51686746987952, "grad_norm": 3.6180830001831055, "learning_rate": 2.020388349514563e-05, "loss": 0.2475, "step": 6138 }, { "epoch": 59.526506024096385, "grad_norm": 5.548140048980713, "learning_rate": 2.019902912621359e-05, "loss": 0.139, "step": 6139 }, { "epoch": 59.536144578313255, "grad_norm": 3.435823440551758, "learning_rate": 2.0194174757281552e-05, "loss": 0.1895, "step": 6140 }, { "epoch": 59.54578313253012, "grad_norm": 7.575066566467285, "learning_rate": 2.0189320388349517e-05, "loss": 0.2929, "step": 6141 }, { "epoch": 59.55542168674699, "grad_norm": 13.957829475402832, "learning_rate": 2.0184466019417478e-05, "loss": 0.3178, "step": 6142 }, { "epoch": 59.56506024096385, "grad_norm": 8.468903541564941, "learning_rate": 2.017961165048544e-05, "loss": 0.3956, "step": 6143 }, { "epoch": 59.57469879518072, "grad_norm": 2.827648401260376, "learning_rate": 2.01747572815534e-05, "loss": 0.167, "step": 6144 }, { "epoch": 59.58433734939759, "grad_norm": 6.545825481414795, "learning_rate": 2.016990291262136e-05, "loss": 0.1551, "step": 6145 }, { "epoch": 59.59397590361446, "grad_norm": 9.052226066589355, "learning_rate": 2.0165048543689323e-05, "loss": 0.2988, "step": 6146 }, { "epoch": 59.60361445783133, "grad_norm": 29.637340545654297, "learning_rate": 2.0160194174757284e-05, "loss": 0.4176, "step": 6147 }, { "epoch": 59.61325301204819, "grad_norm": 22.265626907348633, "learning_rate": 2.0155339805825245e-05, "loss": 0.3172, "step": 6148 }, { "epoch": 59.62289156626506, "grad_norm": 4.2154059410095215, "learning_rate": 2.0150485436893206e-05, "loss": 0.206, "step": 6149 }, { "epoch": 59.63253012048193, "grad_norm": 12.944258689880371, "learning_rate": 2.0145631067961164e-05, "loss": 0.1767, "step": 6150 }, { "epoch": 59.642168674698794, "grad_norm": 3.487468957901001, "learning_rate": 2.0140776699029125e-05, "loss": 0.1771, "step": 6151 }, { "epoch": 59.651807228915665, "grad_norm": 57.320980072021484, "learning_rate": 2.0135922330097086e-05, "loss": 0.3154, "step": 6152 }, { "epoch": 59.66144578313253, "grad_norm": 12.519157409667969, "learning_rate": 2.0131067961165047e-05, "loss": 0.2904, "step": 6153 }, { "epoch": 59.6710843373494, "grad_norm": 10.756946563720703, "learning_rate": 2.0126213592233012e-05, "loss": 0.1912, "step": 6154 }, { "epoch": 59.68072289156626, "grad_norm": 11.424982070922852, "learning_rate": 2.0121359223300973e-05, "loss": 0.2363, "step": 6155 }, { "epoch": 59.69036144578313, "grad_norm": 18.30297088623047, "learning_rate": 2.0116504854368934e-05, "loss": 0.3133, "step": 6156 }, { "epoch": 59.7, "grad_norm": 13.000365257263184, "learning_rate": 2.0111650485436895e-05, "loss": 0.3547, "step": 6157 }, { "epoch": 59.709638554216866, "grad_norm": 13.09921932220459, "learning_rate": 2.0106796116504856e-05, "loss": 0.2385, "step": 6158 }, { "epoch": 59.71927710843374, "grad_norm": 4.376732349395752, "learning_rate": 2.0101941747572818e-05, "loss": 0.1473, "step": 6159 }, { "epoch": 59.7289156626506, "grad_norm": 6.42922306060791, "learning_rate": 2.009708737864078e-05, "loss": 0.3552, "step": 6160 }, { "epoch": 59.73855421686747, "grad_norm": 7.733317852020264, "learning_rate": 2.009223300970874e-05, "loss": 0.3044, "step": 6161 }, { "epoch": 59.74819277108434, "grad_norm": 10.471858978271484, "learning_rate": 2.00873786407767e-05, "loss": 0.3239, "step": 6162 }, { "epoch": 59.757831325301204, "grad_norm": 16.75289535522461, "learning_rate": 2.008252427184466e-05, "loss": 0.349, "step": 6163 }, { "epoch": 59.767469879518075, "grad_norm": 3.023862838745117, "learning_rate": 2.007766990291262e-05, "loss": 0.1729, "step": 6164 }, { "epoch": 59.77710843373494, "grad_norm": 8.257643699645996, "learning_rate": 2.007281553398058e-05, "loss": 0.2514, "step": 6165 }, { "epoch": 59.78674698795181, "grad_norm": 39.58168029785156, "learning_rate": 2.0067961165048542e-05, "loss": 0.1898, "step": 6166 }, { "epoch": 59.79638554216867, "grad_norm": 3.557767629623413, "learning_rate": 2.0063106796116507e-05, "loss": 0.2398, "step": 6167 }, { "epoch": 59.80602409638554, "grad_norm": 8.992487907409668, "learning_rate": 2.0058252427184468e-05, "loss": 0.1392, "step": 6168 }, { "epoch": 59.81566265060241, "grad_norm": 5.1073317527771, "learning_rate": 2.005339805825243e-05, "loss": 0.2504, "step": 6169 }, { "epoch": 59.825301204819276, "grad_norm": 7.409968852996826, "learning_rate": 2.004854368932039e-05, "loss": 0.4108, "step": 6170 }, { "epoch": 59.834939759036146, "grad_norm": 3.2630159854888916, "learning_rate": 2.004368932038835e-05, "loss": 0.171, "step": 6171 }, { "epoch": 59.84457831325301, "grad_norm": 7.934826374053955, "learning_rate": 2.0038834951456312e-05, "loss": 0.2125, "step": 6172 }, { "epoch": 59.85421686746988, "grad_norm": 3.6972782611846924, "learning_rate": 2.0033980582524274e-05, "loss": 0.1268, "step": 6173 }, { "epoch": 59.86385542168675, "grad_norm": 6.655429840087891, "learning_rate": 2.0029126213592235e-05, "loss": 0.2261, "step": 6174 }, { "epoch": 59.873493975903614, "grad_norm": 2.5239756107330322, "learning_rate": 2.0024271844660196e-05, "loss": 0.0971, "step": 6175 }, { "epoch": 59.883132530120484, "grad_norm": 2.818634510040283, "learning_rate": 2.0019417475728157e-05, "loss": 0.347, "step": 6176 }, { "epoch": 59.89277108433735, "grad_norm": 3.2839603424072266, "learning_rate": 2.0014563106796115e-05, "loss": 0.2491, "step": 6177 }, { "epoch": 59.90240963855422, "grad_norm": 9.807039260864258, "learning_rate": 2.0009708737864076e-05, "loss": 0.3991, "step": 6178 }, { "epoch": 59.91204819277108, "grad_norm": 16.731056213378906, "learning_rate": 2.000485436893204e-05, "loss": 0.1703, "step": 6179 }, { "epoch": 59.92168674698795, "grad_norm": 3.1374502182006836, "learning_rate": 2e-05, "loss": 0.1538, "step": 6180 }, { "epoch": 59.93132530120482, "grad_norm": 9.470355033874512, "learning_rate": 1.9995145631067963e-05, "loss": 0.2155, "step": 6181 }, { "epoch": 59.940963855421685, "grad_norm": 7.115716934204102, "learning_rate": 1.9990291262135924e-05, "loss": 0.3806, "step": 6182 }, { "epoch": 59.950602409638556, "grad_norm": 2.001962423324585, "learning_rate": 1.9985436893203885e-05, "loss": 0.4133, "step": 6183 }, { "epoch": 59.96024096385542, "grad_norm": 5.9186787605285645, "learning_rate": 1.9980582524271846e-05, "loss": 0.2726, "step": 6184 }, { "epoch": 59.96987951807229, "grad_norm": 4.243680953979492, "learning_rate": 1.9975728155339807e-05, "loss": 0.3619, "step": 6185 }, { "epoch": 59.97951807228916, "grad_norm": 8.467835426330566, "learning_rate": 1.997087378640777e-05, "loss": 0.2382, "step": 6186 }, { "epoch": 59.98915662650602, "grad_norm": 2.735024929046631, "learning_rate": 1.996601941747573e-05, "loss": 0.1826, "step": 6187 }, { "epoch": 60.00481927710843, "grad_norm": 16.41693115234375, "learning_rate": 1.996116504854369e-05, "loss": 0.1523, "step": 6188 }, { "epoch": 60.0144578313253, "grad_norm": 9.476831436157227, "learning_rate": 1.9956310679611652e-05, "loss": 0.5616, "step": 6189 }, { "epoch": 60.024096385542165, "grad_norm": 27.716611862182617, "learning_rate": 1.9951456310679613e-05, "loss": 0.363, "step": 6190 }, { "epoch": 60.033734939759036, "grad_norm": 12.795563697814941, "learning_rate": 1.994660194174757e-05, "loss": 0.1183, "step": 6191 }, { "epoch": 60.043373493975906, "grad_norm": 9.427631378173828, "learning_rate": 1.9941747572815535e-05, "loss": 0.494, "step": 6192 }, { "epoch": 60.05301204819277, "grad_norm": 7.65744686126709, "learning_rate": 1.9936893203883497e-05, "loss": 0.3499, "step": 6193 }, { "epoch": 60.06265060240964, "grad_norm": 5.1967692375183105, "learning_rate": 1.9932038834951458e-05, "loss": 0.2116, "step": 6194 }, { "epoch": 60.0722891566265, "grad_norm": 3.6593146324157715, "learning_rate": 1.992718446601942e-05, "loss": 0.4513, "step": 6195 }, { "epoch": 60.081927710843374, "grad_norm": 3.7075600624084473, "learning_rate": 1.992233009708738e-05, "loss": 0.3291, "step": 6196 }, { "epoch": 60.091566265060244, "grad_norm": 11.164424896240234, "learning_rate": 1.991747572815534e-05, "loss": 0.266, "step": 6197 }, { "epoch": 60.10120481927711, "grad_norm": 3.332989454269409, "learning_rate": 1.9912621359223302e-05, "loss": 0.4596, "step": 6198 }, { "epoch": 60.11084337349398, "grad_norm": 4.683569431304932, "learning_rate": 1.9907766990291263e-05, "loss": 0.2126, "step": 6199 }, { "epoch": 60.12048192771084, "grad_norm": 7.082148551940918, "learning_rate": 1.9902912621359225e-05, "loss": 0.242, "step": 6200 }, { "epoch": 60.13012048192771, "grad_norm": 5.38917350769043, "learning_rate": 1.9898058252427186e-05, "loss": 0.2681, "step": 6201 }, { "epoch": 60.139759036144575, "grad_norm": 10.474862098693848, "learning_rate": 1.9893203883495147e-05, "loss": 0.2226, "step": 6202 }, { "epoch": 60.149397590361446, "grad_norm": 5.139820098876953, "learning_rate": 1.9888349514563108e-05, "loss": 0.4092, "step": 6203 }, { "epoch": 60.159036144578316, "grad_norm": 3.0911219120025635, "learning_rate": 1.988349514563107e-05, "loss": 0.1385, "step": 6204 }, { "epoch": 60.16867469879518, "grad_norm": 1.8408713340759277, "learning_rate": 1.987864077669903e-05, "loss": 0.1534, "step": 6205 }, { "epoch": 60.17831325301205, "grad_norm": 8.33565902709961, "learning_rate": 1.987378640776699e-05, "loss": 0.0871, "step": 6206 }, { "epoch": 60.18795180722891, "grad_norm": 6.6789631843566895, "learning_rate": 1.9868932038834953e-05, "loss": 0.3521, "step": 6207 }, { "epoch": 60.19759036144578, "grad_norm": 5.801967620849609, "learning_rate": 1.9864077669902914e-05, "loss": 0.1982, "step": 6208 }, { "epoch": 60.207228915662654, "grad_norm": 10.47492504119873, "learning_rate": 1.9859223300970875e-05, "loss": 0.1771, "step": 6209 }, { "epoch": 60.21686746987952, "grad_norm": 5.527388095855713, "learning_rate": 1.9854368932038836e-05, "loss": 0.2303, "step": 6210 }, { "epoch": 60.22650602409639, "grad_norm": 11.822787284851074, "learning_rate": 1.9849514563106797e-05, "loss": 0.3105, "step": 6211 }, { "epoch": 60.23614457831325, "grad_norm": 6.598188400268555, "learning_rate": 1.9844660194174758e-05, "loss": 0.1132, "step": 6212 }, { "epoch": 60.24578313253012, "grad_norm": 7.93485164642334, "learning_rate": 1.983980582524272e-05, "loss": 0.5929, "step": 6213 }, { "epoch": 60.255421686746985, "grad_norm": 11.050250053405762, "learning_rate": 1.983495145631068e-05, "loss": 0.178, "step": 6214 }, { "epoch": 60.265060240963855, "grad_norm": 34.45709991455078, "learning_rate": 1.9830097087378642e-05, "loss": 0.1786, "step": 6215 }, { "epoch": 60.274698795180726, "grad_norm": 4.152299404144287, "learning_rate": 1.9825242718446603e-05, "loss": 0.3065, "step": 6216 }, { "epoch": 60.28433734939759, "grad_norm": 3.315532922744751, "learning_rate": 1.9820388349514564e-05, "loss": 0.2538, "step": 6217 }, { "epoch": 60.29397590361446, "grad_norm": 2.740886688232422, "learning_rate": 1.9815533980582525e-05, "loss": 0.1484, "step": 6218 }, { "epoch": 60.30361445783132, "grad_norm": 8.949191093444824, "learning_rate": 1.9810679611650486e-05, "loss": 0.264, "step": 6219 }, { "epoch": 60.31325301204819, "grad_norm": 14.59123420715332, "learning_rate": 1.9805825242718447e-05, "loss": 0.2281, "step": 6220 }, { "epoch": 60.32289156626506, "grad_norm": 13.97864818572998, "learning_rate": 1.980097087378641e-05, "loss": 0.1901, "step": 6221 }, { "epoch": 60.33253012048193, "grad_norm": 9.304489135742188, "learning_rate": 1.979611650485437e-05, "loss": 0.2237, "step": 6222 }, { "epoch": 60.3421686746988, "grad_norm": 5.756253719329834, "learning_rate": 1.979126213592233e-05, "loss": 0.2849, "step": 6223 }, { "epoch": 60.35180722891566, "grad_norm": 3.8582468032836914, "learning_rate": 1.9786407766990292e-05, "loss": 0.2971, "step": 6224 }, { "epoch": 60.36144578313253, "grad_norm": 3.363542318344116, "learning_rate": 1.9781553398058253e-05, "loss": 0.2297, "step": 6225 }, { "epoch": 60.371084337349394, "grad_norm": 2.81164288520813, "learning_rate": 1.9776699029126214e-05, "loss": 0.128, "step": 6226 }, { "epoch": 60.380722891566265, "grad_norm": 33.057373046875, "learning_rate": 1.9771844660194175e-05, "loss": 0.3885, "step": 6227 }, { "epoch": 60.390361445783135, "grad_norm": 3.1626834869384766, "learning_rate": 1.9766990291262137e-05, "loss": 0.2538, "step": 6228 }, { "epoch": 60.4, "grad_norm": 4.332249641418457, "learning_rate": 1.9762135922330098e-05, "loss": 0.2051, "step": 6229 }, { "epoch": 60.40963855421687, "grad_norm": 9.177361488342285, "learning_rate": 1.975728155339806e-05, "loss": 0.1994, "step": 6230 }, { "epoch": 60.41927710843373, "grad_norm": 12.609395980834961, "learning_rate": 1.975242718446602e-05, "loss": 0.3183, "step": 6231 }, { "epoch": 60.4289156626506, "grad_norm": 2.7161197662353516, "learning_rate": 1.974757281553398e-05, "loss": 0.1748, "step": 6232 }, { "epoch": 60.43855421686747, "grad_norm": 12.521734237670898, "learning_rate": 1.9742718446601942e-05, "loss": 0.3733, "step": 6233 }, { "epoch": 60.44819277108434, "grad_norm": 3.9564764499664307, "learning_rate": 1.9737864077669904e-05, "loss": 0.1882, "step": 6234 }, { "epoch": 60.45783132530121, "grad_norm": 2.7667694091796875, "learning_rate": 1.9733009708737865e-05, "loss": 0.1397, "step": 6235 }, { "epoch": 60.46746987951807, "grad_norm": 3.982801675796509, "learning_rate": 1.9728155339805826e-05, "loss": 0.101, "step": 6236 }, { "epoch": 60.47710843373494, "grad_norm": 3.0623109340667725, "learning_rate": 1.9723300970873787e-05, "loss": 0.3475, "step": 6237 }, { "epoch": 60.486746987951804, "grad_norm": 1.7323493957519531, "learning_rate": 1.9718446601941748e-05, "loss": 0.0858, "step": 6238 }, { "epoch": 60.496385542168674, "grad_norm": 2.80721116065979, "learning_rate": 1.971359223300971e-05, "loss": 0.1371, "step": 6239 }, { "epoch": 60.506024096385545, "grad_norm": 3.694031000137329, "learning_rate": 1.970873786407767e-05, "loss": 0.2367, "step": 6240 }, { "epoch": 60.51566265060241, "grad_norm": 17.061262130737305, "learning_rate": 1.970388349514563e-05, "loss": 0.1993, "step": 6241 }, { "epoch": 60.52530120481928, "grad_norm": 21.944072723388672, "learning_rate": 1.9699029126213596e-05, "loss": 0.2367, "step": 6242 }, { "epoch": 60.53493975903614, "grad_norm": 2.7354378700256348, "learning_rate": 1.9694174757281554e-05, "loss": 0.1975, "step": 6243 }, { "epoch": 60.54457831325301, "grad_norm": 21.821054458618164, "learning_rate": 1.9689320388349515e-05, "loss": 0.2745, "step": 6244 }, { "epoch": 60.55421686746988, "grad_norm": 4.314064025878906, "learning_rate": 1.9684466019417476e-05, "loss": 0.2766, "step": 6245 }, { "epoch": 60.563855421686746, "grad_norm": 2.391838788986206, "learning_rate": 1.9679611650485437e-05, "loss": 0.2422, "step": 6246 }, { "epoch": 60.57349397590362, "grad_norm": 3.5229032039642334, "learning_rate": 1.96747572815534e-05, "loss": 0.1766, "step": 6247 }, { "epoch": 60.58313253012048, "grad_norm": 3.8041067123413086, "learning_rate": 1.966990291262136e-05, "loss": 0.0736, "step": 6248 }, { "epoch": 60.59277108433735, "grad_norm": 6.281168460845947, "learning_rate": 1.966504854368932e-05, "loss": 0.237, "step": 6249 }, { "epoch": 60.602409638554214, "grad_norm": 13.528611183166504, "learning_rate": 1.9660194174757282e-05, "loss": 0.265, "step": 6250 }, { "epoch": 60.612048192771084, "grad_norm": 3.0086400508880615, "learning_rate": 1.9655339805825243e-05, "loss": 0.2817, "step": 6251 }, { "epoch": 60.621686746987955, "grad_norm": 3.3608224391937256, "learning_rate": 1.9650485436893204e-05, "loss": 0.3027, "step": 6252 }, { "epoch": 60.63132530120482, "grad_norm": 46.48912048339844, "learning_rate": 1.9645631067961165e-05, "loss": 0.3295, "step": 6253 }, { "epoch": 60.64096385542169, "grad_norm": 10.001876831054688, "learning_rate": 1.9640776699029126e-05, "loss": 0.2593, "step": 6254 }, { "epoch": 60.65060240963855, "grad_norm": 16.269325256347656, "learning_rate": 1.963592233009709e-05, "loss": 0.2678, "step": 6255 }, { "epoch": 60.66024096385542, "grad_norm": 5.90783166885376, "learning_rate": 1.9631067961165052e-05, "loss": 0.1732, "step": 6256 }, { "epoch": 60.66987951807229, "grad_norm": 6.490077018737793, "learning_rate": 1.962621359223301e-05, "loss": 0.2876, "step": 6257 }, { "epoch": 60.679518072289156, "grad_norm": 2.4800264835357666, "learning_rate": 1.962135922330097e-05, "loss": 0.195, "step": 6258 }, { "epoch": 60.689156626506026, "grad_norm": 5.627487659454346, "learning_rate": 1.9616504854368932e-05, "loss": 0.2145, "step": 6259 }, { "epoch": 60.69879518072289, "grad_norm": 3.499220132827759, "learning_rate": 1.9611650485436893e-05, "loss": 0.1776, "step": 6260 }, { "epoch": 60.70843373493976, "grad_norm": 2.3786261081695557, "learning_rate": 1.9606796116504854e-05, "loss": 0.054, "step": 6261 }, { "epoch": 60.71807228915662, "grad_norm": 13.167119026184082, "learning_rate": 1.9601941747572816e-05, "loss": 0.158, "step": 6262 }, { "epoch": 60.727710843373494, "grad_norm": 11.221871376037598, "learning_rate": 1.9597087378640777e-05, "loss": 0.1473, "step": 6263 }, { "epoch": 60.737349397590364, "grad_norm": 8.381471633911133, "learning_rate": 1.9592233009708738e-05, "loss": 0.413, "step": 6264 }, { "epoch": 60.74698795180723, "grad_norm": 17.889616012573242, "learning_rate": 1.95873786407767e-05, "loss": 0.1489, "step": 6265 }, { "epoch": 60.7566265060241, "grad_norm": 5.893215656280518, "learning_rate": 1.958252427184466e-05, "loss": 0.2016, "step": 6266 }, { "epoch": 60.76626506024096, "grad_norm": 2.436547040939331, "learning_rate": 1.957766990291262e-05, "loss": 0.3385, "step": 6267 }, { "epoch": 60.77590361445783, "grad_norm": 5.081252098083496, "learning_rate": 1.9572815533980586e-05, "loss": 0.2366, "step": 6268 }, { "epoch": 60.7855421686747, "grad_norm": 4.396456718444824, "learning_rate": 1.9567961165048547e-05, "loss": 0.2332, "step": 6269 }, { "epoch": 60.795180722891565, "grad_norm": 8.162766456604004, "learning_rate": 1.9563106796116508e-05, "loss": 0.3022, "step": 6270 }, { "epoch": 60.804819277108436, "grad_norm": 10.14670467376709, "learning_rate": 1.9558252427184466e-05, "loss": 0.3894, "step": 6271 }, { "epoch": 60.8144578313253, "grad_norm": 3.382152795791626, "learning_rate": 1.9553398058252427e-05, "loss": 0.3536, "step": 6272 }, { "epoch": 60.82409638554217, "grad_norm": 11.872448921203613, "learning_rate": 1.9548543689320388e-05, "loss": 0.4089, "step": 6273 }, { "epoch": 60.83373493975903, "grad_norm": 8.208964347839355, "learning_rate": 1.954368932038835e-05, "loss": 0.199, "step": 6274 }, { "epoch": 60.8433734939759, "grad_norm": 5.184309005737305, "learning_rate": 1.953883495145631e-05, "loss": 0.2191, "step": 6275 }, { "epoch": 60.853012048192774, "grad_norm": 6.611877918243408, "learning_rate": 1.953398058252427e-05, "loss": 0.2464, "step": 6276 }, { "epoch": 60.86265060240964, "grad_norm": 9.670797348022461, "learning_rate": 1.9529126213592233e-05, "loss": 0.2592, "step": 6277 }, { "epoch": 60.87228915662651, "grad_norm": 20.363269805908203, "learning_rate": 1.9524271844660194e-05, "loss": 0.1202, "step": 6278 }, { "epoch": 60.88192771084337, "grad_norm": 1.4559906721115112, "learning_rate": 1.9519417475728155e-05, "loss": 0.09, "step": 6279 }, { "epoch": 60.89156626506024, "grad_norm": 6.537635326385498, "learning_rate": 1.951456310679612e-05, "loss": 0.2596, "step": 6280 }, { "epoch": 60.90120481927711, "grad_norm": 13.724369049072266, "learning_rate": 1.950970873786408e-05, "loss": 0.4173, "step": 6281 }, { "epoch": 60.910843373493975, "grad_norm": 17.689966201782227, "learning_rate": 1.9504854368932042e-05, "loss": 0.3042, "step": 6282 }, { "epoch": 60.920481927710846, "grad_norm": 11.430673599243164, "learning_rate": 1.9500000000000003e-05, "loss": 0.2174, "step": 6283 }, { "epoch": 60.93012048192771, "grad_norm": 21.51195526123047, "learning_rate": 1.9495145631067964e-05, "loss": 0.2305, "step": 6284 }, { "epoch": 60.93975903614458, "grad_norm": 5.2149810791015625, "learning_rate": 1.9490291262135922e-05, "loss": 0.4216, "step": 6285 }, { "epoch": 60.94939759036144, "grad_norm": 2.0237135887145996, "learning_rate": 1.9485436893203883e-05, "loss": 0.1418, "step": 6286 }, { "epoch": 60.95903614457831, "grad_norm": 4.5068840980529785, "learning_rate": 1.9480582524271844e-05, "loss": 0.2094, "step": 6287 }, { "epoch": 60.96867469879518, "grad_norm": 11.527152061462402, "learning_rate": 1.9475728155339805e-05, "loss": 0.306, "step": 6288 }, { "epoch": 60.97831325301205, "grad_norm": 2.873648166656494, "learning_rate": 1.9470873786407767e-05, "loss": 0.117, "step": 6289 }, { "epoch": 60.98795180722892, "grad_norm": 3.5838611125946045, "learning_rate": 1.9466019417475728e-05, "loss": 0.3885, "step": 6290 }, { "epoch": 61.003614457831326, "grad_norm": 3.868579387664795, "learning_rate": 1.946116504854369e-05, "loss": 0.1127, "step": 6291 }, { "epoch": 61.013253012048196, "grad_norm": 7.453570365905762, "learning_rate": 1.945631067961165e-05, "loss": 0.2198, "step": 6292 }, { "epoch": 61.02289156626506, "grad_norm": 5.842768669128418, "learning_rate": 1.9451456310679614e-05, "loss": 0.2741, "step": 6293 }, { "epoch": 61.03253012048193, "grad_norm": 6.175844192504883, "learning_rate": 1.9446601941747576e-05, "loss": 0.4933, "step": 6294 }, { "epoch": 61.04216867469879, "grad_norm": 4.599715232849121, "learning_rate": 1.9441747572815537e-05, "loss": 0.182, "step": 6295 }, { "epoch": 61.05180722891566, "grad_norm": 5.680471897125244, "learning_rate": 1.9436893203883498e-05, "loss": 0.2173, "step": 6296 }, { "epoch": 61.06144578313253, "grad_norm": 4.404165744781494, "learning_rate": 1.943203883495146e-05, "loss": 0.198, "step": 6297 }, { "epoch": 61.0710843373494, "grad_norm": 7.549442768096924, "learning_rate": 1.942718446601942e-05, "loss": 0.4293, "step": 6298 }, { "epoch": 61.08072289156627, "grad_norm": 7.012890338897705, "learning_rate": 1.9422330097087378e-05, "loss": 0.3318, "step": 6299 }, { "epoch": 61.09036144578313, "grad_norm": 3.5747733116149902, "learning_rate": 1.941747572815534e-05, "loss": 0.2299, "step": 6300 }, { "epoch": 61.1, "grad_norm": 7.008413791656494, "learning_rate": 1.94126213592233e-05, "loss": 0.3727, "step": 6301 }, { "epoch": 61.109638554216865, "grad_norm": 11.888433456420898, "learning_rate": 1.940776699029126e-05, "loss": 0.3032, "step": 6302 }, { "epoch": 61.119277108433735, "grad_norm": 37.85762405395508, "learning_rate": 1.9402912621359223e-05, "loss": 0.2727, "step": 6303 }, { "epoch": 61.128915662650606, "grad_norm": 8.920494079589844, "learning_rate": 1.9398058252427184e-05, "loss": 0.4584, "step": 6304 }, { "epoch": 61.13855421686747, "grad_norm": 6.337416648864746, "learning_rate": 1.9393203883495148e-05, "loss": 0.2737, "step": 6305 }, { "epoch": 61.14819277108434, "grad_norm": 2.738138198852539, "learning_rate": 1.938834951456311e-05, "loss": 0.1669, "step": 6306 }, { "epoch": 61.1578313253012, "grad_norm": 6.222540855407715, "learning_rate": 1.938349514563107e-05, "loss": 0.1174, "step": 6307 }, { "epoch": 61.16746987951807, "grad_norm": 4.371759414672852, "learning_rate": 1.937864077669903e-05, "loss": 0.1058, "step": 6308 }, { "epoch": 61.17710843373494, "grad_norm": 11.907783508300781, "learning_rate": 1.9373786407766993e-05, "loss": 0.1783, "step": 6309 }, { "epoch": 61.18674698795181, "grad_norm": 6.457304954528809, "learning_rate": 1.9368932038834954e-05, "loss": 0.4905, "step": 6310 }, { "epoch": 61.19638554216868, "grad_norm": 2.857980966567993, "learning_rate": 1.9364077669902915e-05, "loss": 0.1904, "step": 6311 }, { "epoch": 61.20602409638554, "grad_norm": 1.2029941082000732, "learning_rate": 1.9359223300970873e-05, "loss": 0.1523, "step": 6312 }, { "epoch": 61.21566265060241, "grad_norm": 27.088327407836914, "learning_rate": 1.9354368932038834e-05, "loss": 0.3632, "step": 6313 }, { "epoch": 61.225301204819274, "grad_norm": 16.619279861450195, "learning_rate": 1.9349514563106795e-05, "loss": 0.218, "step": 6314 }, { "epoch": 61.234939759036145, "grad_norm": 6.598398685455322, "learning_rate": 1.9344660194174756e-05, "loss": 0.2703, "step": 6315 }, { "epoch": 61.244578313253015, "grad_norm": 4.799069881439209, "learning_rate": 1.9339805825242717e-05, "loss": 0.266, "step": 6316 }, { "epoch": 61.25421686746988, "grad_norm": 16.752620697021484, "learning_rate": 1.933495145631068e-05, "loss": 0.2725, "step": 6317 }, { "epoch": 61.26385542168675, "grad_norm": 11.634900093078613, "learning_rate": 1.9330097087378643e-05, "loss": 0.4603, "step": 6318 }, { "epoch": 61.27349397590361, "grad_norm": 6.937582969665527, "learning_rate": 1.9325242718446604e-05, "loss": 0.2944, "step": 6319 }, { "epoch": 61.28313253012048, "grad_norm": 5.183067798614502, "learning_rate": 1.9320388349514565e-05, "loss": 0.3228, "step": 6320 }, { "epoch": 61.292771084337346, "grad_norm": 11.733434677124023, "learning_rate": 1.9315533980582527e-05, "loss": 0.3036, "step": 6321 }, { "epoch": 61.30240963855422, "grad_norm": 4.365450859069824, "learning_rate": 1.9310679611650488e-05, "loss": 0.1483, "step": 6322 }, { "epoch": 61.31204819277109, "grad_norm": 5.57679557800293, "learning_rate": 1.930582524271845e-05, "loss": 0.2642, "step": 6323 }, { "epoch": 61.32168674698795, "grad_norm": 4.279637813568115, "learning_rate": 1.930097087378641e-05, "loss": 0.2854, "step": 6324 }, { "epoch": 61.33132530120482, "grad_norm": 2.720757007598877, "learning_rate": 1.929611650485437e-05, "loss": 0.2358, "step": 6325 }, { "epoch": 61.340963855421684, "grad_norm": 13.206584930419922, "learning_rate": 1.929126213592233e-05, "loss": 0.2998, "step": 6326 }, { "epoch": 61.350602409638554, "grad_norm": 3.227292060852051, "learning_rate": 1.928640776699029e-05, "loss": 0.1858, "step": 6327 }, { "epoch": 61.360240963855425, "grad_norm": 14.809481620788574, "learning_rate": 1.928155339805825e-05, "loss": 0.3401, "step": 6328 }, { "epoch": 61.36987951807229, "grad_norm": 4.080522060394287, "learning_rate": 1.9276699029126212e-05, "loss": 0.262, "step": 6329 }, { "epoch": 61.37951807228916, "grad_norm": 16.279634475708008, "learning_rate": 1.9271844660194173e-05, "loss": 0.4239, "step": 6330 }, { "epoch": 61.38915662650602, "grad_norm": 2.2303218841552734, "learning_rate": 1.9266990291262138e-05, "loss": 0.1224, "step": 6331 }, { "epoch": 61.39879518072289, "grad_norm": 6.65068244934082, "learning_rate": 1.92621359223301e-05, "loss": 0.1438, "step": 6332 }, { "epoch": 61.408433734939756, "grad_norm": 3.5699546337127686, "learning_rate": 1.925728155339806e-05, "loss": 0.2202, "step": 6333 }, { "epoch": 61.418072289156626, "grad_norm": 26.107303619384766, "learning_rate": 1.925242718446602e-05, "loss": 0.2931, "step": 6334 }, { "epoch": 61.4277108433735, "grad_norm": 3.949767589569092, "learning_rate": 1.9247572815533983e-05, "loss": 0.126, "step": 6335 }, { "epoch": 61.43734939759036, "grad_norm": 13.140693664550781, "learning_rate": 1.9242718446601944e-05, "loss": 0.3403, "step": 6336 }, { "epoch": 61.44698795180723, "grad_norm": 4.980710029602051, "learning_rate": 1.9237864077669905e-05, "loss": 0.3111, "step": 6337 }, { "epoch": 61.456626506024094, "grad_norm": 2.110072374343872, "learning_rate": 1.9233009708737866e-05, "loss": 0.1783, "step": 6338 }, { "epoch": 61.466265060240964, "grad_norm": 7.481468200683594, "learning_rate": 1.9228155339805827e-05, "loss": 0.2158, "step": 6339 }, { "epoch": 61.475903614457835, "grad_norm": 2.657203197479248, "learning_rate": 1.9223300970873785e-05, "loss": 0.291, "step": 6340 }, { "epoch": 61.4855421686747, "grad_norm": 4.03296422958374, "learning_rate": 1.9218446601941746e-05, "loss": 0.301, "step": 6341 }, { "epoch": 61.49518072289157, "grad_norm": 4.255730152130127, "learning_rate": 1.9213592233009707e-05, "loss": 0.2313, "step": 6342 }, { "epoch": 61.50481927710843, "grad_norm": 18.84883689880371, "learning_rate": 1.9208737864077672e-05, "loss": 0.3325, "step": 6343 }, { "epoch": 61.5144578313253, "grad_norm": 12.856209754943848, "learning_rate": 1.9203883495145633e-05, "loss": 0.3104, "step": 6344 }, { "epoch": 61.524096385542165, "grad_norm": 14.574767112731934, "learning_rate": 1.9199029126213594e-05, "loss": 0.3618, "step": 6345 }, { "epoch": 61.533734939759036, "grad_norm": 11.739580154418945, "learning_rate": 1.9194174757281555e-05, "loss": 0.237, "step": 6346 }, { "epoch": 61.543373493975906, "grad_norm": 4.444575786590576, "learning_rate": 1.9189320388349516e-05, "loss": 0.1632, "step": 6347 }, { "epoch": 61.55301204819277, "grad_norm": 3.3174057006835938, "learning_rate": 1.9184466019417478e-05, "loss": 0.0737, "step": 6348 }, { "epoch": 61.56265060240964, "grad_norm": 14.087471008300781, "learning_rate": 1.917961165048544e-05, "loss": 0.2686, "step": 6349 }, { "epoch": 61.5722891566265, "grad_norm": 13.006897926330566, "learning_rate": 1.91747572815534e-05, "loss": 0.194, "step": 6350 }, { "epoch": 61.581927710843374, "grad_norm": 12.82766342163086, "learning_rate": 1.916990291262136e-05, "loss": 0.2874, "step": 6351 }, { "epoch": 61.591566265060244, "grad_norm": 5.40841007232666, "learning_rate": 1.9165048543689322e-05, "loss": 0.3345, "step": 6352 }, { "epoch": 61.60120481927711, "grad_norm": 4.371339321136475, "learning_rate": 1.9160194174757283e-05, "loss": 0.1009, "step": 6353 }, { "epoch": 61.61084337349398, "grad_norm": 12.000500679016113, "learning_rate": 1.915533980582524e-05, "loss": 0.3178, "step": 6354 }, { "epoch": 61.62048192771084, "grad_norm": 5.000463962554932, "learning_rate": 1.9150485436893202e-05, "loss": 0.2038, "step": 6355 }, { "epoch": 61.63012048192771, "grad_norm": 10.122700691223145, "learning_rate": 1.9145631067961167e-05, "loss": 0.2756, "step": 6356 }, { "epoch": 61.639759036144575, "grad_norm": 8.847368240356445, "learning_rate": 1.9140776699029128e-05, "loss": 0.1469, "step": 6357 }, { "epoch": 61.649397590361446, "grad_norm": 3.8841965198516846, "learning_rate": 1.913592233009709e-05, "loss": 0.1784, "step": 6358 }, { "epoch": 61.659036144578316, "grad_norm": 8.978529930114746, "learning_rate": 1.913106796116505e-05, "loss": 0.263, "step": 6359 }, { "epoch": 61.66867469879518, "grad_norm": 5.22803258895874, "learning_rate": 1.912621359223301e-05, "loss": 0.3676, "step": 6360 }, { "epoch": 61.67831325301205, "grad_norm": 52.249351501464844, "learning_rate": 1.9121359223300972e-05, "loss": 0.4316, "step": 6361 }, { "epoch": 61.68795180722891, "grad_norm": 3.99702525138855, "learning_rate": 1.9116504854368934e-05, "loss": 0.1967, "step": 6362 }, { "epoch": 61.69759036144578, "grad_norm": 46.032737731933594, "learning_rate": 1.9111650485436895e-05, "loss": 0.2077, "step": 6363 }, { "epoch": 61.707228915662654, "grad_norm": 2.484968900680542, "learning_rate": 1.9106796116504856e-05, "loss": 0.1554, "step": 6364 }, { "epoch": 61.71686746987952, "grad_norm": 5.97007942199707, "learning_rate": 1.9101941747572817e-05, "loss": 0.2268, "step": 6365 }, { "epoch": 61.72650602409639, "grad_norm": 15.404288291931152, "learning_rate": 1.9097087378640778e-05, "loss": 0.211, "step": 6366 }, { "epoch": 61.73614457831325, "grad_norm": 5.1402130126953125, "learning_rate": 1.909223300970874e-05, "loss": 0.184, "step": 6367 }, { "epoch": 61.74578313253012, "grad_norm": 6.3844828605651855, "learning_rate": 1.9087378640776697e-05, "loss": 0.3235, "step": 6368 }, { "epoch": 61.755421686746985, "grad_norm": 2.232191801071167, "learning_rate": 1.908252427184466e-05, "loss": 0.2452, "step": 6369 }, { "epoch": 61.765060240963855, "grad_norm": 3.0368428230285645, "learning_rate": 1.9077669902912623e-05, "loss": 0.1875, "step": 6370 }, { "epoch": 61.774698795180726, "grad_norm": 6.166379928588867, "learning_rate": 1.9072815533980584e-05, "loss": 0.3108, "step": 6371 }, { "epoch": 61.78433734939759, "grad_norm": 4.540817737579346, "learning_rate": 1.9067961165048545e-05, "loss": 0.1443, "step": 6372 }, { "epoch": 61.79397590361446, "grad_norm": 3.4988582134246826, "learning_rate": 1.9063106796116506e-05, "loss": 0.106, "step": 6373 }, { "epoch": 61.80361445783132, "grad_norm": 12.675655364990234, "learning_rate": 1.9058252427184467e-05, "loss": 0.2784, "step": 6374 }, { "epoch": 61.81325301204819, "grad_norm": 10.708345413208008, "learning_rate": 1.905339805825243e-05, "loss": 0.2892, "step": 6375 }, { "epoch": 61.82289156626506, "grad_norm": 5.49843168258667, "learning_rate": 1.904854368932039e-05, "loss": 0.3434, "step": 6376 }, { "epoch": 61.83253012048193, "grad_norm": 7.781121253967285, "learning_rate": 1.904368932038835e-05, "loss": 0.2683, "step": 6377 }, { "epoch": 61.8421686746988, "grad_norm": 1.4959056377410889, "learning_rate": 1.9038834951456312e-05, "loss": 0.0635, "step": 6378 }, { "epoch": 61.85180722891566, "grad_norm": 2.997638702392578, "learning_rate": 1.9033980582524273e-05, "loss": 0.1536, "step": 6379 }, { "epoch": 61.86144578313253, "grad_norm": 17.613140106201172, "learning_rate": 1.9029126213592234e-05, "loss": 0.3035, "step": 6380 }, { "epoch": 61.871084337349394, "grad_norm": 3.86350417137146, "learning_rate": 1.9024271844660195e-05, "loss": 0.255, "step": 6381 }, { "epoch": 61.880722891566265, "grad_norm": 4.0862627029418945, "learning_rate": 1.9019417475728156e-05, "loss": 0.3219, "step": 6382 }, { "epoch": 61.890361445783135, "grad_norm": 3.1979968547821045, "learning_rate": 1.9014563106796118e-05, "loss": 0.145, "step": 6383 }, { "epoch": 61.9, "grad_norm": 8.922184944152832, "learning_rate": 1.900970873786408e-05, "loss": 0.4137, "step": 6384 }, { "epoch": 61.90963855421687, "grad_norm": 3.1145567893981934, "learning_rate": 1.900485436893204e-05, "loss": 0.2695, "step": 6385 }, { "epoch": 61.91927710843373, "grad_norm": 4.177104473114014, "learning_rate": 1.9e-05, "loss": 0.206, "step": 6386 }, { "epoch": 61.9289156626506, "grad_norm": 6.667459011077881, "learning_rate": 1.8995145631067962e-05, "loss": 0.2673, "step": 6387 }, { "epoch": 61.93855421686747, "grad_norm": 5.205520153045654, "learning_rate": 1.8990291262135923e-05, "loss": 0.1453, "step": 6388 }, { "epoch": 61.94819277108434, "grad_norm": 5.289984703063965, "learning_rate": 1.8985436893203884e-05, "loss": 0.1804, "step": 6389 }, { "epoch": 61.95783132530121, "grad_norm": 5.939762592315674, "learning_rate": 1.8980582524271846e-05, "loss": 0.4527, "step": 6390 }, { "epoch": 61.96746987951807, "grad_norm": 4.504059791564941, "learning_rate": 1.8975728155339807e-05, "loss": 0.2582, "step": 6391 }, { "epoch": 61.97710843373494, "grad_norm": 15.437551498413086, "learning_rate": 1.8970873786407768e-05, "loss": 0.3057, "step": 6392 }, { "epoch": 61.986746987951804, "grad_norm": 5.003702163696289, "learning_rate": 1.896601941747573e-05, "loss": 0.2282, "step": 6393 }, { "epoch": 62.00240963855422, "grad_norm": 3.44108510017395, "learning_rate": 1.896116504854369e-05, "loss": 0.322, "step": 6394 }, { "epoch": 62.01204819277108, "grad_norm": 10.637738227844238, "learning_rate": 1.895631067961165e-05, "loss": 0.2773, "step": 6395 }, { "epoch": 62.02168674698795, "grad_norm": 6.199082374572754, "learning_rate": 1.8951456310679613e-05, "loss": 0.3086, "step": 6396 }, { "epoch": 62.03132530120482, "grad_norm": 5.151947975158691, "learning_rate": 1.8946601941747574e-05, "loss": 0.2659, "step": 6397 }, { "epoch": 62.04096385542169, "grad_norm": 4.341410160064697, "learning_rate": 1.8941747572815535e-05, "loss": 0.1642, "step": 6398 }, { "epoch": 62.05060240963856, "grad_norm": 4.045079231262207, "learning_rate": 1.8936893203883496e-05, "loss": 0.1177, "step": 6399 }, { "epoch": 62.06024096385542, "grad_norm": 1.6444413661956787, "learning_rate": 1.8932038834951457e-05, "loss": 0.2373, "step": 6400 }, { "epoch": 62.06987951807229, "grad_norm": 6.8696699142456055, "learning_rate": 1.8927184466019418e-05, "loss": 0.3339, "step": 6401 }, { "epoch": 62.079518072289154, "grad_norm": 3.528550624847412, "learning_rate": 1.892233009708738e-05, "loss": 0.1008, "step": 6402 }, { "epoch": 62.089156626506025, "grad_norm": 4.445713043212891, "learning_rate": 1.891747572815534e-05, "loss": 0.1615, "step": 6403 }, { "epoch": 62.09879518072289, "grad_norm": 11.17031478881836, "learning_rate": 1.89126213592233e-05, "loss": 0.2641, "step": 6404 }, { "epoch": 62.10843373493976, "grad_norm": 6.218481540679932, "learning_rate": 1.8907766990291263e-05, "loss": 0.2177, "step": 6405 }, { "epoch": 62.11807228915663, "grad_norm": 8.4464111328125, "learning_rate": 1.8902912621359224e-05, "loss": 0.2878, "step": 6406 }, { "epoch": 62.12771084337349, "grad_norm": 12.492119789123535, "learning_rate": 1.8898058252427185e-05, "loss": 0.1245, "step": 6407 }, { "epoch": 62.13734939759036, "grad_norm": 4.372972011566162, "learning_rate": 1.8893203883495146e-05, "loss": 0.1616, "step": 6408 }, { "epoch": 62.146987951807226, "grad_norm": 6.812867164611816, "learning_rate": 1.8888349514563107e-05, "loss": 0.2017, "step": 6409 }, { "epoch": 62.1566265060241, "grad_norm": 51.08679962158203, "learning_rate": 1.888349514563107e-05, "loss": 0.3291, "step": 6410 }, { "epoch": 62.16626506024097, "grad_norm": 5.13352632522583, "learning_rate": 1.887864077669903e-05, "loss": 0.2296, "step": 6411 }, { "epoch": 62.17590361445783, "grad_norm": 5.207038402557373, "learning_rate": 1.887378640776699e-05, "loss": 0.1746, "step": 6412 }, { "epoch": 62.1855421686747, "grad_norm": 3.078223943710327, "learning_rate": 1.8868932038834952e-05, "loss": 0.2362, "step": 6413 }, { "epoch": 62.195180722891564, "grad_norm": 1.2399075031280518, "learning_rate": 1.8864077669902913e-05, "loss": 0.0717, "step": 6414 }, { "epoch": 62.204819277108435, "grad_norm": 8.411497116088867, "learning_rate": 1.8859223300970874e-05, "loss": 0.24, "step": 6415 }, { "epoch": 62.2144578313253, "grad_norm": 6.59587287902832, "learning_rate": 1.8854368932038835e-05, "loss": 0.3035, "step": 6416 }, { "epoch": 62.22409638554217, "grad_norm": 23.20875358581543, "learning_rate": 1.8849514563106797e-05, "loss": 0.2599, "step": 6417 }, { "epoch": 62.23373493975904, "grad_norm": 35.279666900634766, "learning_rate": 1.8844660194174758e-05, "loss": 0.1957, "step": 6418 }, { "epoch": 62.2433734939759, "grad_norm": 22.063852310180664, "learning_rate": 1.8839805825242722e-05, "loss": 0.2078, "step": 6419 }, { "epoch": 62.25301204819277, "grad_norm": 3.5973587036132812, "learning_rate": 1.883495145631068e-05, "loss": 0.1132, "step": 6420 }, { "epoch": 62.262650602409636, "grad_norm": 3.697188138961792, "learning_rate": 1.883009708737864e-05, "loss": 0.2025, "step": 6421 }, { "epoch": 62.272289156626506, "grad_norm": 4.14540958404541, "learning_rate": 1.8825242718446602e-05, "loss": 0.4803, "step": 6422 }, { "epoch": 62.28192771084338, "grad_norm": 3.144305467605591, "learning_rate": 1.8820388349514563e-05, "loss": 0.2872, "step": 6423 }, { "epoch": 62.29156626506024, "grad_norm": 3.675781488418579, "learning_rate": 1.8815533980582525e-05, "loss": 0.248, "step": 6424 }, { "epoch": 62.30120481927711, "grad_norm": 4.666933059692383, "learning_rate": 1.8810679611650486e-05, "loss": 0.2527, "step": 6425 }, { "epoch": 62.310843373493974, "grad_norm": 5.856875896453857, "learning_rate": 1.8805825242718447e-05, "loss": 0.1844, "step": 6426 }, { "epoch": 62.320481927710844, "grad_norm": 2.7021005153656006, "learning_rate": 1.8800970873786408e-05, "loss": 0.2095, "step": 6427 }, { "epoch": 62.33012048192771, "grad_norm": 3.509845733642578, "learning_rate": 1.879611650485437e-05, "loss": 0.1917, "step": 6428 }, { "epoch": 62.33975903614458, "grad_norm": 6.06282901763916, "learning_rate": 1.879126213592233e-05, "loss": 0.2315, "step": 6429 }, { "epoch": 62.34939759036145, "grad_norm": 3.5123870372772217, "learning_rate": 1.878640776699029e-05, "loss": 0.2684, "step": 6430 }, { "epoch": 62.35903614457831, "grad_norm": 6.2379255294799805, "learning_rate": 1.8781553398058253e-05, "loss": 0.2983, "step": 6431 }, { "epoch": 62.36867469879518, "grad_norm": 4.146108627319336, "learning_rate": 1.8776699029126217e-05, "loss": 0.1709, "step": 6432 }, { "epoch": 62.378313253012045, "grad_norm": 3.8548576831817627, "learning_rate": 1.8771844660194178e-05, "loss": 0.2087, "step": 6433 }, { "epoch": 62.387951807228916, "grad_norm": 9.359267234802246, "learning_rate": 1.8766990291262136e-05, "loss": 0.116, "step": 6434 }, { "epoch": 62.397590361445786, "grad_norm": 2.9254374504089355, "learning_rate": 1.8762135922330097e-05, "loss": 0.2395, "step": 6435 }, { "epoch": 62.40722891566265, "grad_norm": 6.134002685546875, "learning_rate": 1.875728155339806e-05, "loss": 0.3417, "step": 6436 }, { "epoch": 62.41686746987952, "grad_norm": 5.497939109802246, "learning_rate": 1.875242718446602e-05, "loss": 0.2536, "step": 6437 }, { "epoch": 62.42650602409638, "grad_norm": 4.480133056640625, "learning_rate": 1.874757281553398e-05, "loss": 0.2326, "step": 6438 }, { "epoch": 62.436144578313254, "grad_norm": 7.169097423553467, "learning_rate": 1.8742718446601942e-05, "loss": 0.3131, "step": 6439 }, { "epoch": 62.44578313253012, "grad_norm": 4.449195384979248, "learning_rate": 1.8737864077669903e-05, "loss": 0.2816, "step": 6440 }, { "epoch": 62.45542168674699, "grad_norm": 8.700122833251953, "learning_rate": 1.8733009708737864e-05, "loss": 0.3432, "step": 6441 }, { "epoch": 62.46506024096386, "grad_norm": 23.897687911987305, "learning_rate": 1.8728155339805825e-05, "loss": 0.4325, "step": 6442 }, { "epoch": 62.47469879518072, "grad_norm": 21.040897369384766, "learning_rate": 1.8723300970873786e-05, "loss": 0.269, "step": 6443 }, { "epoch": 62.48433734939759, "grad_norm": 3.8155548572540283, "learning_rate": 1.871844660194175e-05, "loss": 0.1308, "step": 6444 }, { "epoch": 62.493975903614455, "grad_norm": 23.468017578125, "learning_rate": 1.8713592233009712e-05, "loss": 0.1374, "step": 6445 }, { "epoch": 62.503614457831326, "grad_norm": 4.452372074127197, "learning_rate": 1.8708737864077673e-05, "loss": 0.1904, "step": 6446 }, { "epoch": 62.513253012048196, "grad_norm": 5.503695487976074, "learning_rate": 1.8703883495145634e-05, "loss": 0.3405, "step": 6447 }, { "epoch": 62.52289156626506, "grad_norm": 6.195518493652344, "learning_rate": 1.8699029126213592e-05, "loss": 0.3103, "step": 6448 }, { "epoch": 62.53253012048193, "grad_norm": 5.461397647857666, "learning_rate": 1.8694174757281553e-05, "loss": 0.2195, "step": 6449 }, { "epoch": 62.54216867469879, "grad_norm": 3.9944169521331787, "learning_rate": 1.8689320388349514e-05, "loss": 0.2383, "step": 6450 }, { "epoch": 62.55180722891566, "grad_norm": 5.2607574462890625, "learning_rate": 1.8684466019417476e-05, "loss": 0.2731, "step": 6451 }, { "epoch": 62.56144578313253, "grad_norm": 4.411778926849365, "learning_rate": 1.8679611650485437e-05, "loss": 0.1725, "step": 6452 }, { "epoch": 62.5710843373494, "grad_norm": 11.373614311218262, "learning_rate": 1.8674757281553398e-05, "loss": 0.1436, "step": 6453 }, { "epoch": 62.58072289156627, "grad_norm": 11.305303573608398, "learning_rate": 1.866990291262136e-05, "loss": 0.2773, "step": 6454 }, { "epoch": 62.59036144578313, "grad_norm": 2.2138521671295166, "learning_rate": 1.866504854368932e-05, "loss": 0.1064, "step": 6455 }, { "epoch": 62.6, "grad_norm": 7.464939117431641, "learning_rate": 1.866019417475728e-05, "loss": 0.3405, "step": 6456 }, { "epoch": 62.609638554216865, "grad_norm": 4.2840657234191895, "learning_rate": 1.8655339805825246e-05, "loss": 0.278, "step": 6457 }, { "epoch": 62.619277108433735, "grad_norm": 5.004339694976807, "learning_rate": 1.8650485436893207e-05, "loss": 0.215, "step": 6458 }, { "epoch": 62.628915662650606, "grad_norm": 6.181880474090576, "learning_rate": 1.8645631067961168e-05, "loss": 0.1812, "step": 6459 }, { "epoch": 62.63855421686747, "grad_norm": 6.887350559234619, "learning_rate": 1.864077669902913e-05, "loss": 0.3789, "step": 6460 }, { "epoch": 62.64819277108434, "grad_norm": 2.52447509765625, "learning_rate": 1.8635922330097087e-05, "loss": 0.2835, "step": 6461 }, { "epoch": 62.6578313253012, "grad_norm": 17.284456253051758, "learning_rate": 1.8631067961165048e-05, "loss": 0.3155, "step": 6462 }, { "epoch": 62.66746987951807, "grad_norm": 5.480302333831787, "learning_rate": 1.862621359223301e-05, "loss": 0.223, "step": 6463 }, { "epoch": 62.67710843373494, "grad_norm": 8.097647666931152, "learning_rate": 1.862135922330097e-05, "loss": 0.1413, "step": 6464 }, { "epoch": 62.68674698795181, "grad_norm": 9.199381828308105, "learning_rate": 1.861650485436893e-05, "loss": 0.1877, "step": 6465 }, { "epoch": 62.69638554216868, "grad_norm": 4.0660810470581055, "learning_rate": 1.8611650485436893e-05, "loss": 0.1431, "step": 6466 }, { "epoch": 62.70602409638554, "grad_norm": 11.19238567352295, "learning_rate": 1.8606796116504854e-05, "loss": 0.1957, "step": 6467 }, { "epoch": 62.71566265060241, "grad_norm": 2.4228227138519287, "learning_rate": 1.8601941747572815e-05, "loss": 0.1013, "step": 6468 }, { "epoch": 62.725301204819274, "grad_norm": 7.068685054779053, "learning_rate": 1.8597087378640776e-05, "loss": 0.3276, "step": 6469 }, { "epoch": 62.734939759036145, "grad_norm": 4.095796585083008, "learning_rate": 1.859223300970874e-05, "loss": 0.2491, "step": 6470 }, { "epoch": 62.744578313253015, "grad_norm": 9.30323314666748, "learning_rate": 1.8587378640776702e-05, "loss": 0.5146, "step": 6471 }, { "epoch": 62.75421686746988, "grad_norm": 10.06196403503418, "learning_rate": 1.8582524271844663e-05, "loss": 0.2622, "step": 6472 }, { "epoch": 62.76385542168675, "grad_norm": 8.503377914428711, "learning_rate": 1.8577669902912624e-05, "loss": 0.3647, "step": 6473 }, { "epoch": 62.77349397590361, "grad_norm": 3.7116525173187256, "learning_rate": 1.8572815533980585e-05, "loss": 0.2039, "step": 6474 }, { "epoch": 62.78313253012048, "grad_norm": 17.58473777770996, "learning_rate": 1.8567961165048543e-05, "loss": 0.3064, "step": 6475 }, { "epoch": 62.792771084337346, "grad_norm": 25.144027709960938, "learning_rate": 1.8563106796116504e-05, "loss": 0.376, "step": 6476 }, { "epoch": 62.80240963855422, "grad_norm": 3.557323455810547, "learning_rate": 1.8558252427184465e-05, "loss": 0.1509, "step": 6477 }, { "epoch": 62.81204819277109, "grad_norm": 6.111523628234863, "learning_rate": 1.8553398058252426e-05, "loss": 0.4218, "step": 6478 }, { "epoch": 62.82168674698795, "grad_norm": 8.21865177154541, "learning_rate": 1.8548543689320388e-05, "loss": 0.3992, "step": 6479 }, { "epoch": 62.83132530120482, "grad_norm": 2.5769870281219482, "learning_rate": 1.854368932038835e-05, "loss": 0.1399, "step": 6480 }, { "epoch": 62.840963855421684, "grad_norm": 4.942470073699951, "learning_rate": 1.853883495145631e-05, "loss": 0.4104, "step": 6481 }, { "epoch": 62.850602409638554, "grad_norm": 4.053674697875977, "learning_rate": 1.8533980582524274e-05, "loss": 0.2177, "step": 6482 }, { "epoch": 62.860240963855425, "grad_norm": 3.4528591632843018, "learning_rate": 1.8529126213592236e-05, "loss": 0.1595, "step": 6483 }, { "epoch": 62.86987951807229, "grad_norm": 13.02497386932373, "learning_rate": 1.8524271844660197e-05, "loss": 0.2545, "step": 6484 }, { "epoch": 62.87951807228916, "grad_norm": 7.862222194671631, "learning_rate": 1.8519417475728158e-05, "loss": 0.2471, "step": 6485 }, { "epoch": 62.88915662650602, "grad_norm": 3.4613001346588135, "learning_rate": 1.851456310679612e-05, "loss": 0.1927, "step": 6486 }, { "epoch": 62.89879518072289, "grad_norm": 4.580051898956299, "learning_rate": 1.850970873786408e-05, "loss": 0.1874, "step": 6487 }, { "epoch": 62.908433734939756, "grad_norm": 7.608817100524902, "learning_rate": 1.850485436893204e-05, "loss": 0.1586, "step": 6488 }, { "epoch": 62.918072289156626, "grad_norm": 5.039779186248779, "learning_rate": 1.85e-05, "loss": 0.2562, "step": 6489 }, { "epoch": 62.9277108433735, "grad_norm": 11.93008804321289, "learning_rate": 1.849514563106796e-05, "loss": 0.2575, "step": 6490 }, { "epoch": 62.93734939759036, "grad_norm": 3.7108216285705566, "learning_rate": 1.849029126213592e-05, "loss": 0.2635, "step": 6491 }, { "epoch": 62.94698795180723, "grad_norm": 6.940544605255127, "learning_rate": 1.8485436893203882e-05, "loss": 0.2172, "step": 6492 }, { "epoch": 62.956626506024094, "grad_norm": 4.711289882659912, "learning_rate": 1.8480582524271844e-05, "loss": 0.2015, "step": 6493 }, { "epoch": 62.966265060240964, "grad_norm": 6.555110931396484, "learning_rate": 1.8475728155339805e-05, "loss": 0.1512, "step": 6494 }, { "epoch": 62.975903614457835, "grad_norm": 10.166234970092773, "learning_rate": 1.847087378640777e-05, "loss": 0.1131, "step": 6495 }, { "epoch": 62.9855421686747, "grad_norm": 2.635965347290039, "learning_rate": 1.846601941747573e-05, "loss": 0.2536, "step": 6496 }, { "epoch": 63.001204819277106, "grad_norm": 3.9641904830932617, "learning_rate": 1.846116504854369e-05, "loss": 0.1478, "step": 6497 }, { "epoch": 63.01084337349398, "grad_norm": 4.437438011169434, "learning_rate": 1.8456310679611653e-05, "loss": 0.2479, "step": 6498 }, { "epoch": 63.02048192771084, "grad_norm": 7.879708290100098, "learning_rate": 1.8451456310679614e-05, "loss": 0.2907, "step": 6499 }, { "epoch": 63.03012048192771, "grad_norm": 5.816905975341797, "learning_rate": 1.8446601941747575e-05, "loss": 0.1626, "step": 6500 }, { "epoch": 63.03975903614458, "grad_norm": 2.6537973880767822, "learning_rate": 1.8441747572815536e-05, "loss": 0.1645, "step": 6501 }, { "epoch": 63.049397590361444, "grad_norm": 7.601563453674316, "learning_rate": 1.8436893203883497e-05, "loss": 0.461, "step": 6502 }, { "epoch": 63.059036144578315, "grad_norm": 3.374466896057129, "learning_rate": 1.8432038834951455e-05, "loss": 0.1422, "step": 6503 }, { "epoch": 63.06867469879518, "grad_norm": 3.3457419872283936, "learning_rate": 1.8427184466019416e-05, "loss": 0.1783, "step": 6504 }, { "epoch": 63.07831325301205, "grad_norm": 3.2361724376678467, "learning_rate": 1.8422330097087377e-05, "loss": 0.2029, "step": 6505 }, { "epoch": 63.08795180722892, "grad_norm": 4.559907913208008, "learning_rate": 1.841747572815534e-05, "loss": 0.3889, "step": 6506 }, { "epoch": 63.09759036144578, "grad_norm": 4.792265892028809, "learning_rate": 1.8412621359223303e-05, "loss": 0.3171, "step": 6507 }, { "epoch": 63.10722891566265, "grad_norm": 4.3607072830200195, "learning_rate": 1.8407766990291264e-05, "loss": 0.1407, "step": 6508 }, { "epoch": 63.116867469879516, "grad_norm": 8.844127655029297, "learning_rate": 1.8402912621359225e-05, "loss": 0.294, "step": 6509 }, { "epoch": 63.126506024096386, "grad_norm": 6.400631904602051, "learning_rate": 1.8398058252427187e-05, "loss": 0.2363, "step": 6510 }, { "epoch": 63.13614457831325, "grad_norm": 4.367488384246826, "learning_rate": 1.8393203883495148e-05, "loss": 0.1319, "step": 6511 }, { "epoch": 63.14578313253012, "grad_norm": 10.325469970703125, "learning_rate": 1.838834951456311e-05, "loss": 0.2247, "step": 6512 }, { "epoch": 63.15542168674699, "grad_norm": 4.731476783752441, "learning_rate": 1.838349514563107e-05, "loss": 0.2703, "step": 6513 }, { "epoch": 63.165060240963854, "grad_norm": 3.796700954437256, "learning_rate": 1.837864077669903e-05, "loss": 0.2404, "step": 6514 }, { "epoch": 63.174698795180724, "grad_norm": 9.867021560668945, "learning_rate": 1.8373786407766992e-05, "loss": 0.202, "step": 6515 }, { "epoch": 63.18433734939759, "grad_norm": 5.569084644317627, "learning_rate": 1.8368932038834953e-05, "loss": 0.2228, "step": 6516 }, { "epoch": 63.19397590361446, "grad_norm": 2.712738037109375, "learning_rate": 1.836407766990291e-05, "loss": 0.1551, "step": 6517 }, { "epoch": 63.20361445783133, "grad_norm": 3.0606963634490967, "learning_rate": 1.8359223300970872e-05, "loss": 0.1771, "step": 6518 }, { "epoch": 63.21325301204819, "grad_norm": 4.576059341430664, "learning_rate": 1.8354368932038833e-05, "loss": 0.3005, "step": 6519 }, { "epoch": 63.22289156626506, "grad_norm": 5.705111503601074, "learning_rate": 1.8349514563106798e-05, "loss": 0.2245, "step": 6520 }, { "epoch": 63.232530120481925, "grad_norm": 7.519847869873047, "learning_rate": 1.834466019417476e-05, "loss": 0.34, "step": 6521 }, { "epoch": 63.242168674698796, "grad_norm": 6.370523929595947, "learning_rate": 1.833980582524272e-05, "loss": 0.402, "step": 6522 }, { "epoch": 63.25180722891566, "grad_norm": 3.563537836074829, "learning_rate": 1.833495145631068e-05, "loss": 0.1268, "step": 6523 }, { "epoch": 63.26144578313253, "grad_norm": 7.18746280670166, "learning_rate": 1.8330097087378643e-05, "loss": 0.2566, "step": 6524 }, { "epoch": 63.2710843373494, "grad_norm": 14.179351806640625, "learning_rate": 1.8325242718446604e-05, "loss": 0.262, "step": 6525 }, { "epoch": 63.28072289156626, "grad_norm": 13.737875938415527, "learning_rate": 1.8320388349514565e-05, "loss": 0.1769, "step": 6526 }, { "epoch": 63.290361445783134, "grad_norm": 6.177418231964111, "learning_rate": 1.8315533980582526e-05, "loss": 0.2829, "step": 6527 }, { "epoch": 63.3, "grad_norm": 5.605952739715576, "learning_rate": 1.8310679611650487e-05, "loss": 0.2543, "step": 6528 }, { "epoch": 63.30963855421687, "grad_norm": 20.676223754882812, "learning_rate": 1.8305825242718448e-05, "loss": 0.2889, "step": 6529 }, { "epoch": 63.31927710843374, "grad_norm": 3.4485960006713867, "learning_rate": 1.830097087378641e-05, "loss": 0.2192, "step": 6530 }, { "epoch": 63.3289156626506, "grad_norm": 8.515679359436035, "learning_rate": 1.8296116504854367e-05, "loss": 0.2872, "step": 6531 }, { "epoch": 63.33855421686747, "grad_norm": 2.3763766288757324, "learning_rate": 1.829126213592233e-05, "loss": 0.1439, "step": 6532 }, { "epoch": 63.348192771084335, "grad_norm": 3.913665771484375, "learning_rate": 1.8286407766990293e-05, "loss": 0.1864, "step": 6533 }, { "epoch": 63.357831325301206, "grad_norm": 11.234436988830566, "learning_rate": 1.8281553398058254e-05, "loss": 0.3922, "step": 6534 }, { "epoch": 63.36746987951807, "grad_norm": 13.021550178527832, "learning_rate": 1.8276699029126215e-05, "loss": 0.2678, "step": 6535 }, { "epoch": 63.37710843373494, "grad_norm": 2.1870436668395996, "learning_rate": 1.8271844660194176e-05, "loss": 0.1051, "step": 6536 }, { "epoch": 63.38674698795181, "grad_norm": 3.8613033294677734, "learning_rate": 1.8266990291262137e-05, "loss": 0.1515, "step": 6537 }, { "epoch": 63.39638554216867, "grad_norm": 2.9000048637390137, "learning_rate": 1.82621359223301e-05, "loss": 0.13, "step": 6538 }, { "epoch": 63.40602409638554, "grad_norm": 9.810993194580078, "learning_rate": 1.825728155339806e-05, "loss": 0.1782, "step": 6539 }, { "epoch": 63.41566265060241, "grad_norm": 2.7478551864624023, "learning_rate": 1.825242718446602e-05, "loss": 0.1741, "step": 6540 }, { "epoch": 63.42530120481928, "grad_norm": 7.414473533630371, "learning_rate": 1.8247572815533982e-05, "loss": 0.3135, "step": 6541 }, { "epoch": 63.43493975903615, "grad_norm": 6.348113536834717, "learning_rate": 1.8242718446601943e-05, "loss": 0.285, "step": 6542 }, { "epoch": 63.44457831325301, "grad_norm": 3.340587854385376, "learning_rate": 1.8237864077669904e-05, "loss": 0.1591, "step": 6543 }, { "epoch": 63.45421686746988, "grad_norm": 10.511194229125977, "learning_rate": 1.8233009708737862e-05, "loss": 0.2395, "step": 6544 }, { "epoch": 63.463855421686745, "grad_norm": 6.29851770401001, "learning_rate": 1.8228155339805827e-05, "loss": 0.3475, "step": 6545 }, { "epoch": 63.473493975903615, "grad_norm": 14.434024810791016, "learning_rate": 1.8223300970873788e-05, "loss": 0.4063, "step": 6546 }, { "epoch": 63.48313253012048, "grad_norm": 4.51621675491333, "learning_rate": 1.821844660194175e-05, "loss": 0.1337, "step": 6547 }, { "epoch": 63.49277108433735, "grad_norm": 2.394969940185547, "learning_rate": 1.821359223300971e-05, "loss": 0.1471, "step": 6548 }, { "epoch": 63.50240963855422, "grad_norm": 3.7967824935913086, "learning_rate": 1.820873786407767e-05, "loss": 0.1376, "step": 6549 }, { "epoch": 63.51204819277108, "grad_norm": 9.995774269104004, "learning_rate": 1.8203883495145632e-05, "loss": 0.3116, "step": 6550 }, { "epoch": 63.52168674698795, "grad_norm": 6.365564346313477, "learning_rate": 1.8199029126213593e-05, "loss": 0.3035, "step": 6551 }, { "epoch": 63.53132530120482, "grad_norm": 3.261061906814575, "learning_rate": 1.8194174757281555e-05, "loss": 0.1843, "step": 6552 }, { "epoch": 63.54096385542169, "grad_norm": 4.364355564117432, "learning_rate": 1.8189320388349516e-05, "loss": 0.2896, "step": 6553 }, { "epoch": 63.55060240963856, "grad_norm": 5.246786117553711, "learning_rate": 1.8184466019417477e-05, "loss": 0.2981, "step": 6554 }, { "epoch": 63.56024096385542, "grad_norm": 5.037435531616211, "learning_rate": 1.8179611650485438e-05, "loss": 0.2345, "step": 6555 }, { "epoch": 63.56987951807229, "grad_norm": 8.334623336791992, "learning_rate": 1.81747572815534e-05, "loss": 0.4045, "step": 6556 }, { "epoch": 63.579518072289154, "grad_norm": 3.8711585998535156, "learning_rate": 1.816990291262136e-05, "loss": 0.1398, "step": 6557 }, { "epoch": 63.589156626506025, "grad_norm": 5.434070110321045, "learning_rate": 1.816504854368932e-05, "loss": 0.2136, "step": 6558 }, { "epoch": 63.59879518072289, "grad_norm": 4.637836456298828, "learning_rate": 1.8160194174757283e-05, "loss": 0.2398, "step": 6559 }, { "epoch": 63.60843373493976, "grad_norm": 3.5660948753356934, "learning_rate": 1.8155339805825244e-05, "loss": 0.2505, "step": 6560 }, { "epoch": 63.61807228915663, "grad_norm": 5.423440933227539, "learning_rate": 1.8150485436893205e-05, "loss": 0.4612, "step": 6561 }, { "epoch": 63.62771084337349, "grad_norm": 3.3755125999450684, "learning_rate": 1.8145631067961166e-05, "loss": 0.1835, "step": 6562 }, { "epoch": 63.63734939759036, "grad_norm": 3.472095251083374, "learning_rate": 1.8140776699029127e-05, "loss": 0.288, "step": 6563 }, { "epoch": 63.646987951807226, "grad_norm": 12.089235305786133, "learning_rate": 1.813592233009709e-05, "loss": 0.2972, "step": 6564 }, { "epoch": 63.6566265060241, "grad_norm": 2.8978288173675537, "learning_rate": 1.813106796116505e-05, "loss": 0.2435, "step": 6565 }, { "epoch": 63.66626506024097, "grad_norm": 20.030738830566406, "learning_rate": 1.812621359223301e-05, "loss": 0.2753, "step": 6566 }, { "epoch": 63.67590361445783, "grad_norm": 6.7112226486206055, "learning_rate": 1.8121359223300972e-05, "loss": 0.2734, "step": 6567 }, { "epoch": 63.6855421686747, "grad_norm": 9.345488548278809, "learning_rate": 1.8116504854368933e-05, "loss": 0.1983, "step": 6568 }, { "epoch": 63.695180722891564, "grad_norm": 3.42960524559021, "learning_rate": 1.8111650485436894e-05, "loss": 0.1686, "step": 6569 }, { "epoch": 63.704819277108435, "grad_norm": 8.439249992370605, "learning_rate": 1.8106796116504855e-05, "loss": 0.3369, "step": 6570 }, { "epoch": 63.7144578313253, "grad_norm": 4.588433742523193, "learning_rate": 1.8101941747572816e-05, "loss": 0.4023, "step": 6571 }, { "epoch": 63.72409638554217, "grad_norm": 7.568880081176758, "learning_rate": 1.8097087378640778e-05, "loss": 0.1351, "step": 6572 }, { "epoch": 63.73373493975904, "grad_norm": 4.265466690063477, "learning_rate": 1.809223300970874e-05, "loss": 0.2749, "step": 6573 }, { "epoch": 63.7433734939759, "grad_norm": 3.078479290008545, "learning_rate": 1.80873786407767e-05, "loss": 0.1647, "step": 6574 }, { "epoch": 63.75301204819277, "grad_norm": 6.995843410491943, "learning_rate": 1.808252427184466e-05, "loss": 0.4371, "step": 6575 }, { "epoch": 63.762650602409636, "grad_norm": 10.023028373718262, "learning_rate": 1.8077669902912622e-05, "loss": 0.2325, "step": 6576 }, { "epoch": 63.772289156626506, "grad_norm": 4.325545787811279, "learning_rate": 1.8072815533980583e-05, "loss": 0.311, "step": 6577 }, { "epoch": 63.78192771084338, "grad_norm": 7.741002559661865, "learning_rate": 1.8067961165048544e-05, "loss": 0.2476, "step": 6578 }, { "epoch": 63.79156626506024, "grad_norm": 7.094123363494873, "learning_rate": 1.8063106796116506e-05, "loss": 0.2206, "step": 6579 }, { "epoch": 63.80120481927711, "grad_norm": 6.5338945388793945, "learning_rate": 1.8058252427184467e-05, "loss": 0.3312, "step": 6580 }, { "epoch": 63.810843373493974, "grad_norm": 5.939232349395752, "learning_rate": 1.8053398058252428e-05, "loss": 0.2946, "step": 6581 }, { "epoch": 63.820481927710844, "grad_norm": 11.806957244873047, "learning_rate": 1.804854368932039e-05, "loss": 0.275, "step": 6582 }, { "epoch": 63.83012048192771, "grad_norm": 35.51750183105469, "learning_rate": 1.804368932038835e-05, "loss": 0.2187, "step": 6583 }, { "epoch": 63.83975903614458, "grad_norm": 5.233889102935791, "learning_rate": 1.803883495145631e-05, "loss": 0.2451, "step": 6584 }, { "epoch": 63.84939759036145, "grad_norm": 3.6838772296905518, "learning_rate": 1.8033980582524272e-05, "loss": 0.2002, "step": 6585 }, { "epoch": 63.85903614457831, "grad_norm": 9.247572898864746, "learning_rate": 1.8029126213592234e-05, "loss": 0.1982, "step": 6586 }, { "epoch": 63.86867469879518, "grad_norm": 4.083924770355225, "learning_rate": 1.8024271844660195e-05, "loss": 0.3627, "step": 6587 }, { "epoch": 63.878313253012045, "grad_norm": 5.148581504821777, "learning_rate": 1.8019417475728156e-05, "loss": 0.1357, "step": 6588 }, { "epoch": 63.887951807228916, "grad_norm": 4.651505470275879, "learning_rate": 1.8014563106796117e-05, "loss": 0.1706, "step": 6589 }, { "epoch": 63.897590361445786, "grad_norm": 19.201780319213867, "learning_rate": 1.8009708737864078e-05, "loss": 0.2622, "step": 6590 }, { "epoch": 63.90722891566265, "grad_norm": 12.06498908996582, "learning_rate": 1.800485436893204e-05, "loss": 0.2038, "step": 6591 }, { "epoch": 63.91686746987952, "grad_norm": 3.98781156539917, "learning_rate": 1.8e-05, "loss": 0.1802, "step": 6592 }, { "epoch": 63.92650602409638, "grad_norm": 6.4831976890563965, "learning_rate": 1.799514563106796e-05, "loss": 0.2847, "step": 6593 }, { "epoch": 63.936144578313254, "grad_norm": 3.906562566757202, "learning_rate": 1.7990291262135923e-05, "loss": 0.2302, "step": 6594 }, { "epoch": 63.94578313253012, "grad_norm": 3.317716598510742, "learning_rate": 1.7985436893203884e-05, "loss": 0.1637, "step": 6595 }, { "epoch": 63.95542168674699, "grad_norm": 4.435973167419434, "learning_rate": 1.798058252427185e-05, "loss": 0.1745, "step": 6596 }, { "epoch": 63.96506024096386, "grad_norm": 3.454219341278076, "learning_rate": 1.7975728155339806e-05, "loss": 0.268, "step": 6597 }, { "epoch": 63.97469879518072, "grad_norm": 4.05440616607666, "learning_rate": 1.7970873786407767e-05, "loss": 0.2252, "step": 6598 }, { "epoch": 63.98433734939759, "grad_norm": 5.040217876434326, "learning_rate": 1.796601941747573e-05, "loss": 0.2498, "step": 6599 }, { "epoch": 63.993975903614455, "grad_norm": 5.405030250549316, "learning_rate": 1.796116504854369e-05, "loss": 0.2699, "step": 6600 }, { "epoch": 64.00963855421686, "grad_norm": 3.1040139198303223, "learning_rate": 1.795631067961165e-05, "loss": 0.1193, "step": 6601 }, { "epoch": 64.01927710843374, "grad_norm": 6.213781356811523, "learning_rate": 1.7951456310679612e-05, "loss": 0.2136, "step": 6602 }, { "epoch": 64.0289156626506, "grad_norm": 3.5212819576263428, "learning_rate": 1.7946601941747573e-05, "loss": 0.0738, "step": 6603 }, { "epoch": 64.03855421686747, "grad_norm": 6.874629497528076, "learning_rate": 1.7941747572815534e-05, "loss": 0.2328, "step": 6604 }, { "epoch": 64.04819277108433, "grad_norm": 7.983249187469482, "learning_rate": 1.7936893203883495e-05, "loss": 0.4068, "step": 6605 }, { "epoch": 64.05783132530121, "grad_norm": 7.6977715492248535, "learning_rate": 1.7932038834951456e-05, "loss": 0.4066, "step": 6606 }, { "epoch": 64.06746987951807, "grad_norm": 3.4671871662139893, "learning_rate": 1.7927184466019418e-05, "loss": 0.3265, "step": 6607 }, { "epoch": 64.07710843373494, "grad_norm": 4.750828742980957, "learning_rate": 1.7922330097087382e-05, "loss": 0.2841, "step": 6608 }, { "epoch": 64.08674698795181, "grad_norm": 4.727184295654297, "learning_rate": 1.7917475728155343e-05, "loss": 0.355, "step": 6609 }, { "epoch": 64.09638554216868, "grad_norm": 4.638000965118408, "learning_rate": 1.79126213592233e-05, "loss": 0.2459, "step": 6610 }, { "epoch": 64.10602409638554, "grad_norm": 4.341406345367432, "learning_rate": 1.7907766990291262e-05, "loss": 0.2227, "step": 6611 }, { "epoch": 64.1156626506024, "grad_norm": 5.135220050811768, "learning_rate": 1.7902912621359223e-05, "loss": 0.2161, "step": 6612 }, { "epoch": 64.12530120481928, "grad_norm": 4.627102375030518, "learning_rate": 1.7898058252427185e-05, "loss": 0.2674, "step": 6613 }, { "epoch": 64.13493975903614, "grad_norm": 2.575749158859253, "learning_rate": 1.7893203883495146e-05, "loss": 0.2162, "step": 6614 }, { "epoch": 64.144578313253, "grad_norm": 3.9001858234405518, "learning_rate": 1.7888349514563107e-05, "loss": 0.2241, "step": 6615 }, { "epoch": 64.15421686746988, "grad_norm": 4.922489166259766, "learning_rate": 1.7883495145631068e-05, "loss": 0.4546, "step": 6616 }, { "epoch": 64.16385542168675, "grad_norm": 3.6198770999908447, "learning_rate": 1.787864077669903e-05, "loss": 0.1889, "step": 6617 }, { "epoch": 64.17349397590361, "grad_norm": 3.498824119567871, "learning_rate": 1.787378640776699e-05, "loss": 0.173, "step": 6618 }, { "epoch": 64.18313253012049, "grad_norm": 7.092662811279297, "learning_rate": 1.786893203883495e-05, "loss": 0.4111, "step": 6619 }, { "epoch": 64.19277108433735, "grad_norm": 9.273859977722168, "learning_rate": 1.7864077669902913e-05, "loss": 0.2174, "step": 6620 }, { "epoch": 64.20240963855422, "grad_norm": 21.296463012695312, "learning_rate": 1.7859223300970877e-05, "loss": 0.3465, "step": 6621 }, { "epoch": 64.21204819277108, "grad_norm": 11.02074909210205, "learning_rate": 1.7854368932038838e-05, "loss": 0.5228, "step": 6622 }, { "epoch": 64.22168674698796, "grad_norm": 6.567591190338135, "learning_rate": 1.78495145631068e-05, "loss": 0.1226, "step": 6623 }, { "epoch": 64.23132530120482, "grad_norm": 6.904694080352783, "learning_rate": 1.7844660194174757e-05, "loss": 0.325, "step": 6624 }, { "epoch": 64.24096385542168, "grad_norm": 9.972241401672363, "learning_rate": 1.7839805825242718e-05, "loss": 0.2923, "step": 6625 }, { "epoch": 64.25060240963856, "grad_norm": 5.634770393371582, "learning_rate": 1.783495145631068e-05, "loss": 0.5842, "step": 6626 }, { "epoch": 64.26024096385542, "grad_norm": 7.103586673736572, "learning_rate": 1.783009708737864e-05, "loss": 0.4644, "step": 6627 }, { "epoch": 64.26987951807229, "grad_norm": 5.794072151184082, "learning_rate": 1.7825242718446602e-05, "loss": 0.2236, "step": 6628 }, { "epoch": 64.27951807228915, "grad_norm": 6.070021629333496, "learning_rate": 1.7820388349514563e-05, "loss": 0.204, "step": 6629 }, { "epoch": 64.28915662650603, "grad_norm": 10.170531272888184, "learning_rate": 1.7815533980582524e-05, "loss": 0.3684, "step": 6630 }, { "epoch": 64.29879518072289, "grad_norm": 4.415534019470215, "learning_rate": 1.7810679611650485e-05, "loss": 0.1483, "step": 6631 }, { "epoch": 64.30843373493975, "grad_norm": 3.16381573677063, "learning_rate": 1.7805825242718446e-05, "loss": 0.1095, "step": 6632 }, { "epoch": 64.31807228915663, "grad_norm": 3.899556875228882, "learning_rate": 1.7800970873786407e-05, "loss": 0.2234, "step": 6633 }, { "epoch": 64.3277108433735, "grad_norm": 4.873519420623779, "learning_rate": 1.7796116504854372e-05, "loss": 0.4219, "step": 6634 }, { "epoch": 64.33734939759036, "grad_norm": 8.958643913269043, "learning_rate": 1.7791262135922333e-05, "loss": 0.3782, "step": 6635 }, { "epoch": 64.34698795180722, "grad_norm": 3.765855312347412, "learning_rate": 1.7786407766990294e-05, "loss": 0.1532, "step": 6636 }, { "epoch": 64.3566265060241, "grad_norm": 3.7134389877319336, "learning_rate": 1.7781553398058255e-05, "loss": 0.275, "step": 6637 }, { "epoch": 64.36626506024096, "grad_norm": 2.1669578552246094, "learning_rate": 1.7776699029126213e-05, "loss": 0.0779, "step": 6638 }, { "epoch": 64.37590361445783, "grad_norm": 7.353455066680908, "learning_rate": 1.7771844660194174e-05, "loss": 0.4927, "step": 6639 }, { "epoch": 64.3855421686747, "grad_norm": 4.303150653839111, "learning_rate": 1.7766990291262135e-05, "loss": 0.3278, "step": 6640 }, { "epoch": 64.39518072289157, "grad_norm": 8.668716430664062, "learning_rate": 1.7762135922330097e-05, "loss": 0.305, "step": 6641 }, { "epoch": 64.40481927710843, "grad_norm": 5.837489128112793, "learning_rate": 1.7757281553398058e-05, "loss": 0.2579, "step": 6642 }, { "epoch": 64.41445783132531, "grad_norm": 7.148987770080566, "learning_rate": 1.775242718446602e-05, "loss": 0.3272, "step": 6643 }, { "epoch": 64.42409638554217, "grad_norm": 5.251522541046143, "learning_rate": 1.774757281553398e-05, "loss": 0.196, "step": 6644 }, { "epoch": 64.43373493975903, "grad_norm": 6.0585808753967285, "learning_rate": 1.774271844660194e-05, "loss": 0.3163, "step": 6645 }, { "epoch": 64.4433734939759, "grad_norm": 4.906554222106934, "learning_rate": 1.7737864077669906e-05, "loss": 0.204, "step": 6646 }, { "epoch": 64.45301204819278, "grad_norm": 5.8196234703063965, "learning_rate": 1.7733009708737867e-05, "loss": 0.3192, "step": 6647 }, { "epoch": 64.46265060240964, "grad_norm": 5.195136070251465, "learning_rate": 1.7728155339805828e-05, "loss": 0.164, "step": 6648 }, { "epoch": 64.4722891566265, "grad_norm": 6.472867965698242, "learning_rate": 1.772330097087379e-05, "loss": 0.2131, "step": 6649 }, { "epoch": 64.48192771084338, "grad_norm": 3.264495611190796, "learning_rate": 1.771844660194175e-05, "loss": 0.1599, "step": 6650 }, { "epoch": 64.49156626506024, "grad_norm": 11.559306144714355, "learning_rate": 1.771359223300971e-05, "loss": 0.5372, "step": 6651 }, { "epoch": 64.5012048192771, "grad_norm": 9.981470108032227, "learning_rate": 1.770873786407767e-05, "loss": 0.3832, "step": 6652 }, { "epoch": 64.51084337349397, "grad_norm": 4.005462169647217, "learning_rate": 1.770388349514563e-05, "loss": 0.2079, "step": 6653 }, { "epoch": 64.52048192771085, "grad_norm": 7.175344467163086, "learning_rate": 1.769902912621359e-05, "loss": 0.2372, "step": 6654 }, { "epoch": 64.53012048192771, "grad_norm": 4.621581554412842, "learning_rate": 1.7694174757281553e-05, "loss": 0.1481, "step": 6655 }, { "epoch": 64.53975903614457, "grad_norm": 2.619629383087158, "learning_rate": 1.7689320388349514e-05, "loss": 0.1693, "step": 6656 }, { "epoch": 64.54939759036145, "grad_norm": 4.448292255401611, "learning_rate": 1.7684466019417475e-05, "loss": 0.362, "step": 6657 }, { "epoch": 64.55903614457831, "grad_norm": 4.210350036621094, "learning_rate": 1.7679611650485436e-05, "loss": 0.2097, "step": 6658 }, { "epoch": 64.56867469879518, "grad_norm": 4.034517288208008, "learning_rate": 1.76747572815534e-05, "loss": 0.1876, "step": 6659 }, { "epoch": 64.57831325301204, "grad_norm": 5.5890679359436035, "learning_rate": 1.7669902912621362e-05, "loss": 0.2037, "step": 6660 }, { "epoch": 64.58795180722892, "grad_norm": 4.225044250488281, "learning_rate": 1.7665048543689323e-05, "loss": 0.1712, "step": 6661 }, { "epoch": 64.59759036144578, "grad_norm": 3.878208875656128, "learning_rate": 1.7660194174757284e-05, "loss": 0.1649, "step": 6662 }, { "epoch": 64.60722891566265, "grad_norm": 5.4109883308410645, "learning_rate": 1.7655339805825245e-05, "loss": 0.391, "step": 6663 }, { "epoch": 64.61686746987952, "grad_norm": 4.710358619689941, "learning_rate": 1.7650485436893206e-05, "loss": 0.3593, "step": 6664 }, { "epoch": 64.62650602409639, "grad_norm": 4.055832862854004, "learning_rate": 1.7645631067961167e-05, "loss": 0.1238, "step": 6665 }, { "epoch": 64.63614457831325, "grad_norm": 5.17829704284668, "learning_rate": 1.7640776699029125e-05, "loss": 0.3474, "step": 6666 }, { "epoch": 64.64578313253013, "grad_norm": 3.4804606437683105, "learning_rate": 1.7635922330097086e-05, "loss": 0.1433, "step": 6667 }, { "epoch": 64.65542168674699, "grad_norm": 6.772469520568848, "learning_rate": 1.7631067961165048e-05, "loss": 0.2166, "step": 6668 }, { "epoch": 64.66506024096385, "grad_norm": 4.238104820251465, "learning_rate": 1.762621359223301e-05, "loss": 0.2383, "step": 6669 }, { "epoch": 64.67469879518072, "grad_norm": 6.238546371459961, "learning_rate": 1.762135922330097e-05, "loss": 0.2655, "step": 6670 }, { "epoch": 64.6843373493976, "grad_norm": 3.2269020080566406, "learning_rate": 1.761650485436893e-05, "loss": 0.1631, "step": 6671 }, { "epoch": 64.69397590361446, "grad_norm": 6.978241443634033, "learning_rate": 1.7611650485436896e-05, "loss": 0.2013, "step": 6672 }, { "epoch": 64.70361445783132, "grad_norm": 7.730045795440674, "learning_rate": 1.7606796116504857e-05, "loss": 0.2838, "step": 6673 }, { "epoch": 64.7132530120482, "grad_norm": 7.50343656539917, "learning_rate": 1.7601941747572818e-05, "loss": 0.2941, "step": 6674 }, { "epoch": 64.72289156626506, "grad_norm": 6.376489639282227, "learning_rate": 1.759708737864078e-05, "loss": 0.2216, "step": 6675 }, { "epoch": 64.73253012048193, "grad_norm": 7.145712375640869, "learning_rate": 1.759223300970874e-05, "loss": 0.4109, "step": 6676 }, { "epoch": 64.74216867469879, "grad_norm": 3.1627633571624756, "learning_rate": 1.75873786407767e-05, "loss": 0.1712, "step": 6677 }, { "epoch": 64.75180722891567, "grad_norm": 2.660447359085083, "learning_rate": 1.7582524271844662e-05, "loss": 0.0665, "step": 6678 }, { "epoch": 64.76144578313253, "grad_norm": 2.205557107925415, "learning_rate": 1.7577669902912624e-05, "loss": 0.1016, "step": 6679 }, { "epoch": 64.7710843373494, "grad_norm": 5.044806003570557, "learning_rate": 1.757281553398058e-05, "loss": 0.2303, "step": 6680 }, { "epoch": 64.78072289156627, "grad_norm": 5.844342231750488, "learning_rate": 1.7567961165048542e-05, "loss": 0.3003, "step": 6681 }, { "epoch": 64.79036144578313, "grad_norm": 6.6520185470581055, "learning_rate": 1.7563106796116504e-05, "loss": 0.2715, "step": 6682 }, { "epoch": 64.8, "grad_norm": 4.153007984161377, "learning_rate": 1.7558252427184465e-05, "loss": 0.2193, "step": 6683 }, { "epoch": 64.80963855421686, "grad_norm": 6.0956010818481445, "learning_rate": 1.755339805825243e-05, "loss": 0.3135, "step": 6684 }, { "epoch": 64.81927710843374, "grad_norm": 8.103360176086426, "learning_rate": 1.754854368932039e-05, "loss": 0.1542, "step": 6685 }, { "epoch": 64.8289156626506, "grad_norm": 3.5576257705688477, "learning_rate": 1.754368932038835e-05, "loss": 0.1587, "step": 6686 }, { "epoch": 64.83855421686746, "grad_norm": 9.018397331237793, "learning_rate": 1.7538834951456313e-05, "loss": 0.2414, "step": 6687 }, { "epoch": 64.84819277108434, "grad_norm": 6.8738837242126465, "learning_rate": 1.7533980582524274e-05, "loss": 0.3414, "step": 6688 }, { "epoch": 64.8578313253012, "grad_norm": 5.637515544891357, "learning_rate": 1.7529126213592235e-05, "loss": 0.209, "step": 6689 }, { "epoch": 64.86746987951807, "grad_norm": 3.1336774826049805, "learning_rate": 1.7524271844660196e-05, "loss": 0.1222, "step": 6690 }, { "epoch": 64.87710843373495, "grad_norm": 6.72017765045166, "learning_rate": 1.7519417475728157e-05, "loss": 0.422, "step": 6691 }, { "epoch": 64.88674698795181, "grad_norm": 5.6899847984313965, "learning_rate": 1.751456310679612e-05, "loss": 0.241, "step": 6692 }, { "epoch": 64.89638554216867, "grad_norm": 5.322423934936523, "learning_rate": 1.7509708737864076e-05, "loss": 0.1869, "step": 6693 }, { "epoch": 64.90602409638554, "grad_norm": 7.348280429840088, "learning_rate": 1.7504854368932037e-05, "loss": 0.3996, "step": 6694 }, { "epoch": 64.91566265060241, "grad_norm": 2.5641517639160156, "learning_rate": 1.75e-05, "loss": 0.2114, "step": 6695 }, { "epoch": 64.92530120481928, "grad_norm": 8.323660850524902, "learning_rate": 1.749514563106796e-05, "loss": 0.143, "step": 6696 }, { "epoch": 64.93493975903614, "grad_norm": 3.367830991744995, "learning_rate": 1.7490291262135924e-05, "loss": 0.2395, "step": 6697 }, { "epoch": 64.94457831325302, "grad_norm": 2.389838457107544, "learning_rate": 1.7485436893203885e-05, "loss": 0.0548, "step": 6698 }, { "epoch": 64.95421686746988, "grad_norm": 4.4320549964904785, "learning_rate": 1.7480582524271846e-05, "loss": 0.1374, "step": 6699 }, { "epoch": 64.96385542168674, "grad_norm": 6.377394676208496, "learning_rate": 1.7475728155339808e-05, "loss": 0.2761, "step": 6700 }, { "epoch": 64.97349397590361, "grad_norm": 5.758779048919678, "learning_rate": 1.747087378640777e-05, "loss": 0.2473, "step": 6701 }, { "epoch": 64.98313253012049, "grad_norm": 4.97775411605835, "learning_rate": 1.746601941747573e-05, "loss": 0.2053, "step": 6702 }, { "epoch": 64.99277108433735, "grad_norm": 6.826815605163574, "learning_rate": 1.746116504854369e-05, "loss": 0.4521, "step": 6703 }, { "epoch": 65.00843373493976, "grad_norm": 24.0233097076416, "learning_rate": 1.7456310679611652e-05, "loss": 0.4219, "step": 6704 }, { "epoch": 65.01807228915662, "grad_norm": 0.8198635578155518, "learning_rate": 1.7451456310679613e-05, "loss": 0.3261, "step": 6705 }, { "epoch": 65.0277108433735, "grad_norm": 4.495153427124023, "learning_rate": 1.7446601941747574e-05, "loss": 0.2268, "step": 6706 }, { "epoch": 65.03734939759036, "grad_norm": 3.8858819007873535, "learning_rate": 1.7441747572815532e-05, "loss": 0.3081, "step": 6707 }, { "epoch": 65.04698795180722, "grad_norm": 15.994850158691406, "learning_rate": 1.7436893203883493e-05, "loss": 0.1928, "step": 6708 }, { "epoch": 65.0566265060241, "grad_norm": 2.18764066696167, "learning_rate": 1.7432038834951458e-05, "loss": 0.2062, "step": 6709 }, { "epoch": 65.06626506024097, "grad_norm": 21.080032348632812, "learning_rate": 1.742718446601942e-05, "loss": 0.5019, "step": 6710 }, { "epoch": 65.07590361445783, "grad_norm": 6.211146831512451, "learning_rate": 1.742233009708738e-05, "loss": 0.2281, "step": 6711 }, { "epoch": 65.08554216867469, "grad_norm": 18.258615493774414, "learning_rate": 1.741747572815534e-05, "loss": 0.3068, "step": 6712 }, { "epoch": 65.09518072289157, "grad_norm": 6.398983001708984, "learning_rate": 1.7412621359223302e-05, "loss": 0.202, "step": 6713 }, { "epoch": 65.10481927710843, "grad_norm": 15.416221618652344, "learning_rate": 1.7407766990291264e-05, "loss": 0.1908, "step": 6714 }, { "epoch": 65.1144578313253, "grad_norm": 1.8156721591949463, "learning_rate": 1.7402912621359225e-05, "loss": 0.1404, "step": 6715 }, { "epoch": 65.12409638554217, "grad_norm": 14.699630737304688, "learning_rate": 1.7398058252427186e-05, "loss": 0.2713, "step": 6716 }, { "epoch": 65.13373493975904, "grad_norm": 7.532877445220947, "learning_rate": 1.7393203883495147e-05, "loss": 0.2597, "step": 6717 }, { "epoch": 65.1433734939759, "grad_norm": 7.4329633712768555, "learning_rate": 1.7388349514563108e-05, "loss": 0.1187, "step": 6718 }, { "epoch": 65.15301204819278, "grad_norm": 5.530175685882568, "learning_rate": 1.738349514563107e-05, "loss": 0.1858, "step": 6719 }, { "epoch": 65.16265060240964, "grad_norm": 26.46933937072754, "learning_rate": 1.737864077669903e-05, "loss": 0.207, "step": 6720 }, { "epoch": 65.1722891566265, "grad_norm": 14.36626148223877, "learning_rate": 1.7373786407766988e-05, "loss": 0.3397, "step": 6721 }, { "epoch": 65.18192771084337, "grad_norm": 5.602409362792969, "learning_rate": 1.7368932038834953e-05, "loss": 0.0917, "step": 6722 }, { "epoch": 65.19156626506025, "grad_norm": 4.788156032562256, "learning_rate": 1.7364077669902914e-05, "loss": 0.1432, "step": 6723 }, { "epoch": 65.20120481927711, "grad_norm": 5.2705841064453125, "learning_rate": 1.7359223300970875e-05, "loss": 0.2319, "step": 6724 }, { "epoch": 65.21084337349397, "grad_norm": 9.53313159942627, "learning_rate": 1.7354368932038836e-05, "loss": 0.2387, "step": 6725 }, { "epoch": 65.22048192771085, "grad_norm": 26.383222579956055, "learning_rate": 1.7349514563106797e-05, "loss": 0.42, "step": 6726 }, { "epoch": 65.23012048192771, "grad_norm": 8.205617904663086, "learning_rate": 1.734466019417476e-05, "loss": 0.2339, "step": 6727 }, { "epoch": 65.23975903614458, "grad_norm": 9.374670028686523, "learning_rate": 1.733980582524272e-05, "loss": 0.1525, "step": 6728 }, { "epoch": 65.24939759036144, "grad_norm": 5.1050896644592285, "learning_rate": 1.733495145631068e-05, "loss": 0.2422, "step": 6729 }, { "epoch": 65.25903614457832, "grad_norm": 3.035731554031372, "learning_rate": 1.7330097087378642e-05, "loss": 0.3071, "step": 6730 }, { "epoch": 65.26867469879518, "grad_norm": 9.336048126220703, "learning_rate": 1.7325242718446603e-05, "loss": 0.1769, "step": 6731 }, { "epoch": 65.27831325301204, "grad_norm": 27.10284423828125, "learning_rate": 1.7320388349514564e-05, "loss": 0.4141, "step": 6732 }, { "epoch": 65.28795180722892, "grad_norm": 15.795821189880371, "learning_rate": 1.7315533980582525e-05, "loss": 0.3533, "step": 6733 }, { "epoch": 65.29759036144578, "grad_norm": 8.26356029510498, "learning_rate": 1.7310679611650487e-05, "loss": 0.2375, "step": 6734 }, { "epoch": 65.30722891566265, "grad_norm": 5.578372001647949, "learning_rate": 1.7305825242718448e-05, "loss": 0.3058, "step": 6735 }, { "epoch": 65.31686746987951, "grad_norm": 4.020269393920898, "learning_rate": 1.730097087378641e-05, "loss": 0.1687, "step": 6736 }, { "epoch": 65.32650602409639, "grad_norm": 14.70491886138916, "learning_rate": 1.729611650485437e-05, "loss": 0.2161, "step": 6737 }, { "epoch": 65.33614457831325, "grad_norm": 2.1751372814178467, "learning_rate": 1.729126213592233e-05, "loss": 0.2646, "step": 6738 }, { "epoch": 65.34578313253012, "grad_norm": 25.490535736083984, "learning_rate": 1.7286407766990292e-05, "loss": 0.3495, "step": 6739 }, { "epoch": 65.355421686747, "grad_norm": 5.001345157623291, "learning_rate": 1.7281553398058253e-05, "loss": 0.1278, "step": 6740 }, { "epoch": 65.36506024096386, "grad_norm": 3.8375332355499268, "learning_rate": 1.7276699029126215e-05, "loss": 0.4254, "step": 6741 }, { "epoch": 65.37469879518072, "grad_norm": 3.7393288612365723, "learning_rate": 1.7271844660194176e-05, "loss": 0.2011, "step": 6742 }, { "epoch": 65.38433734939758, "grad_norm": 17.513837814331055, "learning_rate": 1.7266990291262137e-05, "loss": 0.1709, "step": 6743 }, { "epoch": 65.39397590361446, "grad_norm": 2.354762077331543, "learning_rate": 1.7262135922330098e-05, "loss": 0.0919, "step": 6744 }, { "epoch": 65.40361445783132, "grad_norm": 8.821565628051758, "learning_rate": 1.725728155339806e-05, "loss": 0.1772, "step": 6745 }, { "epoch": 65.41325301204819, "grad_norm": 12.514342308044434, "learning_rate": 1.725242718446602e-05, "loss": 0.1879, "step": 6746 }, { "epoch": 65.42289156626506, "grad_norm": 20.004087448120117, "learning_rate": 1.724757281553398e-05, "loss": 0.2328, "step": 6747 }, { "epoch": 65.43253012048193, "grad_norm": 11.171804428100586, "learning_rate": 1.7242718446601943e-05, "loss": 0.2608, "step": 6748 }, { "epoch": 65.44216867469879, "grad_norm": 19.252952575683594, "learning_rate": 1.7237864077669904e-05, "loss": 0.4075, "step": 6749 }, { "epoch": 65.45180722891567, "grad_norm": 5.018495082855225, "learning_rate": 1.7233009708737865e-05, "loss": 0.2974, "step": 6750 }, { "epoch": 65.46144578313253, "grad_norm": 1.1451659202575684, "learning_rate": 1.7228155339805826e-05, "loss": 0.1076, "step": 6751 }, { "epoch": 65.4710843373494, "grad_norm": 6.99127721786499, "learning_rate": 1.7223300970873787e-05, "loss": 0.3766, "step": 6752 }, { "epoch": 65.48072289156626, "grad_norm": 1.8086436986923218, "learning_rate": 1.721844660194175e-05, "loss": 0.1286, "step": 6753 }, { "epoch": 65.49036144578314, "grad_norm": 11.698962211608887, "learning_rate": 1.721359223300971e-05, "loss": 0.2251, "step": 6754 }, { "epoch": 65.5, "grad_norm": 14.084790229797363, "learning_rate": 1.720873786407767e-05, "loss": 0.236, "step": 6755 }, { "epoch": 65.50963855421686, "grad_norm": 12.010801315307617, "learning_rate": 1.7203883495145632e-05, "loss": 0.3856, "step": 6756 }, { "epoch": 65.51927710843374, "grad_norm": 7.037867546081543, "learning_rate": 1.7199029126213593e-05, "loss": 0.1371, "step": 6757 }, { "epoch": 65.5289156626506, "grad_norm": 5.10107421875, "learning_rate": 1.7194174757281554e-05, "loss": 0.1914, "step": 6758 }, { "epoch": 65.53855421686747, "grad_norm": 4.802816390991211, "learning_rate": 1.7189320388349515e-05, "loss": 0.2544, "step": 6759 }, { "epoch": 65.54819277108433, "grad_norm": 9.995659828186035, "learning_rate": 1.7184466019417476e-05, "loss": 0.2267, "step": 6760 }, { "epoch": 65.55783132530121, "grad_norm": 9.054237365722656, "learning_rate": 1.7179611650485437e-05, "loss": 0.2906, "step": 6761 }, { "epoch": 65.56746987951807, "grad_norm": 3.9006147384643555, "learning_rate": 1.71747572815534e-05, "loss": 0.2849, "step": 6762 }, { "epoch": 65.57710843373494, "grad_norm": 2.6890952587127686, "learning_rate": 1.716990291262136e-05, "loss": 0.2306, "step": 6763 }, { "epoch": 65.58674698795181, "grad_norm": 6.466459274291992, "learning_rate": 1.716504854368932e-05, "loss": 0.3025, "step": 6764 }, { "epoch": 65.59638554216868, "grad_norm": 5.264733791351318, "learning_rate": 1.7160194174757282e-05, "loss": 0.4366, "step": 6765 }, { "epoch": 65.60602409638554, "grad_norm": 11.377941131591797, "learning_rate": 1.7155339805825243e-05, "loss": 0.1992, "step": 6766 }, { "epoch": 65.61566265060242, "grad_norm": 9.877355575561523, "learning_rate": 1.7150485436893204e-05, "loss": 0.3164, "step": 6767 }, { "epoch": 65.62530120481928, "grad_norm": 1.1379307508468628, "learning_rate": 1.7145631067961165e-05, "loss": 0.072, "step": 6768 }, { "epoch": 65.63493975903614, "grad_norm": 2.676072359085083, "learning_rate": 1.7140776699029127e-05, "loss": 0.2466, "step": 6769 }, { "epoch": 65.644578313253, "grad_norm": 1.9918407201766968, "learning_rate": 1.7135922330097088e-05, "loss": 0.2173, "step": 6770 }, { "epoch": 65.65421686746988, "grad_norm": 4.570426940917969, "learning_rate": 1.713106796116505e-05, "loss": 0.1787, "step": 6771 }, { "epoch": 65.66385542168675, "grad_norm": 13.799654960632324, "learning_rate": 1.7126213592233013e-05, "loss": 0.2157, "step": 6772 }, { "epoch": 65.67349397590361, "grad_norm": 2.99998140335083, "learning_rate": 1.712135922330097e-05, "loss": 0.1832, "step": 6773 }, { "epoch": 65.68313253012049, "grad_norm": 4.4591450691223145, "learning_rate": 1.7116504854368932e-05, "loss": 0.1804, "step": 6774 }, { "epoch": 65.69277108433735, "grad_norm": 10.66292953491211, "learning_rate": 1.7111650485436894e-05, "loss": 0.3628, "step": 6775 }, { "epoch": 65.70240963855422, "grad_norm": 14.877535820007324, "learning_rate": 1.7106796116504855e-05, "loss": 0.3299, "step": 6776 }, { "epoch": 65.71204819277108, "grad_norm": 12.072694778442383, "learning_rate": 1.7101941747572816e-05, "loss": 0.2662, "step": 6777 }, { "epoch": 65.72168674698796, "grad_norm": 4.072932720184326, "learning_rate": 1.7097087378640777e-05, "loss": 0.2096, "step": 6778 }, { "epoch": 65.73132530120482, "grad_norm": 4.2290496826171875, "learning_rate": 1.7092233009708738e-05, "loss": 0.2414, "step": 6779 }, { "epoch": 65.74096385542168, "grad_norm": 5.3215718269348145, "learning_rate": 1.70873786407767e-05, "loss": 0.2588, "step": 6780 }, { "epoch": 65.75060240963856, "grad_norm": 3.861030340194702, "learning_rate": 1.708252427184466e-05, "loss": 0.1554, "step": 6781 }, { "epoch": 65.76024096385542, "grad_norm": 3.1878652572631836, "learning_rate": 1.707766990291262e-05, "loss": 0.2437, "step": 6782 }, { "epoch": 65.76987951807229, "grad_norm": 7.133805751800537, "learning_rate": 1.7072815533980583e-05, "loss": 0.3553, "step": 6783 }, { "epoch": 65.77951807228915, "grad_norm": 12.634856224060059, "learning_rate": 1.7067961165048544e-05, "loss": 0.1251, "step": 6784 }, { "epoch": 65.78915662650603, "grad_norm": 19.642635345458984, "learning_rate": 1.706310679611651e-05, "loss": 0.1957, "step": 6785 }, { "epoch": 65.79879518072289, "grad_norm": 0.7945847511291504, "learning_rate": 1.705825242718447e-05, "loss": 0.151, "step": 6786 }, { "epoch": 65.80843373493975, "grad_norm": 2.964045286178589, "learning_rate": 1.7053398058252427e-05, "loss": 0.2404, "step": 6787 }, { "epoch": 65.81807228915663, "grad_norm": 3.2627110481262207, "learning_rate": 1.704854368932039e-05, "loss": 0.1771, "step": 6788 }, { "epoch": 65.8277108433735, "grad_norm": 7.818816661834717, "learning_rate": 1.704368932038835e-05, "loss": 0.0807, "step": 6789 }, { "epoch": 65.83734939759036, "grad_norm": 2.2370879650115967, "learning_rate": 1.703883495145631e-05, "loss": 0.1271, "step": 6790 }, { "epoch": 65.84698795180722, "grad_norm": 1.7445378303527832, "learning_rate": 1.7033980582524272e-05, "loss": 0.27, "step": 6791 }, { "epoch": 65.8566265060241, "grad_norm": 12.875688552856445, "learning_rate": 1.7029126213592233e-05, "loss": 0.1848, "step": 6792 }, { "epoch": 65.86626506024096, "grad_norm": 2.1216979026794434, "learning_rate": 1.7024271844660194e-05, "loss": 0.3044, "step": 6793 }, { "epoch": 65.87590361445783, "grad_norm": 3.5114150047302246, "learning_rate": 1.7019417475728155e-05, "loss": 0.2199, "step": 6794 }, { "epoch": 65.8855421686747, "grad_norm": 3.9687910079956055, "learning_rate": 1.7014563106796116e-05, "loss": 0.3126, "step": 6795 }, { "epoch": 65.89518072289157, "grad_norm": 4.261148452758789, "learning_rate": 1.7009708737864078e-05, "loss": 0.2619, "step": 6796 }, { "epoch": 65.90481927710843, "grad_norm": 2.4165616035461426, "learning_rate": 1.700485436893204e-05, "loss": 0.2913, "step": 6797 }, { "epoch": 65.91445783132531, "grad_norm": 20.421262741088867, "learning_rate": 1.7000000000000003e-05, "loss": 0.219, "step": 6798 }, { "epoch": 65.92409638554217, "grad_norm": 10.818621635437012, "learning_rate": 1.6995145631067964e-05, "loss": 0.3818, "step": 6799 }, { "epoch": 65.93373493975903, "grad_norm": 11.982765197753906, "learning_rate": 1.6990291262135926e-05, "loss": 0.0955, "step": 6800 }, { "epoch": 65.9433734939759, "grad_norm": 6.031546115875244, "learning_rate": 1.6985436893203883e-05, "loss": 0.1708, "step": 6801 }, { "epoch": 65.95301204819278, "grad_norm": 12.288686752319336, "learning_rate": 1.6980582524271844e-05, "loss": 0.1056, "step": 6802 }, { "epoch": 65.96265060240964, "grad_norm": 1.4103496074676514, "learning_rate": 1.6975728155339806e-05, "loss": 0.1738, "step": 6803 }, { "epoch": 65.9722891566265, "grad_norm": 8.25393009185791, "learning_rate": 1.6970873786407767e-05, "loss": 0.346, "step": 6804 }, { "epoch": 65.98192771084338, "grad_norm": 3.9085681438446045, "learning_rate": 1.6966019417475728e-05, "loss": 0.1408, "step": 6805 }, { "epoch": 65.99156626506024, "grad_norm": 3.5493757724761963, "learning_rate": 1.696116504854369e-05, "loss": 0.1661, "step": 6806 }, { "epoch": 66.00722891566265, "grad_norm": 2.492319107055664, "learning_rate": 1.695631067961165e-05, "loss": 0.1783, "step": 6807 }, { "epoch": 66.01686746987951, "grad_norm": 9.426575660705566, "learning_rate": 1.695145631067961e-05, "loss": 0.2297, "step": 6808 }, { "epoch": 66.02650602409639, "grad_norm": 22.397653579711914, "learning_rate": 1.6946601941747572e-05, "loss": 0.1547, "step": 6809 }, { "epoch": 66.03614457831326, "grad_norm": 4.967355728149414, "learning_rate": 1.6941747572815537e-05, "loss": 0.1047, "step": 6810 }, { "epoch": 66.04578313253012, "grad_norm": 2.600862979888916, "learning_rate": 1.6936893203883498e-05, "loss": 0.4288, "step": 6811 }, { "epoch": 66.05542168674698, "grad_norm": 35.87109375, "learning_rate": 1.693203883495146e-05, "loss": 0.3122, "step": 6812 }, { "epoch": 66.06506024096386, "grad_norm": 3.7666244506835938, "learning_rate": 1.692718446601942e-05, "loss": 0.2096, "step": 6813 }, { "epoch": 66.07469879518072, "grad_norm": 3.531080961227417, "learning_rate": 1.692233009708738e-05, "loss": 0.129, "step": 6814 }, { "epoch": 66.08433734939759, "grad_norm": 2.5521504878997803, "learning_rate": 1.691747572815534e-05, "loss": 0.12, "step": 6815 }, { "epoch": 66.09397590361446, "grad_norm": 7.342280864715576, "learning_rate": 1.69126213592233e-05, "loss": 0.2111, "step": 6816 }, { "epoch": 66.10361445783133, "grad_norm": 3.203352928161621, "learning_rate": 1.690776699029126e-05, "loss": 0.1429, "step": 6817 }, { "epoch": 66.11325301204819, "grad_norm": 14.903541564941406, "learning_rate": 1.6902912621359223e-05, "loss": 0.3507, "step": 6818 }, { "epoch": 66.12289156626505, "grad_norm": 9.13461971282959, "learning_rate": 1.6898058252427184e-05, "loss": 0.257, "step": 6819 }, { "epoch": 66.13253012048193, "grad_norm": 11.637826919555664, "learning_rate": 1.6893203883495145e-05, "loss": 0.1484, "step": 6820 }, { "epoch": 66.1421686746988, "grad_norm": 9.851774215698242, "learning_rate": 1.6888349514563106e-05, "loss": 0.2819, "step": 6821 }, { "epoch": 66.15180722891566, "grad_norm": 2.760859727859497, "learning_rate": 1.6883495145631067e-05, "loss": 0.3919, "step": 6822 }, { "epoch": 66.16144578313254, "grad_norm": 12.130182266235352, "learning_rate": 1.6878640776699032e-05, "loss": 0.2864, "step": 6823 }, { "epoch": 66.1710843373494, "grad_norm": 5.423563480377197, "learning_rate": 1.6873786407766993e-05, "loss": 0.223, "step": 6824 }, { "epoch": 66.18072289156626, "grad_norm": 5.178747653961182, "learning_rate": 1.6868932038834954e-05, "loss": 0.3773, "step": 6825 }, { "epoch": 66.19036144578314, "grad_norm": 5.622215270996094, "learning_rate": 1.6864077669902915e-05, "loss": 0.1459, "step": 6826 }, { "epoch": 66.2, "grad_norm": 4.8182172775268555, "learning_rate": 1.6859223300970876e-05, "loss": 0.1455, "step": 6827 }, { "epoch": 66.20963855421687, "grad_norm": 2.4715163707733154, "learning_rate": 1.6854368932038838e-05, "loss": 0.0852, "step": 6828 }, { "epoch": 66.21927710843373, "grad_norm": 9.460112571716309, "learning_rate": 1.6849514563106795e-05, "loss": 0.4243, "step": 6829 }, { "epoch": 66.2289156626506, "grad_norm": 11.56740951538086, "learning_rate": 1.6844660194174757e-05, "loss": 0.1834, "step": 6830 }, { "epoch": 66.23855421686747, "grad_norm": 17.162927627563477, "learning_rate": 1.6839805825242718e-05, "loss": 0.272, "step": 6831 }, { "epoch": 66.24819277108433, "grad_norm": 13.450098991394043, "learning_rate": 1.683495145631068e-05, "loss": 0.3674, "step": 6832 }, { "epoch": 66.25783132530121, "grad_norm": 15.263616561889648, "learning_rate": 1.683009708737864e-05, "loss": 0.1905, "step": 6833 }, { "epoch": 66.26746987951807, "grad_norm": 6.042243480682373, "learning_rate": 1.68252427184466e-05, "loss": 0.2477, "step": 6834 }, { "epoch": 66.27710843373494, "grad_norm": 2.552194118499756, "learning_rate": 1.6820388349514562e-05, "loss": 0.2199, "step": 6835 }, { "epoch": 66.2867469879518, "grad_norm": 15.826237678527832, "learning_rate": 1.6815533980582527e-05, "loss": 0.2066, "step": 6836 }, { "epoch": 66.29638554216868, "grad_norm": 2.9504592418670654, "learning_rate": 1.6810679611650488e-05, "loss": 0.3812, "step": 6837 }, { "epoch": 66.30602409638554, "grad_norm": 0.6605208516120911, "learning_rate": 1.680582524271845e-05, "loss": 0.2737, "step": 6838 }, { "epoch": 66.3156626506024, "grad_norm": 7.273275852203369, "learning_rate": 1.680097087378641e-05, "loss": 0.3944, "step": 6839 }, { "epoch": 66.32530120481928, "grad_norm": 14.109575271606445, "learning_rate": 1.679611650485437e-05, "loss": 0.2411, "step": 6840 }, { "epoch": 66.33493975903615, "grad_norm": 28.286895751953125, "learning_rate": 1.6791262135922333e-05, "loss": 0.2194, "step": 6841 }, { "epoch": 66.34457831325301, "grad_norm": 1.6529461145401, "learning_rate": 1.678640776699029e-05, "loss": 0.1667, "step": 6842 }, { "epoch": 66.35421686746987, "grad_norm": 5.930611610412598, "learning_rate": 1.678155339805825e-05, "loss": 0.2553, "step": 6843 }, { "epoch": 66.36385542168675, "grad_norm": 5.762628555297852, "learning_rate": 1.6776699029126213e-05, "loss": 0.2373, "step": 6844 }, { "epoch": 66.37349397590361, "grad_norm": 11.052785873413086, "learning_rate": 1.6771844660194174e-05, "loss": 0.1314, "step": 6845 }, { "epoch": 66.38313253012048, "grad_norm": 10.894341468811035, "learning_rate": 1.6766990291262135e-05, "loss": 0.2055, "step": 6846 }, { "epoch": 66.39277108433735, "grad_norm": 15.543655395507812, "learning_rate": 1.6762135922330096e-05, "loss": 0.1224, "step": 6847 }, { "epoch": 66.40240963855422, "grad_norm": 23.286344528198242, "learning_rate": 1.675728155339806e-05, "loss": 0.5463, "step": 6848 }, { "epoch": 66.41204819277108, "grad_norm": 5.385855197906494, "learning_rate": 1.675242718446602e-05, "loss": 0.1672, "step": 6849 }, { "epoch": 66.42168674698796, "grad_norm": 11.042313575744629, "learning_rate": 1.6747572815533983e-05, "loss": 0.4372, "step": 6850 }, { "epoch": 66.43132530120482, "grad_norm": 3.3820300102233887, "learning_rate": 1.6742718446601944e-05, "loss": 0.1339, "step": 6851 }, { "epoch": 66.44096385542169, "grad_norm": 12.179534912109375, "learning_rate": 1.6737864077669905e-05, "loss": 0.2204, "step": 6852 }, { "epoch": 66.45060240963855, "grad_norm": 4.091047763824463, "learning_rate": 1.6733009708737866e-05, "loss": 0.304, "step": 6853 }, { "epoch": 66.46024096385543, "grad_norm": 17.07459259033203, "learning_rate": 1.6728155339805827e-05, "loss": 0.3133, "step": 6854 }, { "epoch": 66.46987951807229, "grad_norm": 3.0986738204956055, "learning_rate": 1.672330097087379e-05, "loss": 0.1604, "step": 6855 }, { "epoch": 66.47951807228915, "grad_norm": 9.003719329833984, "learning_rate": 1.6718446601941746e-05, "loss": 0.2327, "step": 6856 }, { "epoch": 66.48915662650603, "grad_norm": 2.9936411380767822, "learning_rate": 1.6713592233009707e-05, "loss": 0.3118, "step": 6857 }, { "epoch": 66.4987951807229, "grad_norm": 14.777709007263184, "learning_rate": 1.670873786407767e-05, "loss": 0.2617, "step": 6858 }, { "epoch": 66.50843373493976, "grad_norm": 39.209041595458984, "learning_rate": 1.670388349514563e-05, "loss": 0.2366, "step": 6859 }, { "epoch": 66.51807228915662, "grad_norm": 34.99449157714844, "learning_rate": 1.669902912621359e-05, "loss": 0.2726, "step": 6860 }, { "epoch": 66.5277108433735, "grad_norm": 5.5202107429504395, "learning_rate": 1.6694174757281555e-05, "loss": 0.197, "step": 6861 }, { "epoch": 66.53734939759036, "grad_norm": 1.4221059083938599, "learning_rate": 1.6689320388349517e-05, "loss": 0.2086, "step": 6862 }, { "epoch": 66.54698795180722, "grad_norm": 7.313944339752197, "learning_rate": 1.6684466019417478e-05, "loss": 0.2451, "step": 6863 }, { "epoch": 66.5566265060241, "grad_norm": 5.464135646820068, "learning_rate": 1.667961165048544e-05, "loss": 0.2925, "step": 6864 }, { "epoch": 66.56626506024097, "grad_norm": 7.391800403594971, "learning_rate": 1.66747572815534e-05, "loss": 0.1961, "step": 6865 }, { "epoch": 66.57590361445783, "grad_norm": 36.48830795288086, "learning_rate": 1.666990291262136e-05, "loss": 0.3385, "step": 6866 }, { "epoch": 66.58554216867469, "grad_norm": 6.880064487457275, "learning_rate": 1.6665048543689322e-05, "loss": 0.2878, "step": 6867 }, { "epoch": 66.59518072289157, "grad_norm": 15.180387496948242, "learning_rate": 1.6660194174757283e-05, "loss": 0.3138, "step": 6868 }, { "epoch": 66.60481927710843, "grad_norm": 0.833736777305603, "learning_rate": 1.6655339805825245e-05, "loss": 0.2592, "step": 6869 }, { "epoch": 66.6144578313253, "grad_norm": 13.422852516174316, "learning_rate": 1.6650485436893202e-05, "loss": 0.2671, "step": 6870 }, { "epoch": 66.62409638554217, "grad_norm": 9.786617279052734, "learning_rate": 1.6645631067961164e-05, "loss": 0.2636, "step": 6871 }, { "epoch": 66.63373493975904, "grad_norm": 8.047605514526367, "learning_rate": 1.6640776699029125e-05, "loss": 0.3318, "step": 6872 }, { "epoch": 66.6433734939759, "grad_norm": 3.0119640827178955, "learning_rate": 1.663592233009709e-05, "loss": 0.2337, "step": 6873 }, { "epoch": 66.65301204819278, "grad_norm": 2.3946025371551514, "learning_rate": 1.663106796116505e-05, "loss": 0.2431, "step": 6874 }, { "epoch": 66.66265060240964, "grad_norm": 17.66657257080078, "learning_rate": 1.662621359223301e-05, "loss": 0.4527, "step": 6875 }, { "epoch": 66.6722891566265, "grad_norm": 9.836881637573242, "learning_rate": 1.6621359223300973e-05, "loss": 0.3503, "step": 6876 }, { "epoch": 66.68192771084337, "grad_norm": 12.475504875183105, "learning_rate": 1.6616504854368934e-05, "loss": 0.1707, "step": 6877 }, { "epoch": 66.69156626506025, "grad_norm": 2.368037462234497, "learning_rate": 1.6611650485436895e-05, "loss": 0.3193, "step": 6878 }, { "epoch": 66.70120481927711, "grad_norm": 7.798638820648193, "learning_rate": 1.6606796116504856e-05, "loss": 0.2413, "step": 6879 }, { "epoch": 66.71084337349397, "grad_norm": 1.5596983432769775, "learning_rate": 1.6601941747572817e-05, "loss": 0.1618, "step": 6880 }, { "epoch": 66.72048192771085, "grad_norm": 13.864507675170898, "learning_rate": 1.659708737864078e-05, "loss": 0.3168, "step": 6881 }, { "epoch": 66.73012048192771, "grad_norm": 7.812751770019531, "learning_rate": 1.659223300970874e-05, "loss": 0.2102, "step": 6882 }, { "epoch": 66.73975903614458, "grad_norm": 3.845264196395874, "learning_rate": 1.65873786407767e-05, "loss": 0.1101, "step": 6883 }, { "epoch": 66.74939759036144, "grad_norm": 5.850836277008057, "learning_rate": 1.658252427184466e-05, "loss": 0.1681, "step": 6884 }, { "epoch": 66.75903614457832, "grad_norm": 17.069734573364258, "learning_rate": 1.657766990291262e-05, "loss": 0.1496, "step": 6885 }, { "epoch": 66.76867469879518, "grad_norm": 12.801722526550293, "learning_rate": 1.6572815533980584e-05, "loss": 0.3313, "step": 6886 }, { "epoch": 66.77831325301204, "grad_norm": 9.767088890075684, "learning_rate": 1.6567961165048545e-05, "loss": 0.2109, "step": 6887 }, { "epoch": 66.78795180722892, "grad_norm": 47.821495056152344, "learning_rate": 1.6563106796116506e-05, "loss": 0.204, "step": 6888 }, { "epoch": 66.79759036144578, "grad_norm": 5.203853130340576, "learning_rate": 1.6558252427184468e-05, "loss": 0.1143, "step": 6889 }, { "epoch": 66.80722891566265, "grad_norm": 12.118743896484375, "learning_rate": 1.655339805825243e-05, "loss": 0.2125, "step": 6890 }, { "epoch": 66.81686746987951, "grad_norm": 10.37362003326416, "learning_rate": 1.654854368932039e-05, "loss": 0.2633, "step": 6891 }, { "epoch": 66.82650602409639, "grad_norm": 3.4317944049835205, "learning_rate": 1.654368932038835e-05, "loss": 0.1712, "step": 6892 }, { "epoch": 66.83614457831325, "grad_norm": 9.928512573242188, "learning_rate": 1.6538834951456312e-05, "loss": 0.1798, "step": 6893 }, { "epoch": 66.84578313253012, "grad_norm": 3.308471441268921, "learning_rate": 1.6533980582524273e-05, "loss": 0.3055, "step": 6894 }, { "epoch": 66.855421686747, "grad_norm": 24.561603546142578, "learning_rate": 1.6529126213592234e-05, "loss": 0.2723, "step": 6895 }, { "epoch": 66.86506024096386, "grad_norm": 4.457960605621338, "learning_rate": 1.6524271844660196e-05, "loss": 0.3271, "step": 6896 }, { "epoch": 66.87469879518072, "grad_norm": 8.112129211425781, "learning_rate": 1.6519417475728157e-05, "loss": 0.2292, "step": 6897 }, { "epoch": 66.88433734939758, "grad_norm": 6.006361961364746, "learning_rate": 1.6514563106796114e-05, "loss": 0.2176, "step": 6898 }, { "epoch": 66.89397590361446, "grad_norm": 39.74903106689453, "learning_rate": 1.650970873786408e-05, "loss": 0.2199, "step": 6899 }, { "epoch": 66.90361445783132, "grad_norm": 7.562514305114746, "learning_rate": 1.650485436893204e-05, "loss": 0.2426, "step": 6900 }, { "epoch": 66.91325301204819, "grad_norm": 3.555332899093628, "learning_rate": 1.65e-05, "loss": 0.2057, "step": 6901 }, { "epoch": 66.92289156626506, "grad_norm": 7.072791576385498, "learning_rate": 1.6495145631067962e-05, "loss": 0.3329, "step": 6902 }, { "epoch": 66.93253012048193, "grad_norm": 18.591320037841797, "learning_rate": 1.6490291262135924e-05, "loss": 0.1281, "step": 6903 }, { "epoch": 66.94216867469879, "grad_norm": 3.6575746536254883, "learning_rate": 1.6485436893203885e-05, "loss": 0.24, "step": 6904 }, { "epoch": 66.95180722891567, "grad_norm": 9.44211196899414, "learning_rate": 1.6480582524271846e-05, "loss": 0.132, "step": 6905 }, { "epoch": 66.96144578313253, "grad_norm": 3.1078720092773438, "learning_rate": 1.6475728155339807e-05, "loss": 0.293, "step": 6906 }, { "epoch": 66.9710843373494, "grad_norm": 3.208373785018921, "learning_rate": 1.6470873786407768e-05, "loss": 0.2407, "step": 6907 }, { "epoch": 66.98072289156626, "grad_norm": 11.833812713623047, "learning_rate": 1.646601941747573e-05, "loss": 0.2918, "step": 6908 }, { "epoch": 66.99036144578314, "grad_norm": 3.5166282653808594, "learning_rate": 1.646116504854369e-05, "loss": 0.3876, "step": 6909 }, { "epoch": 67.00602409638554, "grad_norm": 1.065575361251831, "learning_rate": 1.645631067961165e-05, "loss": 0.2008, "step": 6910 }, { "epoch": 67.01566265060241, "grad_norm": 5.505540370941162, "learning_rate": 1.6451456310679613e-05, "loss": 0.0846, "step": 6911 }, { "epoch": 67.02530120481927, "grad_norm": 7.064964294433594, "learning_rate": 1.6446601941747574e-05, "loss": 0.2035, "step": 6912 }, { "epoch": 67.03493975903615, "grad_norm": 4.020414352416992, "learning_rate": 1.6441747572815535e-05, "loss": 0.2338, "step": 6913 }, { "epoch": 67.04457831325301, "grad_norm": 1.976121425628662, "learning_rate": 1.6436893203883496e-05, "loss": 0.2278, "step": 6914 }, { "epoch": 67.05421686746988, "grad_norm": 5.732389450073242, "learning_rate": 1.6432038834951457e-05, "loss": 0.4012, "step": 6915 }, { "epoch": 67.06385542168675, "grad_norm": 3.6795003414154053, "learning_rate": 1.642718446601942e-05, "loss": 0.2726, "step": 6916 }, { "epoch": 67.07349397590362, "grad_norm": 11.94006633758545, "learning_rate": 1.642233009708738e-05, "loss": 0.2717, "step": 6917 }, { "epoch": 67.08313253012048, "grad_norm": 4.37865686416626, "learning_rate": 1.641747572815534e-05, "loss": 0.2158, "step": 6918 }, { "epoch": 67.09277108433734, "grad_norm": 7.6788129806518555, "learning_rate": 1.6412621359223302e-05, "loss": 0.3047, "step": 6919 }, { "epoch": 67.10240963855422, "grad_norm": 3.838686227798462, "learning_rate": 1.6407766990291263e-05, "loss": 0.3811, "step": 6920 }, { "epoch": 67.11204819277108, "grad_norm": 1.0667052268981934, "learning_rate": 1.6402912621359224e-05, "loss": 0.1613, "step": 6921 }, { "epoch": 67.12168674698795, "grad_norm": 2.667898178100586, "learning_rate": 1.6398058252427185e-05, "loss": 0.3024, "step": 6922 }, { "epoch": 67.13132530120482, "grad_norm": 40.988224029541016, "learning_rate": 1.6393203883495146e-05, "loss": 0.3749, "step": 6923 }, { "epoch": 67.14096385542169, "grad_norm": 5.0780415534973145, "learning_rate": 1.6388349514563108e-05, "loss": 0.3881, "step": 6924 }, { "epoch": 67.15060240963855, "grad_norm": 2.8959896564483643, "learning_rate": 1.638349514563107e-05, "loss": 0.246, "step": 6925 }, { "epoch": 67.16024096385541, "grad_norm": 33.469261169433594, "learning_rate": 1.637864077669903e-05, "loss": 0.1782, "step": 6926 }, { "epoch": 67.16987951807229, "grad_norm": 3.0999972820281982, "learning_rate": 1.637378640776699e-05, "loss": 0.2173, "step": 6927 }, { "epoch": 67.17951807228916, "grad_norm": 3.8678362369537354, "learning_rate": 1.6368932038834952e-05, "loss": 0.1892, "step": 6928 }, { "epoch": 67.18915662650602, "grad_norm": 7.721595287322998, "learning_rate": 1.6364077669902913e-05, "loss": 0.3203, "step": 6929 }, { "epoch": 67.1987951807229, "grad_norm": 11.040714263916016, "learning_rate": 1.6359223300970874e-05, "loss": 0.1978, "step": 6930 }, { "epoch": 67.20843373493976, "grad_norm": 3.261315107345581, "learning_rate": 1.6354368932038836e-05, "loss": 0.1593, "step": 6931 }, { "epoch": 67.21807228915662, "grad_norm": 11.053744316101074, "learning_rate": 1.6349514563106797e-05, "loss": 0.2236, "step": 6932 }, { "epoch": 67.2277108433735, "grad_norm": 5.24761438369751, "learning_rate": 1.6344660194174758e-05, "loss": 0.2372, "step": 6933 }, { "epoch": 67.23734939759036, "grad_norm": 15.810038566589355, "learning_rate": 1.633980582524272e-05, "loss": 0.2426, "step": 6934 }, { "epoch": 67.24698795180723, "grad_norm": 2.4953343868255615, "learning_rate": 1.633495145631068e-05, "loss": 0.1962, "step": 6935 }, { "epoch": 67.25662650602409, "grad_norm": 2.1171905994415283, "learning_rate": 1.633009708737864e-05, "loss": 0.2517, "step": 6936 }, { "epoch": 67.26626506024097, "grad_norm": 11.179730415344238, "learning_rate": 1.6325242718446603e-05, "loss": 0.1349, "step": 6937 }, { "epoch": 67.27590361445783, "grad_norm": 12.765289306640625, "learning_rate": 1.6320388349514564e-05, "loss": 0.3114, "step": 6938 }, { "epoch": 67.2855421686747, "grad_norm": 7.967268466949463, "learning_rate": 1.6315533980582525e-05, "loss": 0.2678, "step": 6939 }, { "epoch": 67.29518072289157, "grad_norm": 2.2719500064849854, "learning_rate": 1.6310679611650486e-05, "loss": 0.1552, "step": 6940 }, { "epoch": 67.30481927710844, "grad_norm": 4.22945499420166, "learning_rate": 1.6305825242718447e-05, "loss": 0.2869, "step": 6941 }, { "epoch": 67.3144578313253, "grad_norm": 14.778789520263672, "learning_rate": 1.6300970873786408e-05, "loss": 0.1584, "step": 6942 }, { "epoch": 67.32409638554216, "grad_norm": 18.21709442138672, "learning_rate": 1.629611650485437e-05, "loss": 0.3354, "step": 6943 }, { "epoch": 67.33373493975904, "grad_norm": 19.52067756652832, "learning_rate": 1.629126213592233e-05, "loss": 0.3147, "step": 6944 }, { "epoch": 67.3433734939759, "grad_norm": 4.948939323425293, "learning_rate": 1.628640776699029e-05, "loss": 0.2859, "step": 6945 }, { "epoch": 67.35301204819277, "grad_norm": 9.81902027130127, "learning_rate": 1.6281553398058253e-05, "loss": 0.2659, "step": 6946 }, { "epoch": 67.36265060240964, "grad_norm": 8.627188682556152, "learning_rate": 1.6276699029126214e-05, "loss": 0.2863, "step": 6947 }, { "epoch": 67.37228915662651, "grad_norm": 8.356121063232422, "learning_rate": 1.6271844660194175e-05, "loss": 0.1136, "step": 6948 }, { "epoch": 67.38192771084337, "grad_norm": 14.49079704284668, "learning_rate": 1.626699029126214e-05, "loss": 0.2135, "step": 6949 }, { "epoch": 67.39156626506023, "grad_norm": 16.592357635498047, "learning_rate": 1.6262135922330097e-05, "loss": 0.1727, "step": 6950 }, { "epoch": 67.40120481927711, "grad_norm": 5.01421594619751, "learning_rate": 1.625728155339806e-05, "loss": 0.2185, "step": 6951 }, { "epoch": 67.41084337349398, "grad_norm": 1.6285425424575806, "learning_rate": 1.625242718446602e-05, "loss": 0.1262, "step": 6952 }, { "epoch": 67.42048192771084, "grad_norm": 4.214315891265869, "learning_rate": 1.624757281553398e-05, "loss": 0.1273, "step": 6953 }, { "epoch": 67.43012048192772, "grad_norm": 14.328014373779297, "learning_rate": 1.6242718446601942e-05, "loss": 0.2878, "step": 6954 }, { "epoch": 67.43975903614458, "grad_norm": 8.199352264404297, "learning_rate": 1.6237864077669903e-05, "loss": 0.1325, "step": 6955 }, { "epoch": 67.44939759036144, "grad_norm": 12.380399703979492, "learning_rate": 1.6233009708737864e-05, "loss": 0.3687, "step": 6956 }, { "epoch": 67.45903614457832, "grad_norm": 2.2645788192749023, "learning_rate": 1.6228155339805825e-05, "loss": 0.2147, "step": 6957 }, { "epoch": 67.46867469879518, "grad_norm": 9.315295219421387, "learning_rate": 1.6223300970873787e-05, "loss": 0.2803, "step": 6958 }, { "epoch": 67.47831325301205, "grad_norm": 18.882015228271484, "learning_rate": 1.6218446601941748e-05, "loss": 0.2396, "step": 6959 }, { "epoch": 67.48795180722891, "grad_norm": 3.2504498958587646, "learning_rate": 1.621359223300971e-05, "loss": 0.0982, "step": 6960 }, { "epoch": 67.49759036144579, "grad_norm": 2.7874693870544434, "learning_rate": 1.620873786407767e-05, "loss": 0.2323, "step": 6961 }, { "epoch": 67.50722891566265, "grad_norm": 2.413708448410034, "learning_rate": 1.6203883495145635e-05, "loss": 0.1089, "step": 6962 }, { "epoch": 67.51686746987951, "grad_norm": 18.567996978759766, "learning_rate": 1.6199029126213596e-05, "loss": 0.3453, "step": 6963 }, { "epoch": 67.52650602409639, "grad_norm": 2.9338529109954834, "learning_rate": 1.6194174757281553e-05, "loss": 0.0977, "step": 6964 }, { "epoch": 67.53614457831326, "grad_norm": 39.33094024658203, "learning_rate": 1.6189320388349515e-05, "loss": 0.3824, "step": 6965 }, { "epoch": 67.54578313253012, "grad_norm": 8.012566566467285, "learning_rate": 1.6184466019417476e-05, "loss": 0.4778, "step": 6966 }, { "epoch": 67.55542168674698, "grad_norm": 19.574329376220703, "learning_rate": 1.6179611650485437e-05, "loss": 0.2082, "step": 6967 }, { "epoch": 67.56506024096386, "grad_norm": 3.27409291267395, "learning_rate": 1.6174757281553398e-05, "loss": 0.2975, "step": 6968 }, { "epoch": 67.57469879518072, "grad_norm": 3.3904476165771484, "learning_rate": 1.616990291262136e-05, "loss": 0.3897, "step": 6969 }, { "epoch": 67.58433734939759, "grad_norm": 2.872692584991455, "learning_rate": 1.616504854368932e-05, "loss": 0.223, "step": 6970 }, { "epoch": 67.59397590361446, "grad_norm": 4.46239709854126, "learning_rate": 1.616019417475728e-05, "loss": 0.3103, "step": 6971 }, { "epoch": 67.60361445783133, "grad_norm": 2.6260621547698975, "learning_rate": 1.6155339805825243e-05, "loss": 0.2444, "step": 6972 }, { "epoch": 67.61325301204819, "grad_norm": 1.8158882856369019, "learning_rate": 1.6150485436893204e-05, "loss": 0.1102, "step": 6973 }, { "epoch": 67.62289156626505, "grad_norm": 13.861235618591309, "learning_rate": 1.6145631067961168e-05, "loss": 0.2064, "step": 6974 }, { "epoch": 67.63253012048193, "grad_norm": 10.15152359008789, "learning_rate": 1.614077669902913e-05, "loss": 0.4912, "step": 6975 }, { "epoch": 67.6421686746988, "grad_norm": 5.258996486663818, "learning_rate": 1.613592233009709e-05, "loss": 0.148, "step": 6976 }, { "epoch": 67.65180722891566, "grad_norm": 4.925541400909424, "learning_rate": 1.6131067961165052e-05, "loss": 0.1562, "step": 6977 }, { "epoch": 67.66144578313254, "grad_norm": 8.360085487365723, "learning_rate": 1.612621359223301e-05, "loss": 0.3396, "step": 6978 }, { "epoch": 67.6710843373494, "grad_norm": 3.3515472412109375, "learning_rate": 1.612135922330097e-05, "loss": 0.2715, "step": 6979 }, { "epoch": 67.68072289156626, "grad_norm": 12.921295166015625, "learning_rate": 1.6116504854368932e-05, "loss": 0.0868, "step": 6980 }, { "epoch": 67.69036144578314, "grad_norm": 7.242173194885254, "learning_rate": 1.6111650485436893e-05, "loss": 0.2151, "step": 6981 }, { "epoch": 67.7, "grad_norm": 9.872143745422363, "learning_rate": 1.6106796116504854e-05, "loss": 0.1238, "step": 6982 }, { "epoch": 67.70963855421687, "grad_norm": 12.451607704162598, "learning_rate": 1.6101941747572815e-05, "loss": 0.3256, "step": 6983 }, { "epoch": 67.71927710843373, "grad_norm": 9.747042655944824, "learning_rate": 1.6097087378640776e-05, "loss": 0.1839, "step": 6984 }, { "epoch": 67.7289156626506, "grad_norm": 18.67774772644043, "learning_rate": 1.6092233009708738e-05, "loss": 0.2118, "step": 6985 }, { "epoch": 67.73855421686747, "grad_norm": 11.533525466918945, "learning_rate": 1.60873786407767e-05, "loss": 0.2601, "step": 6986 }, { "epoch": 67.74819277108433, "grad_norm": 9.672327995300293, "learning_rate": 1.6082524271844663e-05, "loss": 0.103, "step": 6987 }, { "epoch": 67.75783132530121, "grad_norm": 5.785787105560303, "learning_rate": 1.6077669902912624e-05, "loss": 0.3613, "step": 6988 }, { "epoch": 67.76746987951807, "grad_norm": 7.641305923461914, "learning_rate": 1.6072815533980585e-05, "loss": 0.1615, "step": 6989 }, { "epoch": 67.77710843373494, "grad_norm": 5.5945658683776855, "learning_rate": 1.6067961165048547e-05, "loss": 0.3455, "step": 6990 }, { "epoch": 67.7867469879518, "grad_norm": 9.296849250793457, "learning_rate": 1.6063106796116504e-05, "loss": 0.1623, "step": 6991 }, { "epoch": 67.79638554216868, "grad_norm": 25.1669979095459, "learning_rate": 1.6058252427184466e-05, "loss": 0.1962, "step": 6992 }, { "epoch": 67.80602409638554, "grad_norm": 2.7700724601745605, "learning_rate": 1.6053398058252427e-05, "loss": 0.1486, "step": 6993 }, { "epoch": 67.8156626506024, "grad_norm": 3.2030398845672607, "learning_rate": 1.6048543689320388e-05, "loss": 0.1763, "step": 6994 }, { "epoch": 67.82530120481928, "grad_norm": 14.050956726074219, "learning_rate": 1.604368932038835e-05, "loss": 0.1394, "step": 6995 }, { "epoch": 67.83493975903615, "grad_norm": 30.38130760192871, "learning_rate": 1.603883495145631e-05, "loss": 0.2299, "step": 6996 }, { "epoch": 67.84457831325301, "grad_norm": 4.632196426391602, "learning_rate": 1.603398058252427e-05, "loss": 0.2517, "step": 6997 }, { "epoch": 67.85421686746987, "grad_norm": 3.670039415359497, "learning_rate": 1.6029126213592232e-05, "loss": 0.3004, "step": 6998 }, { "epoch": 67.86385542168675, "grad_norm": 3.5478031635284424, "learning_rate": 1.6024271844660194e-05, "loss": 0.1902, "step": 6999 }, { "epoch": 67.87349397590361, "grad_norm": 7.3660197257995605, "learning_rate": 1.6019417475728158e-05, "loss": 0.1436, "step": 7000 }, { "epoch": 67.88313253012048, "grad_norm": 6.493963241577148, "learning_rate": 1.601456310679612e-05, "loss": 0.3146, "step": 7001 }, { "epoch": 67.89277108433735, "grad_norm": 13.439291954040527, "learning_rate": 1.600970873786408e-05, "loss": 0.3636, "step": 7002 }, { "epoch": 67.90240963855422, "grad_norm": 8.050071716308594, "learning_rate": 1.600485436893204e-05, "loss": 0.4774, "step": 7003 }, { "epoch": 67.91204819277108, "grad_norm": 11.982166290283203, "learning_rate": 1.6000000000000003e-05, "loss": 0.3215, "step": 7004 }, { "epoch": 67.92168674698796, "grad_norm": 5.30062198638916, "learning_rate": 1.599514563106796e-05, "loss": 0.1841, "step": 7005 }, { "epoch": 67.93132530120482, "grad_norm": 9.46510124206543, "learning_rate": 1.599029126213592e-05, "loss": 0.2652, "step": 7006 }, { "epoch": 67.94096385542169, "grad_norm": 3.858088493347168, "learning_rate": 1.5985436893203883e-05, "loss": 0.2007, "step": 7007 }, { "epoch": 67.95060240963855, "grad_norm": 9.651148796081543, "learning_rate": 1.5980582524271844e-05, "loss": 0.2084, "step": 7008 }, { "epoch": 67.96024096385543, "grad_norm": 22.310794830322266, "learning_rate": 1.5975728155339805e-05, "loss": 0.2485, "step": 7009 }, { "epoch": 67.96987951807229, "grad_norm": 5.616776943206787, "learning_rate": 1.5970873786407766e-05, "loss": 0.193, "step": 7010 }, { "epoch": 67.97951807228915, "grad_norm": 5.391765117645264, "learning_rate": 1.5966019417475727e-05, "loss": 0.1291, "step": 7011 }, { "epoch": 67.98915662650603, "grad_norm": 10.427078247070312, "learning_rate": 1.5961165048543692e-05, "loss": 0.1552, "step": 7012 }, { "epoch": 68.00481927710844, "grad_norm": 2.707583427429199, "learning_rate": 1.5956310679611653e-05, "loss": 0.1206, "step": 7013 }, { "epoch": 68.0144578313253, "grad_norm": 3.4650380611419678, "learning_rate": 1.5951456310679614e-05, "loss": 0.0836, "step": 7014 }, { "epoch": 68.02409638554217, "grad_norm": 7.531802177429199, "learning_rate": 1.5946601941747575e-05, "loss": 0.0856, "step": 7015 }, { "epoch": 68.03373493975904, "grad_norm": 9.249044418334961, "learning_rate": 1.5941747572815536e-05, "loss": 0.1195, "step": 7016 }, { "epoch": 68.0433734939759, "grad_norm": 4.3990092277526855, "learning_rate": 1.5936893203883498e-05, "loss": 0.108, "step": 7017 }, { "epoch": 68.05301204819277, "grad_norm": 6.110069751739502, "learning_rate": 1.593203883495146e-05, "loss": 0.22, "step": 7018 }, { "epoch": 68.06265060240963, "grad_norm": 3.515622615814209, "learning_rate": 1.5927184466019416e-05, "loss": 0.2608, "step": 7019 }, { "epoch": 68.07228915662651, "grad_norm": 4.681053161621094, "learning_rate": 1.5922330097087378e-05, "loss": 0.155, "step": 7020 }, { "epoch": 68.08192771084337, "grad_norm": 7.6269755363464355, "learning_rate": 1.591747572815534e-05, "loss": 0.3096, "step": 7021 }, { "epoch": 68.09156626506024, "grad_norm": 7.0949225425720215, "learning_rate": 1.59126213592233e-05, "loss": 0.2485, "step": 7022 }, { "epoch": 68.10120481927711, "grad_norm": 45.008766174316406, "learning_rate": 1.590776699029126e-05, "loss": 0.1664, "step": 7023 }, { "epoch": 68.11084337349398, "grad_norm": 5.518270969390869, "learning_rate": 1.5902912621359222e-05, "loss": 0.1382, "step": 7024 }, { "epoch": 68.12048192771084, "grad_norm": 9.320350646972656, "learning_rate": 1.5898058252427187e-05, "loss": 0.1698, "step": 7025 }, { "epoch": 68.1301204819277, "grad_norm": 5.657938003540039, "learning_rate": 1.5893203883495148e-05, "loss": 0.231, "step": 7026 }, { "epoch": 68.13975903614458, "grad_norm": 4.189245700836182, "learning_rate": 1.588834951456311e-05, "loss": 0.2279, "step": 7027 }, { "epoch": 68.14939759036145, "grad_norm": 1.8222447633743286, "learning_rate": 1.588349514563107e-05, "loss": 0.1665, "step": 7028 }, { "epoch": 68.15903614457831, "grad_norm": 5.747811317443848, "learning_rate": 1.587864077669903e-05, "loss": 0.365, "step": 7029 }, { "epoch": 68.16867469879519, "grad_norm": 3.1591882705688477, "learning_rate": 1.5873786407766992e-05, "loss": 0.374, "step": 7030 }, { "epoch": 68.17831325301205, "grad_norm": 4.275758266448975, "learning_rate": 1.5868932038834954e-05, "loss": 0.4515, "step": 7031 }, { "epoch": 68.18795180722891, "grad_norm": 1.8219854831695557, "learning_rate": 1.5864077669902915e-05, "loss": 0.2024, "step": 7032 }, { "epoch": 68.19759036144578, "grad_norm": 10.501327514648438, "learning_rate": 1.5859223300970873e-05, "loss": 0.2186, "step": 7033 }, { "epoch": 68.20722891566265, "grad_norm": 3.797436237335205, "learning_rate": 1.5854368932038834e-05, "loss": 0.2111, "step": 7034 }, { "epoch": 68.21686746987952, "grad_norm": 19.471731185913086, "learning_rate": 1.5849514563106795e-05, "loss": 0.2396, "step": 7035 }, { "epoch": 68.22650602409638, "grad_norm": 3.972019672393799, "learning_rate": 1.5844660194174756e-05, "loss": 0.2487, "step": 7036 }, { "epoch": 68.23614457831326, "grad_norm": 8.646108627319336, "learning_rate": 1.5839805825242717e-05, "loss": 0.1566, "step": 7037 }, { "epoch": 68.24578313253012, "grad_norm": 2.6003613471984863, "learning_rate": 1.583495145631068e-05, "loss": 0.162, "step": 7038 }, { "epoch": 68.25542168674698, "grad_norm": 3.3084912300109863, "learning_rate": 1.5830097087378643e-05, "loss": 0.0951, "step": 7039 }, { "epoch": 68.26506024096386, "grad_norm": 2.809805393218994, "learning_rate": 1.5825242718446604e-05, "loss": 0.2449, "step": 7040 }, { "epoch": 68.27469879518073, "grad_norm": 14.698777198791504, "learning_rate": 1.5820388349514565e-05, "loss": 0.2372, "step": 7041 }, { "epoch": 68.28433734939759, "grad_norm": 3.471327066421509, "learning_rate": 1.5815533980582526e-05, "loss": 0.4, "step": 7042 }, { "epoch": 68.29397590361445, "grad_norm": 6.950856685638428, "learning_rate": 1.5810679611650487e-05, "loss": 0.2763, "step": 7043 }, { "epoch": 68.30361445783133, "grad_norm": 3.656872272491455, "learning_rate": 1.580582524271845e-05, "loss": 0.2539, "step": 7044 }, { "epoch": 68.3132530120482, "grad_norm": 6.089412689208984, "learning_rate": 1.580097087378641e-05, "loss": 0.2507, "step": 7045 }, { "epoch": 68.32289156626506, "grad_norm": 39.469207763671875, "learning_rate": 1.579611650485437e-05, "loss": 0.2537, "step": 7046 }, { "epoch": 68.33253012048193, "grad_norm": 9.326927185058594, "learning_rate": 1.579126213592233e-05, "loss": 0.2581, "step": 7047 }, { "epoch": 68.3421686746988, "grad_norm": 2.466092348098755, "learning_rate": 1.578640776699029e-05, "loss": 0.1959, "step": 7048 }, { "epoch": 68.35180722891566, "grad_norm": 3.801790952682495, "learning_rate": 1.578155339805825e-05, "loss": 0.2155, "step": 7049 }, { "epoch": 68.36144578313252, "grad_norm": 4.514357089996338, "learning_rate": 1.5776699029126215e-05, "loss": 0.1416, "step": 7050 }, { "epoch": 68.3710843373494, "grad_norm": 5.614737510681152, "learning_rate": 1.5771844660194177e-05, "loss": 0.351, "step": 7051 }, { "epoch": 68.38072289156626, "grad_norm": 2.0030035972595215, "learning_rate": 1.5766990291262138e-05, "loss": 0.0897, "step": 7052 }, { "epoch": 68.39036144578313, "grad_norm": 14.44750690460205, "learning_rate": 1.57621359223301e-05, "loss": 0.1642, "step": 7053 }, { "epoch": 68.4, "grad_norm": 1.5108286142349243, "learning_rate": 1.575728155339806e-05, "loss": 0.2227, "step": 7054 }, { "epoch": 68.40963855421687, "grad_norm": 6.744640827178955, "learning_rate": 1.575242718446602e-05, "loss": 0.2038, "step": 7055 }, { "epoch": 68.41927710843373, "grad_norm": 3.6839675903320312, "learning_rate": 1.5747572815533982e-05, "loss": 0.3551, "step": 7056 }, { "epoch": 68.4289156626506, "grad_norm": 4.039345741271973, "learning_rate": 1.5742718446601943e-05, "loss": 0.1232, "step": 7057 }, { "epoch": 68.43855421686747, "grad_norm": 5.935076713562012, "learning_rate": 1.5737864077669905e-05, "loss": 0.1497, "step": 7058 }, { "epoch": 68.44819277108434, "grad_norm": 2.9710922241210938, "learning_rate": 1.5733009708737866e-05, "loss": 0.3282, "step": 7059 }, { "epoch": 68.4578313253012, "grad_norm": 8.660177230834961, "learning_rate": 1.5728155339805823e-05, "loss": 0.1204, "step": 7060 }, { "epoch": 68.46746987951808, "grad_norm": 6.803740978240967, "learning_rate": 1.5723300970873785e-05, "loss": 0.1636, "step": 7061 }, { "epoch": 68.47710843373494, "grad_norm": 10.416114807128906, "learning_rate": 1.5718446601941746e-05, "loss": 0.1033, "step": 7062 }, { "epoch": 68.4867469879518, "grad_norm": 15.712705612182617, "learning_rate": 1.571359223300971e-05, "loss": 0.2404, "step": 7063 }, { "epoch": 68.49638554216868, "grad_norm": 4.722684860229492, "learning_rate": 1.570873786407767e-05, "loss": 0.1579, "step": 7064 }, { "epoch": 68.50602409638554, "grad_norm": 9.898528099060059, "learning_rate": 1.5703883495145633e-05, "loss": 0.1948, "step": 7065 }, { "epoch": 68.51566265060241, "grad_norm": 11.964600563049316, "learning_rate": 1.5699029126213594e-05, "loss": 0.4118, "step": 7066 }, { "epoch": 68.52530120481927, "grad_norm": 6.2177557945251465, "learning_rate": 1.5694174757281555e-05, "loss": 0.091, "step": 7067 }, { "epoch": 68.53493975903615, "grad_norm": 4.249815940856934, "learning_rate": 1.5689320388349516e-05, "loss": 0.2471, "step": 7068 }, { "epoch": 68.54457831325301, "grad_norm": 7.341970443725586, "learning_rate": 1.5684466019417477e-05, "loss": 0.326, "step": 7069 }, { "epoch": 68.55421686746988, "grad_norm": 4.2844557762146, "learning_rate": 1.5679611650485438e-05, "loss": 0.1969, "step": 7070 }, { "epoch": 68.56385542168675, "grad_norm": 9.762212753295898, "learning_rate": 1.56747572815534e-05, "loss": 0.2795, "step": 7071 }, { "epoch": 68.57349397590362, "grad_norm": 8.05679702758789, "learning_rate": 1.566990291262136e-05, "loss": 0.1843, "step": 7072 }, { "epoch": 68.58313253012048, "grad_norm": 2.6633548736572266, "learning_rate": 1.5665048543689322e-05, "loss": 0.1358, "step": 7073 }, { "epoch": 68.59277108433734, "grad_norm": 5.095353126525879, "learning_rate": 1.566019417475728e-05, "loss": 0.3161, "step": 7074 }, { "epoch": 68.60240963855422, "grad_norm": 4.991581439971924, "learning_rate": 1.5655339805825244e-05, "loss": 0.1369, "step": 7075 }, { "epoch": 68.61204819277108, "grad_norm": 11.401288032531738, "learning_rate": 1.5650485436893205e-05, "loss": 0.279, "step": 7076 }, { "epoch": 68.62168674698795, "grad_norm": 10.08537769317627, "learning_rate": 1.5645631067961166e-05, "loss": 0.2961, "step": 7077 }, { "epoch": 68.63132530120482, "grad_norm": 4.8390936851501465, "learning_rate": 1.5640776699029127e-05, "loss": 0.3825, "step": 7078 }, { "epoch": 68.64096385542169, "grad_norm": 9.326869010925293, "learning_rate": 1.563592233009709e-05, "loss": 0.2066, "step": 7079 }, { "epoch": 68.65060240963855, "grad_norm": 3.0799174308776855, "learning_rate": 1.563106796116505e-05, "loss": 0.3156, "step": 7080 }, { "epoch": 68.66024096385541, "grad_norm": 15.961691856384277, "learning_rate": 1.562621359223301e-05, "loss": 0.2653, "step": 7081 }, { "epoch": 68.66987951807229, "grad_norm": 28.758472442626953, "learning_rate": 1.5621359223300972e-05, "loss": 0.4125, "step": 7082 }, { "epoch": 68.67951807228916, "grad_norm": 2.558102607727051, "learning_rate": 1.5616504854368933e-05, "loss": 0.1722, "step": 7083 }, { "epoch": 68.68915662650602, "grad_norm": 4.962578773498535, "learning_rate": 1.5611650485436894e-05, "loss": 0.2669, "step": 7084 }, { "epoch": 68.6987951807229, "grad_norm": 6.071590423583984, "learning_rate": 1.5606796116504855e-05, "loss": 0.4171, "step": 7085 }, { "epoch": 68.70843373493976, "grad_norm": 10.562142372131348, "learning_rate": 1.5601941747572817e-05, "loss": 0.3844, "step": 7086 }, { "epoch": 68.71807228915662, "grad_norm": 2.9589149951934814, "learning_rate": 1.5597087378640778e-05, "loss": 0.3047, "step": 7087 }, { "epoch": 68.7277108433735, "grad_norm": 8.342185974121094, "learning_rate": 1.559223300970874e-05, "loss": 0.2067, "step": 7088 }, { "epoch": 68.73734939759036, "grad_norm": 2.8087716102600098, "learning_rate": 1.55873786407767e-05, "loss": 0.173, "step": 7089 }, { "epoch": 68.74698795180723, "grad_norm": 9.814685821533203, "learning_rate": 1.558252427184466e-05, "loss": 0.23, "step": 7090 }, { "epoch": 68.75662650602409, "grad_norm": 2.3481619358062744, "learning_rate": 1.5577669902912622e-05, "loss": 0.13, "step": 7091 }, { "epoch": 68.76626506024097, "grad_norm": 0.7732184529304504, "learning_rate": 1.5572815533980583e-05, "loss": 0.1157, "step": 7092 }, { "epoch": 68.77590361445783, "grad_norm": 8.283831596374512, "learning_rate": 1.5567961165048545e-05, "loss": 0.2847, "step": 7093 }, { "epoch": 68.7855421686747, "grad_norm": 19.325855255126953, "learning_rate": 1.5563106796116506e-05, "loss": 0.2785, "step": 7094 }, { "epoch": 68.79518072289157, "grad_norm": 9.864052772521973, "learning_rate": 1.5558252427184467e-05, "loss": 0.3181, "step": 7095 }, { "epoch": 68.80481927710844, "grad_norm": 3.274592876434326, "learning_rate": 1.5553398058252428e-05, "loss": 0.2511, "step": 7096 }, { "epoch": 68.8144578313253, "grad_norm": 41.72990417480469, "learning_rate": 1.554854368932039e-05, "loss": 0.2014, "step": 7097 }, { "epoch": 68.82409638554216, "grad_norm": 3.8731021881103516, "learning_rate": 1.554368932038835e-05, "loss": 0.1289, "step": 7098 }, { "epoch": 68.83373493975904, "grad_norm": 2.513810634613037, "learning_rate": 1.553883495145631e-05, "loss": 0.2295, "step": 7099 }, { "epoch": 68.8433734939759, "grad_norm": 9.120660781860352, "learning_rate": 1.5533980582524273e-05, "loss": 0.0983, "step": 7100 }, { "epoch": 68.85301204819277, "grad_norm": 2.3519864082336426, "learning_rate": 1.5529126213592234e-05, "loss": 0.2039, "step": 7101 }, { "epoch": 68.86265060240964, "grad_norm": 13.28311824798584, "learning_rate": 1.5524271844660195e-05, "loss": 0.2651, "step": 7102 }, { "epoch": 68.87228915662651, "grad_norm": 6.558041572570801, "learning_rate": 1.5519417475728156e-05, "loss": 0.3156, "step": 7103 }, { "epoch": 68.88192771084337, "grad_norm": 3.6790878772735596, "learning_rate": 1.5514563106796117e-05, "loss": 0.1112, "step": 7104 }, { "epoch": 68.89156626506023, "grad_norm": 5.1000494956970215, "learning_rate": 1.550970873786408e-05, "loss": 0.247, "step": 7105 }, { "epoch": 68.90120481927711, "grad_norm": 3.8076066970825195, "learning_rate": 1.550485436893204e-05, "loss": 0.1921, "step": 7106 }, { "epoch": 68.91084337349398, "grad_norm": 6.127856254577637, "learning_rate": 1.55e-05, "loss": 0.5053, "step": 7107 }, { "epoch": 68.92048192771084, "grad_norm": 4.289303779602051, "learning_rate": 1.5495145631067962e-05, "loss": 0.1927, "step": 7108 }, { "epoch": 68.93012048192772, "grad_norm": 7.992083549499512, "learning_rate": 1.5490291262135923e-05, "loss": 0.113, "step": 7109 }, { "epoch": 68.93975903614458, "grad_norm": 6.433160781860352, "learning_rate": 1.5485436893203884e-05, "loss": 0.4274, "step": 7110 }, { "epoch": 68.94939759036144, "grad_norm": 2.968747615814209, "learning_rate": 1.5480582524271845e-05, "loss": 0.1937, "step": 7111 }, { "epoch": 68.95903614457832, "grad_norm": 12.35164737701416, "learning_rate": 1.5475728155339806e-05, "loss": 0.2838, "step": 7112 }, { "epoch": 68.96867469879518, "grad_norm": 5.567519187927246, "learning_rate": 1.5470873786407768e-05, "loss": 0.3954, "step": 7113 }, { "epoch": 68.97831325301205, "grad_norm": 10.31318473815918, "learning_rate": 1.546601941747573e-05, "loss": 0.2186, "step": 7114 }, { "epoch": 68.98795180722891, "grad_norm": 2.0694923400878906, "learning_rate": 1.546116504854369e-05, "loss": 0.1646, "step": 7115 }, { "epoch": 69.00361445783132, "grad_norm": 6.664844989776611, "learning_rate": 1.545631067961165e-05, "loss": 0.431, "step": 7116 }, { "epoch": 69.0132530120482, "grad_norm": 7.797993183135986, "learning_rate": 1.5451456310679612e-05, "loss": 0.3283, "step": 7117 }, { "epoch": 69.02289156626506, "grad_norm": 19.22056007385254, "learning_rate": 1.5446601941747573e-05, "loss": 0.4389, "step": 7118 }, { "epoch": 69.03253012048192, "grad_norm": 4.502618312835693, "learning_rate": 1.5441747572815534e-05, "loss": 0.2498, "step": 7119 }, { "epoch": 69.0421686746988, "grad_norm": 2.4590814113616943, "learning_rate": 1.5436893203883496e-05, "loss": 0.2111, "step": 7120 }, { "epoch": 69.05180722891566, "grad_norm": 5.532229423522949, "learning_rate": 1.5432038834951457e-05, "loss": 0.2539, "step": 7121 }, { "epoch": 69.06144578313253, "grad_norm": 27.548179626464844, "learning_rate": 1.5427184466019418e-05, "loss": 0.3901, "step": 7122 }, { "epoch": 69.0710843373494, "grad_norm": 11.318187713623047, "learning_rate": 1.542233009708738e-05, "loss": 0.2877, "step": 7123 }, { "epoch": 69.08072289156627, "grad_norm": 4.398303508758545, "learning_rate": 1.541747572815534e-05, "loss": 0.247, "step": 7124 }, { "epoch": 69.09036144578313, "grad_norm": 7.678520679473877, "learning_rate": 1.54126213592233e-05, "loss": 0.201, "step": 7125 }, { "epoch": 69.1, "grad_norm": 4.478413105010986, "learning_rate": 1.5407766990291262e-05, "loss": 0.1199, "step": 7126 }, { "epoch": 69.10963855421687, "grad_norm": 8.07192325592041, "learning_rate": 1.5402912621359224e-05, "loss": 0.2619, "step": 7127 }, { "epoch": 69.11927710843374, "grad_norm": 6.269580364227295, "learning_rate": 1.5398058252427185e-05, "loss": 0.1398, "step": 7128 }, { "epoch": 69.1289156626506, "grad_norm": 3.217088222503662, "learning_rate": 1.5393203883495146e-05, "loss": 0.1012, "step": 7129 }, { "epoch": 69.13855421686748, "grad_norm": 1.0351061820983887, "learning_rate": 1.5388349514563107e-05, "loss": 0.0525, "step": 7130 }, { "epoch": 69.14819277108434, "grad_norm": 4.301706314086914, "learning_rate": 1.5383495145631068e-05, "loss": 0.3223, "step": 7131 }, { "epoch": 69.1578313253012, "grad_norm": 5.796014308929443, "learning_rate": 1.537864077669903e-05, "loss": 0.2723, "step": 7132 }, { "epoch": 69.16746987951807, "grad_norm": 6.256807804107666, "learning_rate": 1.537378640776699e-05, "loss": 0.3181, "step": 7133 }, { "epoch": 69.17710843373494, "grad_norm": 8.535839080810547, "learning_rate": 1.536893203883495e-05, "loss": 0.1509, "step": 7134 }, { "epoch": 69.1867469879518, "grad_norm": 4.6416192054748535, "learning_rate": 1.5364077669902913e-05, "loss": 0.1558, "step": 7135 }, { "epoch": 69.19638554216867, "grad_norm": 6.145987033843994, "learning_rate": 1.5359223300970874e-05, "loss": 0.2549, "step": 7136 }, { "epoch": 69.20602409638555, "grad_norm": 3.0479018688201904, "learning_rate": 1.5354368932038835e-05, "loss": 0.3272, "step": 7137 }, { "epoch": 69.21566265060241, "grad_norm": 2.285717725753784, "learning_rate": 1.5349514563106796e-05, "loss": 0.1552, "step": 7138 }, { "epoch": 69.22530120481927, "grad_norm": 7.965272903442383, "learning_rate": 1.534466019417476e-05, "loss": 0.2191, "step": 7139 }, { "epoch": 69.23493975903614, "grad_norm": 3.7833411693573, "learning_rate": 1.533980582524272e-05, "loss": 0.2312, "step": 7140 }, { "epoch": 69.24457831325302, "grad_norm": 2.546159029006958, "learning_rate": 1.533495145631068e-05, "loss": 0.2117, "step": 7141 }, { "epoch": 69.25421686746988, "grad_norm": 30.22565460205078, "learning_rate": 1.533009708737864e-05, "loss": 0.4971, "step": 7142 }, { "epoch": 69.26385542168674, "grad_norm": 14.211407661437988, "learning_rate": 1.5325242718446602e-05, "loss": 0.2907, "step": 7143 }, { "epoch": 69.27349397590362, "grad_norm": 12.592351913452148, "learning_rate": 1.5320388349514563e-05, "loss": 0.2427, "step": 7144 }, { "epoch": 69.28313253012048, "grad_norm": 18.787660598754883, "learning_rate": 1.5315533980582524e-05, "loss": 0.3413, "step": 7145 }, { "epoch": 69.29277108433735, "grad_norm": 11.76677131652832, "learning_rate": 1.5310679611650485e-05, "loss": 0.2982, "step": 7146 }, { "epoch": 69.30240963855422, "grad_norm": 5.425744533538818, "learning_rate": 1.5305825242718447e-05, "loss": 0.2546, "step": 7147 }, { "epoch": 69.31204819277109, "grad_norm": 12.025601387023926, "learning_rate": 1.5300970873786408e-05, "loss": 0.2075, "step": 7148 }, { "epoch": 69.32168674698795, "grad_norm": 6.23056173324585, "learning_rate": 1.529611650485437e-05, "loss": 0.2929, "step": 7149 }, { "epoch": 69.33132530120481, "grad_norm": 12.460831642150879, "learning_rate": 1.529126213592233e-05, "loss": 0.4614, "step": 7150 }, { "epoch": 69.34096385542169, "grad_norm": 6.08943510055542, "learning_rate": 1.5286407766990294e-05, "loss": 0.1426, "step": 7151 }, { "epoch": 69.35060240963855, "grad_norm": 4.789731979370117, "learning_rate": 1.5281553398058256e-05, "loss": 0.3013, "step": 7152 }, { "epoch": 69.36024096385542, "grad_norm": 7.988735198974609, "learning_rate": 1.5276699029126217e-05, "loss": 0.1812, "step": 7153 }, { "epoch": 69.3698795180723, "grad_norm": 3.2270214557647705, "learning_rate": 1.5271844660194175e-05, "loss": 0.4153, "step": 7154 }, { "epoch": 69.37951807228916, "grad_norm": 10.428766250610352, "learning_rate": 1.5266990291262136e-05, "loss": 0.198, "step": 7155 }, { "epoch": 69.38915662650602, "grad_norm": 6.130436420440674, "learning_rate": 1.5262135922330097e-05, "loss": 0.2812, "step": 7156 }, { "epoch": 69.39879518072289, "grad_norm": 7.010892391204834, "learning_rate": 1.525728155339806e-05, "loss": 0.3137, "step": 7157 }, { "epoch": 69.40843373493976, "grad_norm": 2.6077637672424316, "learning_rate": 1.5252427184466019e-05, "loss": 0.1708, "step": 7158 }, { "epoch": 69.41807228915663, "grad_norm": 6.218930244445801, "learning_rate": 1.524757281553398e-05, "loss": 0.194, "step": 7159 }, { "epoch": 69.42771084337349, "grad_norm": 4.766157150268555, "learning_rate": 1.5242718446601941e-05, "loss": 0.3094, "step": 7160 }, { "epoch": 69.43734939759037, "grad_norm": 9.923993110656738, "learning_rate": 1.5237864077669903e-05, "loss": 0.1436, "step": 7161 }, { "epoch": 69.44698795180723, "grad_norm": 12.532271385192871, "learning_rate": 1.5233009708737864e-05, "loss": 0.2369, "step": 7162 }, { "epoch": 69.4566265060241, "grad_norm": 5.495175361633301, "learning_rate": 1.5228155339805825e-05, "loss": 0.3767, "step": 7163 }, { "epoch": 69.46626506024096, "grad_norm": 16.325199127197266, "learning_rate": 1.5223300970873788e-05, "loss": 0.2049, "step": 7164 }, { "epoch": 69.47590361445783, "grad_norm": 8.715742111206055, "learning_rate": 1.5218446601941749e-05, "loss": 0.3367, "step": 7165 }, { "epoch": 69.4855421686747, "grad_norm": 28.96422576904297, "learning_rate": 1.521359223300971e-05, "loss": 0.3699, "step": 7166 }, { "epoch": 69.49518072289156, "grad_norm": 4.174931049346924, "learning_rate": 1.5208737864077671e-05, "loss": 0.2607, "step": 7167 }, { "epoch": 69.50481927710844, "grad_norm": 17.872467041015625, "learning_rate": 1.5203883495145632e-05, "loss": 0.347, "step": 7168 }, { "epoch": 69.5144578313253, "grad_norm": 7.686421871185303, "learning_rate": 1.5199029126213593e-05, "loss": 0.3207, "step": 7169 }, { "epoch": 69.52409638554217, "grad_norm": 4.90125036239624, "learning_rate": 1.5194174757281555e-05, "loss": 0.197, "step": 7170 }, { "epoch": 69.53373493975904, "grad_norm": 3.6346030235290527, "learning_rate": 1.5189320388349516e-05, "loss": 0.2344, "step": 7171 }, { "epoch": 69.5433734939759, "grad_norm": 3.253833055496216, "learning_rate": 1.5184466019417475e-05, "loss": 0.105, "step": 7172 }, { "epoch": 69.55301204819277, "grad_norm": 5.21101188659668, "learning_rate": 1.5179611650485436e-05, "loss": 0.1413, "step": 7173 }, { "epoch": 69.56265060240963, "grad_norm": 11.272223472595215, "learning_rate": 1.5174757281553397e-05, "loss": 0.3295, "step": 7174 }, { "epoch": 69.57228915662651, "grad_norm": 4.44465446472168, "learning_rate": 1.5169902912621359e-05, "loss": 0.0999, "step": 7175 }, { "epoch": 69.58192771084337, "grad_norm": 4.332655906677246, "learning_rate": 1.5165048543689323e-05, "loss": 0.3614, "step": 7176 }, { "epoch": 69.59156626506024, "grad_norm": 6.032704830169678, "learning_rate": 1.5160194174757284e-05, "loss": 0.1767, "step": 7177 }, { "epoch": 69.60120481927711, "grad_norm": 5.9535298347473145, "learning_rate": 1.5155339805825244e-05, "loss": 0.3704, "step": 7178 }, { "epoch": 69.61084337349398, "grad_norm": 10.666366577148438, "learning_rate": 1.5150485436893205e-05, "loss": 0.3182, "step": 7179 }, { "epoch": 69.62048192771084, "grad_norm": 3.82310152053833, "learning_rate": 1.5145631067961166e-05, "loss": 0.3468, "step": 7180 }, { "epoch": 69.6301204819277, "grad_norm": 4.130948066711426, "learning_rate": 1.5140776699029127e-05, "loss": 0.2424, "step": 7181 }, { "epoch": 69.63975903614458, "grad_norm": 13.460091590881348, "learning_rate": 1.5135922330097088e-05, "loss": 0.2367, "step": 7182 }, { "epoch": 69.64939759036145, "grad_norm": 6.817763328552246, "learning_rate": 1.513106796116505e-05, "loss": 0.3431, "step": 7183 }, { "epoch": 69.65903614457831, "grad_norm": 5.177753925323486, "learning_rate": 1.512621359223301e-05, "loss": 0.2264, "step": 7184 }, { "epoch": 69.66867469879519, "grad_norm": 12.711750984191895, "learning_rate": 1.5121359223300972e-05, "loss": 0.3019, "step": 7185 }, { "epoch": 69.67831325301205, "grad_norm": 17.797725677490234, "learning_rate": 1.5116504854368931e-05, "loss": 0.3056, "step": 7186 }, { "epoch": 69.68795180722891, "grad_norm": 4.914797306060791, "learning_rate": 1.5111650485436892e-05, "loss": 0.2789, "step": 7187 }, { "epoch": 69.69759036144578, "grad_norm": 5.374954700469971, "learning_rate": 1.5106796116504853e-05, "loss": 0.3341, "step": 7188 }, { "epoch": 69.70722891566265, "grad_norm": 23.593584060668945, "learning_rate": 1.5101941747572818e-05, "loss": 0.4387, "step": 7189 }, { "epoch": 69.71686746987952, "grad_norm": 2.951535940170288, "learning_rate": 1.509708737864078e-05, "loss": 0.1553, "step": 7190 }, { "epoch": 69.72650602409638, "grad_norm": 12.817415237426758, "learning_rate": 1.5092233009708739e-05, "loss": 0.2426, "step": 7191 }, { "epoch": 69.73614457831326, "grad_norm": 4.186347961425781, "learning_rate": 1.50873786407767e-05, "loss": 0.4219, "step": 7192 }, { "epoch": 69.74578313253012, "grad_norm": 10.75236701965332, "learning_rate": 1.5082524271844661e-05, "loss": 0.3276, "step": 7193 }, { "epoch": 69.75542168674698, "grad_norm": 5.242624759674072, "learning_rate": 1.5077669902912622e-05, "loss": 0.1677, "step": 7194 }, { "epoch": 69.76506024096386, "grad_norm": 4.8877854347229, "learning_rate": 1.5072815533980583e-05, "loss": 0.2688, "step": 7195 }, { "epoch": 69.77469879518073, "grad_norm": 0.9793592691421509, "learning_rate": 1.5067961165048544e-05, "loss": 0.052, "step": 7196 }, { "epoch": 69.78433734939759, "grad_norm": 3.2584376335144043, "learning_rate": 1.5063106796116505e-05, "loss": 0.2208, "step": 7197 }, { "epoch": 69.79397590361445, "grad_norm": 4.627966403961182, "learning_rate": 1.5058252427184467e-05, "loss": 0.3296, "step": 7198 }, { "epoch": 69.80361445783133, "grad_norm": 6.23416805267334, "learning_rate": 1.5053398058252428e-05, "loss": 0.1309, "step": 7199 }, { "epoch": 69.8132530120482, "grad_norm": 4.133662700653076, "learning_rate": 1.5048543689320387e-05, "loss": 0.1293, "step": 7200 }, { "epoch": 69.82289156626506, "grad_norm": 3.4665334224700928, "learning_rate": 1.5043689320388348e-05, "loss": 0.1508, "step": 7201 }, { "epoch": 69.83253012048193, "grad_norm": 16.862754821777344, "learning_rate": 1.5038834951456313e-05, "loss": 0.2196, "step": 7202 }, { "epoch": 69.8421686746988, "grad_norm": 3.7879810333251953, "learning_rate": 1.5033980582524274e-05, "loss": 0.321, "step": 7203 }, { "epoch": 69.85180722891566, "grad_norm": 4.113712310791016, "learning_rate": 1.5029126213592235e-05, "loss": 0.1201, "step": 7204 }, { "epoch": 69.86144578313252, "grad_norm": 5.943226337432861, "learning_rate": 1.5024271844660195e-05, "loss": 0.1735, "step": 7205 }, { "epoch": 69.8710843373494, "grad_norm": 4.48432731628418, "learning_rate": 1.5019417475728156e-05, "loss": 0.1698, "step": 7206 }, { "epoch": 69.88072289156626, "grad_norm": 4.067119598388672, "learning_rate": 1.5014563106796117e-05, "loss": 0.2686, "step": 7207 }, { "epoch": 69.89036144578313, "grad_norm": 6.690308570861816, "learning_rate": 1.5009708737864078e-05, "loss": 0.2462, "step": 7208 }, { "epoch": 69.9, "grad_norm": 5.1811933517456055, "learning_rate": 1.500485436893204e-05, "loss": 0.067, "step": 7209 }, { "epoch": 69.90963855421687, "grad_norm": 2.42472243309021, "learning_rate": 1.5e-05, "loss": 0.1231, "step": 7210 }, { "epoch": 69.91927710843373, "grad_norm": 19.905851364135742, "learning_rate": 1.4995145631067962e-05, "loss": 0.4344, "step": 7211 }, { "epoch": 69.9289156626506, "grad_norm": 25.60679817199707, "learning_rate": 1.4990291262135923e-05, "loss": 0.2544, "step": 7212 }, { "epoch": 69.93855421686747, "grad_norm": 2.225637674331665, "learning_rate": 1.4985436893203884e-05, "loss": 0.1645, "step": 7213 }, { "epoch": 69.94819277108434, "grad_norm": 2.632395029067993, "learning_rate": 1.4980582524271847e-05, "loss": 0.15, "step": 7214 }, { "epoch": 69.9578313253012, "grad_norm": 2.0753402709960938, "learning_rate": 1.4975728155339808e-05, "loss": 0.1477, "step": 7215 }, { "epoch": 69.96746987951808, "grad_norm": 11.804931640625, "learning_rate": 1.4970873786407769e-05, "loss": 0.224, "step": 7216 }, { "epoch": 69.97710843373494, "grad_norm": 5.808808326721191, "learning_rate": 1.496601941747573e-05, "loss": 0.1026, "step": 7217 }, { "epoch": 69.9867469879518, "grad_norm": 3.3747215270996094, "learning_rate": 1.4961165048543691e-05, "loss": 0.1663, "step": 7218 }, { "epoch": 70.00240963855421, "grad_norm": 2.5271267890930176, "learning_rate": 1.495631067961165e-05, "loss": 0.0861, "step": 7219 }, { "epoch": 70.01204819277109, "grad_norm": 3.262885332107544, "learning_rate": 1.4951456310679612e-05, "loss": 0.1726, "step": 7220 }, { "epoch": 70.02168674698795, "grad_norm": 6.127290725708008, "learning_rate": 1.4946601941747573e-05, "loss": 0.189, "step": 7221 }, { "epoch": 70.03132530120482, "grad_norm": 4.526941299438477, "learning_rate": 1.4941747572815534e-05, "loss": 0.1225, "step": 7222 }, { "epoch": 70.04096385542168, "grad_norm": 3.008774757385254, "learning_rate": 1.4936893203883495e-05, "loss": 0.1383, "step": 7223 }, { "epoch": 70.05060240963856, "grad_norm": 14.7131986618042, "learning_rate": 1.4932038834951456e-05, "loss": 0.3204, "step": 7224 }, { "epoch": 70.06024096385542, "grad_norm": 5.113036155700684, "learning_rate": 1.4927184466019418e-05, "loss": 0.2028, "step": 7225 }, { "epoch": 70.06987951807228, "grad_norm": 6.842735290527344, "learning_rate": 1.4922330097087379e-05, "loss": 0.1663, "step": 7226 }, { "epoch": 70.07951807228916, "grad_norm": 1.6621427536010742, "learning_rate": 1.4917475728155342e-05, "loss": 0.1297, "step": 7227 }, { "epoch": 70.08915662650602, "grad_norm": 14.994545936584473, "learning_rate": 1.4912621359223303e-05, "loss": 0.2045, "step": 7228 }, { "epoch": 70.09879518072289, "grad_norm": 6.437835216522217, "learning_rate": 1.4907766990291264e-05, "loss": 0.1125, "step": 7229 }, { "epoch": 70.10843373493977, "grad_norm": 7.4079413414001465, "learning_rate": 1.4902912621359225e-05, "loss": 0.3535, "step": 7230 }, { "epoch": 70.11807228915663, "grad_norm": 13.956947326660156, "learning_rate": 1.4898058252427186e-05, "loss": 0.3081, "step": 7231 }, { "epoch": 70.12771084337349, "grad_norm": 22.5035457611084, "learning_rate": 1.4893203883495147e-05, "loss": 0.4805, "step": 7232 }, { "epoch": 70.13734939759036, "grad_norm": 11.132275581359863, "learning_rate": 1.4888349514563107e-05, "loss": 0.1467, "step": 7233 }, { "epoch": 70.14698795180723, "grad_norm": 10.475896835327148, "learning_rate": 1.4883495145631068e-05, "loss": 0.2955, "step": 7234 }, { "epoch": 70.1566265060241, "grad_norm": 2.0910134315490723, "learning_rate": 1.4878640776699029e-05, "loss": 0.2165, "step": 7235 }, { "epoch": 70.16626506024096, "grad_norm": 2.0326919555664062, "learning_rate": 1.487378640776699e-05, "loss": 0.1454, "step": 7236 }, { "epoch": 70.17590361445784, "grad_norm": 1.0162240266799927, "learning_rate": 1.4868932038834951e-05, "loss": 0.1103, "step": 7237 }, { "epoch": 70.1855421686747, "grad_norm": 5.665231227874756, "learning_rate": 1.4864077669902912e-05, "loss": 0.1455, "step": 7238 }, { "epoch": 70.19518072289156, "grad_norm": 6.4855475425720215, "learning_rate": 1.4859223300970874e-05, "loss": 0.3806, "step": 7239 }, { "epoch": 70.20481927710843, "grad_norm": 4.515404224395752, "learning_rate": 1.4854368932038836e-05, "loss": 0.3637, "step": 7240 }, { "epoch": 70.2144578313253, "grad_norm": 1.8962981700897217, "learning_rate": 1.4849514563106798e-05, "loss": 0.1791, "step": 7241 }, { "epoch": 70.22409638554217, "grad_norm": 3.864392042160034, "learning_rate": 1.4844660194174759e-05, "loss": 0.3931, "step": 7242 }, { "epoch": 70.23373493975903, "grad_norm": 2.2776665687561035, "learning_rate": 1.483980582524272e-05, "loss": 0.171, "step": 7243 }, { "epoch": 70.24337349397591, "grad_norm": 4.160886764526367, "learning_rate": 1.4834951456310681e-05, "loss": 0.3707, "step": 7244 }, { "epoch": 70.25301204819277, "grad_norm": 5.302742004394531, "learning_rate": 1.4830097087378642e-05, "loss": 0.0871, "step": 7245 }, { "epoch": 70.26265060240964, "grad_norm": 12.986651420593262, "learning_rate": 1.4825242718446603e-05, "loss": 0.2204, "step": 7246 }, { "epoch": 70.2722891566265, "grad_norm": 4.224802017211914, "learning_rate": 1.4820388349514563e-05, "loss": 0.2501, "step": 7247 }, { "epoch": 70.28192771084338, "grad_norm": 4.810129165649414, "learning_rate": 1.4815533980582524e-05, "loss": 0.1972, "step": 7248 }, { "epoch": 70.29156626506024, "grad_norm": 6.353113174438477, "learning_rate": 1.4810679611650485e-05, "loss": 0.1781, "step": 7249 }, { "epoch": 70.3012048192771, "grad_norm": 4.8126630783081055, "learning_rate": 1.4805825242718446e-05, "loss": 0.1575, "step": 7250 }, { "epoch": 70.31084337349398, "grad_norm": 2.916579008102417, "learning_rate": 1.4800970873786407e-05, "loss": 0.1831, "step": 7251 }, { "epoch": 70.32048192771084, "grad_norm": 6.666656017303467, "learning_rate": 1.479611650485437e-05, "loss": 0.4618, "step": 7252 }, { "epoch": 70.33012048192771, "grad_norm": 5.949609279632568, "learning_rate": 1.4791262135922331e-05, "loss": 0.2914, "step": 7253 }, { "epoch": 70.33975903614459, "grad_norm": 4.699831008911133, "learning_rate": 1.4786407766990292e-05, "loss": 0.1657, "step": 7254 }, { "epoch": 70.34939759036145, "grad_norm": 4.869801998138428, "learning_rate": 1.4781553398058254e-05, "loss": 0.2538, "step": 7255 }, { "epoch": 70.35903614457831, "grad_norm": 3.659343957901001, "learning_rate": 1.4776699029126215e-05, "loss": 0.2004, "step": 7256 }, { "epoch": 70.36867469879518, "grad_norm": 5.5852742195129395, "learning_rate": 1.4771844660194176e-05, "loss": 0.2436, "step": 7257 }, { "epoch": 70.37831325301205, "grad_norm": 7.381258010864258, "learning_rate": 1.4766990291262137e-05, "loss": 0.2051, "step": 7258 }, { "epoch": 70.38795180722892, "grad_norm": 17.202167510986328, "learning_rate": 1.4762135922330098e-05, "loss": 0.3299, "step": 7259 }, { "epoch": 70.39759036144578, "grad_norm": 2.8283982276916504, "learning_rate": 1.4757281553398058e-05, "loss": 0.1751, "step": 7260 }, { "epoch": 70.40722891566266, "grad_norm": 2.8338308334350586, "learning_rate": 1.4752427184466019e-05, "loss": 0.1522, "step": 7261 }, { "epoch": 70.41686746987952, "grad_norm": 5.240034580230713, "learning_rate": 1.474757281553398e-05, "loss": 0.2338, "step": 7262 }, { "epoch": 70.42650602409638, "grad_norm": 9.697994232177734, "learning_rate": 1.4742718446601941e-05, "loss": 0.2962, "step": 7263 }, { "epoch": 70.43614457831325, "grad_norm": 26.054079055786133, "learning_rate": 1.4737864077669902e-05, "loss": 0.1596, "step": 7264 }, { "epoch": 70.44578313253012, "grad_norm": 3.7879440784454346, "learning_rate": 1.4733009708737867e-05, "loss": 0.2126, "step": 7265 }, { "epoch": 70.45542168674699, "grad_norm": 2.9518096446990967, "learning_rate": 1.4728155339805826e-05, "loss": 0.1966, "step": 7266 }, { "epoch": 70.46506024096385, "grad_norm": 5.19529914855957, "learning_rate": 1.4723300970873787e-05, "loss": 0.1108, "step": 7267 }, { "epoch": 70.47469879518073, "grad_norm": 6.151778221130371, "learning_rate": 1.4718446601941749e-05, "loss": 0.2681, "step": 7268 }, { "epoch": 70.48433734939759, "grad_norm": 6.976593494415283, "learning_rate": 1.471359223300971e-05, "loss": 0.2257, "step": 7269 }, { "epoch": 70.49397590361446, "grad_norm": 3.9965903759002686, "learning_rate": 1.470873786407767e-05, "loss": 0.2178, "step": 7270 }, { "epoch": 70.50361445783132, "grad_norm": 40.92190170288086, "learning_rate": 1.4703883495145632e-05, "loss": 0.3721, "step": 7271 }, { "epoch": 70.5132530120482, "grad_norm": 5.693111896514893, "learning_rate": 1.4699029126213593e-05, "loss": 0.2157, "step": 7272 }, { "epoch": 70.52289156626506, "grad_norm": 9.85759162902832, "learning_rate": 1.4694174757281554e-05, "loss": 0.2284, "step": 7273 }, { "epoch": 70.53253012048192, "grad_norm": 1.4198194742202759, "learning_rate": 1.4689320388349514e-05, "loss": 0.1325, "step": 7274 }, { "epoch": 70.5421686746988, "grad_norm": 10.322670936584473, "learning_rate": 1.4684466019417475e-05, "loss": 0.3994, "step": 7275 }, { "epoch": 70.55180722891566, "grad_norm": 8.039132118225098, "learning_rate": 1.4679611650485436e-05, "loss": 0.2086, "step": 7276 }, { "epoch": 70.56144578313253, "grad_norm": 3.08992075920105, "learning_rate": 1.46747572815534e-05, "loss": 0.2013, "step": 7277 }, { "epoch": 70.5710843373494, "grad_norm": 10.440771102905273, "learning_rate": 1.4669902912621362e-05, "loss": 0.2848, "step": 7278 }, { "epoch": 70.58072289156627, "grad_norm": 26.54157066345215, "learning_rate": 1.4665048543689323e-05, "loss": 0.2869, "step": 7279 }, { "epoch": 70.59036144578313, "grad_norm": 20.043777465820312, "learning_rate": 1.4660194174757282e-05, "loss": 0.2401, "step": 7280 }, { "epoch": 70.6, "grad_norm": 3.2317750453948975, "learning_rate": 1.4655339805825243e-05, "loss": 0.1809, "step": 7281 }, { "epoch": 70.60963855421687, "grad_norm": 2.257962465286255, "learning_rate": 1.4650485436893205e-05, "loss": 0.0985, "step": 7282 }, { "epoch": 70.61927710843374, "grad_norm": 4.30560827255249, "learning_rate": 1.4645631067961166e-05, "loss": 0.2671, "step": 7283 }, { "epoch": 70.6289156626506, "grad_norm": 7.36243200302124, "learning_rate": 1.4640776699029127e-05, "loss": 0.2022, "step": 7284 }, { "epoch": 70.63855421686748, "grad_norm": 5.711222171783447, "learning_rate": 1.4635922330097088e-05, "loss": 0.219, "step": 7285 }, { "epoch": 70.64819277108434, "grad_norm": 7.679614543914795, "learning_rate": 1.4631067961165049e-05, "loss": 0.1385, "step": 7286 }, { "epoch": 70.6578313253012, "grad_norm": 5.192963600158691, "learning_rate": 1.462621359223301e-05, "loss": 0.1028, "step": 7287 }, { "epoch": 70.66746987951807, "grad_norm": 5.354909896850586, "learning_rate": 1.462135922330097e-05, "loss": 0.2113, "step": 7288 }, { "epoch": 70.67710843373494, "grad_norm": 2.907486915588379, "learning_rate": 1.4616504854368931e-05, "loss": 0.1541, "step": 7289 }, { "epoch": 70.6867469879518, "grad_norm": 5.3598856925964355, "learning_rate": 1.4611650485436895e-05, "loss": 0.3089, "step": 7290 }, { "epoch": 70.69638554216867, "grad_norm": 6.517385482788086, "learning_rate": 1.4606796116504857e-05, "loss": 0.2435, "step": 7291 }, { "epoch": 70.70602409638555, "grad_norm": 7.103650093078613, "learning_rate": 1.4601941747572818e-05, "loss": 0.3753, "step": 7292 }, { "epoch": 70.71566265060241, "grad_norm": 3.518958806991577, "learning_rate": 1.4597087378640779e-05, "loss": 0.1502, "step": 7293 }, { "epoch": 70.72530120481927, "grad_norm": 5.044373035430908, "learning_rate": 1.4592233009708738e-05, "loss": 0.0984, "step": 7294 }, { "epoch": 70.73493975903614, "grad_norm": 6.182910919189453, "learning_rate": 1.45873786407767e-05, "loss": 0.2199, "step": 7295 }, { "epoch": 70.74457831325302, "grad_norm": 2.195108413696289, "learning_rate": 1.458252427184466e-05, "loss": 0.1358, "step": 7296 }, { "epoch": 70.75421686746988, "grad_norm": 5.144702911376953, "learning_rate": 1.4577669902912622e-05, "loss": 0.2465, "step": 7297 }, { "epoch": 70.76385542168674, "grad_norm": 13.805564880371094, "learning_rate": 1.4572815533980583e-05, "loss": 0.278, "step": 7298 }, { "epoch": 70.77349397590362, "grad_norm": 7.015249729156494, "learning_rate": 1.4567961165048544e-05, "loss": 0.1653, "step": 7299 }, { "epoch": 70.78313253012048, "grad_norm": 2.5156805515289307, "learning_rate": 1.4563106796116505e-05, "loss": 0.1492, "step": 7300 }, { "epoch": 70.79277108433735, "grad_norm": 4.233535289764404, "learning_rate": 1.4558252427184466e-05, "loss": 0.1644, "step": 7301 }, { "epoch": 70.80240963855422, "grad_norm": 3.5171525478363037, "learning_rate": 1.4553398058252426e-05, "loss": 0.2214, "step": 7302 }, { "epoch": 70.81204819277109, "grad_norm": 3.4701530933380127, "learning_rate": 1.454854368932039e-05, "loss": 0.2288, "step": 7303 }, { "epoch": 70.82168674698795, "grad_norm": 11.408559799194336, "learning_rate": 1.4543689320388351e-05, "loss": 0.3872, "step": 7304 }, { "epoch": 70.83132530120481, "grad_norm": 13.320489883422852, "learning_rate": 1.4538834951456313e-05, "loss": 0.1526, "step": 7305 }, { "epoch": 70.84096385542169, "grad_norm": 2.749523639678955, "learning_rate": 1.4533980582524274e-05, "loss": 0.2232, "step": 7306 }, { "epoch": 70.85060240963855, "grad_norm": 5.2997846603393555, "learning_rate": 1.4529126213592233e-05, "loss": 0.2691, "step": 7307 }, { "epoch": 70.86024096385542, "grad_norm": 1.8377981185913086, "learning_rate": 1.4524271844660194e-05, "loss": 0.1022, "step": 7308 }, { "epoch": 70.8698795180723, "grad_norm": 6.676365375518799, "learning_rate": 1.4519417475728156e-05, "loss": 0.362, "step": 7309 }, { "epoch": 70.87951807228916, "grad_norm": 2.5965826511383057, "learning_rate": 1.4514563106796117e-05, "loss": 0.1556, "step": 7310 }, { "epoch": 70.88915662650602, "grad_norm": 12.04053783416748, "learning_rate": 1.4509708737864078e-05, "loss": 0.3286, "step": 7311 }, { "epoch": 70.89879518072289, "grad_norm": 4.502112865447998, "learning_rate": 1.4504854368932039e-05, "loss": 0.1634, "step": 7312 }, { "epoch": 70.90843373493976, "grad_norm": 5.839219093322754, "learning_rate": 1.45e-05, "loss": 0.293, "step": 7313 }, { "epoch": 70.91807228915663, "grad_norm": 3.1204233169555664, "learning_rate": 1.4495145631067961e-05, "loss": 0.2694, "step": 7314 }, { "epoch": 70.92771084337349, "grad_norm": 4.912414073944092, "learning_rate": 1.4490291262135924e-05, "loss": 0.2524, "step": 7315 }, { "epoch": 70.93734939759037, "grad_norm": 84.34854125976562, "learning_rate": 1.4485436893203885e-05, "loss": 0.3083, "step": 7316 }, { "epoch": 70.94698795180723, "grad_norm": 2.0388450622558594, "learning_rate": 1.4480582524271846e-05, "loss": 0.1729, "step": 7317 }, { "epoch": 70.9566265060241, "grad_norm": 5.338753700256348, "learning_rate": 1.4475728155339808e-05, "loss": 0.3796, "step": 7318 }, { "epoch": 70.96626506024096, "grad_norm": 4.748612880706787, "learning_rate": 1.4470873786407769e-05, "loss": 0.3404, "step": 7319 }, { "epoch": 70.97590361445783, "grad_norm": 5.368844985961914, "learning_rate": 1.446601941747573e-05, "loss": 0.1966, "step": 7320 }, { "epoch": 70.9855421686747, "grad_norm": 5.111117839813232, "learning_rate": 1.446116504854369e-05, "loss": 0.2183, "step": 7321 }, { "epoch": 71.0012048192771, "grad_norm": 2.4942703247070312, "learning_rate": 1.445631067961165e-05, "loss": 0.3094, "step": 7322 }, { "epoch": 71.01084337349397, "grad_norm": 2.7781190872192383, "learning_rate": 1.4451456310679612e-05, "loss": 0.1634, "step": 7323 }, { "epoch": 71.02048192771085, "grad_norm": 16.541292190551758, "learning_rate": 1.4446601941747573e-05, "loss": 0.2138, "step": 7324 }, { "epoch": 71.03012048192771, "grad_norm": 9.845409393310547, "learning_rate": 1.4441747572815534e-05, "loss": 0.2117, "step": 7325 }, { "epoch": 71.03975903614457, "grad_norm": 6.019775867462158, "learning_rate": 1.4436893203883495e-05, "loss": 0.2336, "step": 7326 }, { "epoch": 71.04939759036145, "grad_norm": 4.735624313354492, "learning_rate": 1.4432038834951456e-05, "loss": 0.192, "step": 7327 }, { "epoch": 71.05903614457831, "grad_norm": 6.984135150909424, "learning_rate": 1.4427184466019419e-05, "loss": 0.155, "step": 7328 }, { "epoch": 71.06867469879518, "grad_norm": 4.1220526695251465, "learning_rate": 1.442233009708738e-05, "loss": 0.3044, "step": 7329 }, { "epoch": 71.07831325301204, "grad_norm": 4.532059192657471, "learning_rate": 1.4417475728155341e-05, "loss": 0.1772, "step": 7330 }, { "epoch": 71.08795180722892, "grad_norm": 4.934746742248535, "learning_rate": 1.4412621359223302e-05, "loss": 0.2319, "step": 7331 }, { "epoch": 71.09759036144578, "grad_norm": 5.637747287750244, "learning_rate": 1.4407766990291264e-05, "loss": 0.3488, "step": 7332 }, { "epoch": 71.10722891566265, "grad_norm": 10.819060325622559, "learning_rate": 1.4402912621359225e-05, "loss": 0.3666, "step": 7333 }, { "epoch": 71.11686746987952, "grad_norm": 6.1219258308410645, "learning_rate": 1.4398058252427186e-05, "loss": 0.296, "step": 7334 }, { "epoch": 71.12650602409639, "grad_norm": 9.166553497314453, "learning_rate": 1.4393203883495145e-05, "loss": 0.3145, "step": 7335 }, { "epoch": 71.13614457831325, "grad_norm": 4.27263069152832, "learning_rate": 1.4388349514563106e-05, "loss": 0.2118, "step": 7336 }, { "epoch": 71.14578313253013, "grad_norm": 6.075311183929443, "learning_rate": 1.4383495145631068e-05, "loss": 0.2747, "step": 7337 }, { "epoch": 71.15542168674699, "grad_norm": 5.719926357269287, "learning_rate": 1.4378640776699029e-05, "loss": 0.3434, "step": 7338 }, { "epoch": 71.16506024096385, "grad_norm": 8.84835433959961, "learning_rate": 1.437378640776699e-05, "loss": 0.2292, "step": 7339 }, { "epoch": 71.17469879518072, "grad_norm": 14.261758804321289, "learning_rate": 1.4368932038834951e-05, "loss": 0.3268, "step": 7340 }, { "epoch": 71.1843373493976, "grad_norm": 5.357775688171387, "learning_rate": 1.4364077669902914e-05, "loss": 0.3113, "step": 7341 }, { "epoch": 71.19397590361446, "grad_norm": 5.3052144050598145, "learning_rate": 1.4359223300970875e-05, "loss": 0.2614, "step": 7342 }, { "epoch": 71.20361445783132, "grad_norm": 4.111733913421631, "learning_rate": 1.4354368932038836e-05, "loss": 0.2182, "step": 7343 }, { "epoch": 71.2132530120482, "grad_norm": 7.07475471496582, "learning_rate": 1.4349514563106797e-05, "loss": 0.1983, "step": 7344 }, { "epoch": 71.22289156626506, "grad_norm": 3.0635557174682617, "learning_rate": 1.4344660194174758e-05, "loss": 0.1938, "step": 7345 }, { "epoch": 71.23253012048193, "grad_norm": 3.8389153480529785, "learning_rate": 1.433980582524272e-05, "loss": 0.147, "step": 7346 }, { "epoch": 71.24216867469879, "grad_norm": 5.2888503074646, "learning_rate": 1.433495145631068e-05, "loss": 0.2877, "step": 7347 }, { "epoch": 71.25180722891567, "grad_norm": 4.470031261444092, "learning_rate": 1.4330097087378642e-05, "loss": 0.3819, "step": 7348 }, { "epoch": 71.26144578313253, "grad_norm": 4.502262592315674, "learning_rate": 1.4325242718446601e-05, "loss": 0.2192, "step": 7349 }, { "epoch": 71.2710843373494, "grad_norm": 3.6534903049468994, "learning_rate": 1.4320388349514562e-05, "loss": 0.1466, "step": 7350 }, { "epoch": 71.28072289156627, "grad_norm": 12.112414360046387, "learning_rate": 1.4315533980582524e-05, "loss": 0.3591, "step": 7351 }, { "epoch": 71.29036144578313, "grad_norm": 4.958770275115967, "learning_rate": 1.4310679611650485e-05, "loss": 0.2115, "step": 7352 }, { "epoch": 71.3, "grad_norm": 22.643857955932617, "learning_rate": 1.430582524271845e-05, "loss": 0.3731, "step": 7353 }, { "epoch": 71.30963855421686, "grad_norm": 7.820619583129883, "learning_rate": 1.4300970873786409e-05, "loss": 0.1266, "step": 7354 }, { "epoch": 71.31927710843374, "grad_norm": 3.901688814163208, "learning_rate": 1.429611650485437e-05, "loss": 0.2454, "step": 7355 }, { "epoch": 71.3289156626506, "grad_norm": 10.359216690063477, "learning_rate": 1.4291262135922331e-05, "loss": 0.2756, "step": 7356 }, { "epoch": 71.33855421686746, "grad_norm": 5.10329008102417, "learning_rate": 1.4286407766990292e-05, "loss": 0.2409, "step": 7357 }, { "epoch": 71.34819277108434, "grad_norm": 5.545781135559082, "learning_rate": 1.4281553398058253e-05, "loss": 0.2735, "step": 7358 }, { "epoch": 71.3578313253012, "grad_norm": 7.765834331512451, "learning_rate": 1.4276699029126214e-05, "loss": 0.3961, "step": 7359 }, { "epoch": 71.36746987951807, "grad_norm": 5.0416388511657715, "learning_rate": 1.4271844660194176e-05, "loss": 0.2986, "step": 7360 }, { "epoch": 71.37710843373495, "grad_norm": 4.935950756072998, "learning_rate": 1.4266990291262137e-05, "loss": 0.2542, "step": 7361 }, { "epoch": 71.38674698795181, "grad_norm": 23.9366397857666, "learning_rate": 1.4262135922330098e-05, "loss": 0.2967, "step": 7362 }, { "epoch": 71.39638554216867, "grad_norm": 4.746790885925293, "learning_rate": 1.4257281553398057e-05, "loss": 0.2569, "step": 7363 }, { "epoch": 71.40602409638554, "grad_norm": 2.5142576694488525, "learning_rate": 1.4252427184466019e-05, "loss": 0.077, "step": 7364 }, { "epoch": 71.41566265060241, "grad_norm": 3.427422285079956, "learning_rate": 1.424757281553398e-05, "loss": 0.1731, "step": 7365 }, { "epoch": 71.42530120481928, "grad_norm": 7.9286017417907715, "learning_rate": 1.4242718446601944e-05, "loss": 0.3338, "step": 7366 }, { "epoch": 71.43493975903614, "grad_norm": 3.8763129711151123, "learning_rate": 1.4237864077669905e-05, "loss": 0.3306, "step": 7367 }, { "epoch": 71.44457831325302, "grad_norm": 7.378862380981445, "learning_rate": 1.4233009708737865e-05, "loss": 0.2295, "step": 7368 }, { "epoch": 71.45421686746988, "grad_norm": 5.067225456237793, "learning_rate": 1.4228155339805826e-05, "loss": 0.3779, "step": 7369 }, { "epoch": 71.46385542168674, "grad_norm": 6.044809341430664, "learning_rate": 1.4223300970873787e-05, "loss": 0.2426, "step": 7370 }, { "epoch": 71.47349397590361, "grad_norm": 3.9023966789245605, "learning_rate": 1.4218446601941748e-05, "loss": 0.2071, "step": 7371 }, { "epoch": 71.48313253012049, "grad_norm": 4.155221462249756, "learning_rate": 1.421359223300971e-05, "loss": 0.2417, "step": 7372 }, { "epoch": 71.49277108433735, "grad_norm": 3.614845037460327, "learning_rate": 1.420873786407767e-05, "loss": 0.1066, "step": 7373 }, { "epoch": 71.50240963855421, "grad_norm": 13.98613452911377, "learning_rate": 1.4203883495145632e-05, "loss": 0.3206, "step": 7374 }, { "epoch": 71.51204819277109, "grad_norm": 2.7996582984924316, "learning_rate": 1.4199029126213593e-05, "loss": 0.0979, "step": 7375 }, { "epoch": 71.52168674698795, "grad_norm": 8.352932929992676, "learning_rate": 1.4194174757281552e-05, "loss": 0.2604, "step": 7376 }, { "epoch": 71.53132530120482, "grad_norm": 4.674605846405029, "learning_rate": 1.4189320388349513e-05, "loss": 0.2727, "step": 7377 }, { "epoch": 71.54096385542168, "grad_norm": 5.898141384124756, "learning_rate": 1.4184466019417478e-05, "loss": 0.2234, "step": 7378 }, { "epoch": 71.55060240963856, "grad_norm": 11.221837997436523, "learning_rate": 1.4179611650485439e-05, "loss": 0.3016, "step": 7379 }, { "epoch": 71.56024096385542, "grad_norm": 3.742391586303711, "learning_rate": 1.41747572815534e-05, "loss": 0.1742, "step": 7380 }, { "epoch": 71.56987951807228, "grad_norm": 7.689365386962891, "learning_rate": 1.4169902912621361e-05, "loss": 0.3928, "step": 7381 }, { "epoch": 71.57951807228916, "grad_norm": 4.468850612640381, "learning_rate": 1.416504854368932e-05, "loss": 0.2612, "step": 7382 }, { "epoch": 71.58915662650602, "grad_norm": 6.423106670379639, "learning_rate": 1.4160194174757282e-05, "loss": 0.3307, "step": 7383 }, { "epoch": 71.59879518072289, "grad_norm": 4.336431503295898, "learning_rate": 1.4155339805825243e-05, "loss": 0.1318, "step": 7384 }, { "epoch": 71.60843373493977, "grad_norm": 3.759997844696045, "learning_rate": 1.4150485436893204e-05, "loss": 0.1583, "step": 7385 }, { "epoch": 71.61807228915663, "grad_norm": 8.25376033782959, "learning_rate": 1.4145631067961165e-05, "loss": 0.3582, "step": 7386 }, { "epoch": 71.62771084337349, "grad_norm": 12.173544883728027, "learning_rate": 1.4140776699029127e-05, "loss": 0.5577, "step": 7387 }, { "epoch": 71.63734939759036, "grad_norm": 16.91606903076172, "learning_rate": 1.4135922330097088e-05, "loss": 0.1575, "step": 7388 }, { "epoch": 71.64698795180723, "grad_norm": 5.104602336883545, "learning_rate": 1.4131067961165049e-05, "loss": 0.2874, "step": 7389 }, { "epoch": 71.6566265060241, "grad_norm": 4.065889358520508, "learning_rate": 1.4126213592233008e-05, "loss": 0.1878, "step": 7390 }, { "epoch": 71.66626506024096, "grad_norm": 6.978777885437012, "learning_rate": 1.4121359223300973e-05, "loss": 0.1708, "step": 7391 }, { "epoch": 71.67590361445784, "grad_norm": 10.111701965332031, "learning_rate": 1.4116504854368934e-05, "loss": 0.2438, "step": 7392 }, { "epoch": 71.6855421686747, "grad_norm": 6.8700337409973145, "learning_rate": 1.4111650485436895e-05, "loss": 0.4167, "step": 7393 }, { "epoch": 71.69518072289156, "grad_norm": 8.169598579406738, "learning_rate": 1.4106796116504856e-05, "loss": 0.2063, "step": 7394 }, { "epoch": 71.70481927710843, "grad_norm": 2.5868418216705322, "learning_rate": 1.4101941747572817e-05, "loss": 0.1684, "step": 7395 }, { "epoch": 71.7144578313253, "grad_norm": 3.413095235824585, "learning_rate": 1.4097087378640777e-05, "loss": 0.2266, "step": 7396 }, { "epoch": 71.72409638554217, "grad_norm": 3.561403512954712, "learning_rate": 1.4092233009708738e-05, "loss": 0.2548, "step": 7397 }, { "epoch": 71.73373493975903, "grad_norm": 3.173137903213501, "learning_rate": 1.40873786407767e-05, "loss": 0.1271, "step": 7398 }, { "epoch": 71.74337349397591, "grad_norm": 5.031655788421631, "learning_rate": 1.408252427184466e-05, "loss": 0.2756, "step": 7399 }, { "epoch": 71.75301204819277, "grad_norm": 8.433070182800293, "learning_rate": 1.4077669902912621e-05, "loss": 0.511, "step": 7400 }, { "epoch": 71.76265060240964, "grad_norm": 3.168119192123413, "learning_rate": 1.4072815533980583e-05, "loss": 0.2157, "step": 7401 }, { "epoch": 71.7722891566265, "grad_norm": 10.277710914611816, "learning_rate": 1.4067961165048544e-05, "loss": 0.3181, "step": 7402 }, { "epoch": 71.78192771084338, "grad_norm": 4.900312900543213, "learning_rate": 1.4063106796116505e-05, "loss": 0.2959, "step": 7403 }, { "epoch": 71.79156626506024, "grad_norm": 2.8223912715911865, "learning_rate": 1.4058252427184468e-05, "loss": 0.1161, "step": 7404 }, { "epoch": 71.8012048192771, "grad_norm": 7.567714214324951, "learning_rate": 1.4053398058252429e-05, "loss": 0.1867, "step": 7405 }, { "epoch": 71.81084337349398, "grad_norm": 4.64154577255249, "learning_rate": 1.404854368932039e-05, "loss": 0.1745, "step": 7406 }, { "epoch": 71.82048192771084, "grad_norm": 4.349926948547363, "learning_rate": 1.4043689320388351e-05, "loss": 0.1621, "step": 7407 }, { "epoch": 71.83012048192771, "grad_norm": 4.861605167388916, "learning_rate": 1.4038834951456312e-05, "loss": 0.3454, "step": 7408 }, { "epoch": 71.83975903614459, "grad_norm": 3.111940860748291, "learning_rate": 1.4033980582524272e-05, "loss": 0.1243, "step": 7409 }, { "epoch": 71.84939759036145, "grad_norm": 5.249116897583008, "learning_rate": 1.4029126213592233e-05, "loss": 0.1598, "step": 7410 }, { "epoch": 71.85903614457831, "grad_norm": 5.313566207885742, "learning_rate": 1.4024271844660194e-05, "loss": 0.1787, "step": 7411 }, { "epoch": 71.86867469879518, "grad_norm": 5.094844818115234, "learning_rate": 1.4019417475728155e-05, "loss": 0.2518, "step": 7412 }, { "epoch": 71.87831325301205, "grad_norm": 4.335862159729004, "learning_rate": 1.4014563106796116e-05, "loss": 0.1879, "step": 7413 }, { "epoch": 71.88795180722892, "grad_norm": 3.823552370071411, "learning_rate": 1.4009708737864077e-05, "loss": 0.248, "step": 7414 }, { "epoch": 71.89759036144578, "grad_norm": 7.747868061065674, "learning_rate": 1.4004854368932039e-05, "loss": 0.2995, "step": 7415 }, { "epoch": 71.90722891566266, "grad_norm": 2.8748607635498047, "learning_rate": 1.4000000000000001e-05, "loss": 0.1329, "step": 7416 }, { "epoch": 71.91686746987952, "grad_norm": 3.5324044227600098, "learning_rate": 1.3995145631067963e-05, "loss": 0.1237, "step": 7417 }, { "epoch": 71.92650602409638, "grad_norm": 6.249337673187256, "learning_rate": 1.3990291262135924e-05, "loss": 0.2579, "step": 7418 }, { "epoch": 71.93614457831325, "grad_norm": 2.9217355251312256, "learning_rate": 1.3985436893203885e-05, "loss": 0.2634, "step": 7419 }, { "epoch": 71.94578313253012, "grad_norm": 3.298149824142456, "learning_rate": 1.3980582524271846e-05, "loss": 0.2038, "step": 7420 }, { "epoch": 71.95542168674699, "grad_norm": 2.688784599304199, "learning_rate": 1.3975728155339807e-05, "loss": 0.1943, "step": 7421 }, { "epoch": 71.96506024096385, "grad_norm": 12.409194946289062, "learning_rate": 1.3970873786407768e-05, "loss": 0.4543, "step": 7422 }, { "epoch": 71.97469879518073, "grad_norm": 5.733576774597168, "learning_rate": 1.3966019417475728e-05, "loss": 0.1323, "step": 7423 }, { "epoch": 71.98433734939759, "grad_norm": 4.9406256675720215, "learning_rate": 1.3961165048543689e-05, "loss": 0.2215, "step": 7424 }, { "epoch": 71.99397590361446, "grad_norm": 7.615166187286377, "learning_rate": 1.395631067961165e-05, "loss": 0.2551, "step": 7425 }, { "epoch": 72.00963855421686, "grad_norm": 4.257242202758789, "learning_rate": 1.3951456310679611e-05, "loss": 0.1658, "step": 7426 }, { "epoch": 72.01927710843374, "grad_norm": 4.4579949378967285, "learning_rate": 1.3946601941747572e-05, "loss": 0.3098, "step": 7427 }, { "epoch": 72.0289156626506, "grad_norm": 4.369452953338623, "learning_rate": 1.3941747572815534e-05, "loss": 0.2999, "step": 7428 }, { "epoch": 72.03855421686747, "grad_norm": 2.908606767654419, "learning_rate": 1.3936893203883496e-05, "loss": 0.2483, "step": 7429 }, { "epoch": 72.04819277108433, "grad_norm": 6.627464771270752, "learning_rate": 1.3932038834951458e-05, "loss": 0.325, "step": 7430 }, { "epoch": 72.05783132530121, "grad_norm": 3.2633285522460938, "learning_rate": 1.3927184466019419e-05, "loss": 0.135, "step": 7431 }, { "epoch": 72.06746987951807, "grad_norm": 4.628861904144287, "learning_rate": 1.392233009708738e-05, "loss": 0.308, "step": 7432 }, { "epoch": 72.07710843373494, "grad_norm": 5.661384105682373, "learning_rate": 1.3917475728155341e-05, "loss": 0.2268, "step": 7433 }, { "epoch": 72.08674698795181, "grad_norm": 5.605383396148682, "learning_rate": 1.3912621359223302e-05, "loss": 0.1808, "step": 7434 }, { "epoch": 72.09638554216868, "grad_norm": 8.015965461730957, "learning_rate": 1.3907766990291263e-05, "loss": 0.3925, "step": 7435 }, { "epoch": 72.10602409638554, "grad_norm": 6.2174906730651855, "learning_rate": 1.3902912621359224e-05, "loss": 0.1998, "step": 7436 }, { "epoch": 72.1156626506024, "grad_norm": 4.378666877746582, "learning_rate": 1.3898058252427184e-05, "loss": 0.2429, "step": 7437 }, { "epoch": 72.12530120481928, "grad_norm": 7.772856712341309, "learning_rate": 1.3893203883495145e-05, "loss": 0.4779, "step": 7438 }, { "epoch": 72.13493975903614, "grad_norm": 4.977338790893555, "learning_rate": 1.3888349514563106e-05, "loss": 0.1505, "step": 7439 }, { "epoch": 72.144578313253, "grad_norm": 6.693029880523682, "learning_rate": 1.3883495145631067e-05, "loss": 0.273, "step": 7440 }, { "epoch": 72.15421686746988, "grad_norm": 7.192094802856445, "learning_rate": 1.3878640776699032e-05, "loss": 0.2182, "step": 7441 }, { "epoch": 72.16385542168675, "grad_norm": 4.707312107086182, "learning_rate": 1.3873786407766993e-05, "loss": 0.3874, "step": 7442 }, { "epoch": 72.17349397590361, "grad_norm": 4.84478235244751, "learning_rate": 1.3868932038834952e-05, "loss": 0.239, "step": 7443 }, { "epoch": 72.18313253012049, "grad_norm": 6.76341438293457, "learning_rate": 1.3864077669902914e-05, "loss": 0.3331, "step": 7444 }, { "epoch": 72.19277108433735, "grad_norm": 3.7235352993011475, "learning_rate": 1.3859223300970875e-05, "loss": 0.252, "step": 7445 }, { "epoch": 72.20240963855422, "grad_norm": 5.373242378234863, "learning_rate": 1.3854368932038836e-05, "loss": 0.3079, "step": 7446 }, { "epoch": 72.21204819277108, "grad_norm": 5.506955146789551, "learning_rate": 1.3849514563106797e-05, "loss": 0.3089, "step": 7447 }, { "epoch": 72.22168674698796, "grad_norm": 5.4674787521362305, "learning_rate": 1.3844660194174758e-05, "loss": 0.2035, "step": 7448 }, { "epoch": 72.23132530120482, "grad_norm": 9.062259674072266, "learning_rate": 1.383980582524272e-05, "loss": 0.3457, "step": 7449 }, { "epoch": 72.24096385542168, "grad_norm": 5.11400032043457, "learning_rate": 1.383495145631068e-05, "loss": 0.1566, "step": 7450 }, { "epoch": 72.25060240963856, "grad_norm": 4.180708408355713, "learning_rate": 1.383009708737864e-05, "loss": 0.197, "step": 7451 }, { "epoch": 72.26024096385542, "grad_norm": 5.131812572479248, "learning_rate": 1.3825242718446601e-05, "loss": 0.3299, "step": 7452 }, { "epoch": 72.26987951807229, "grad_norm": 18.849853515625, "learning_rate": 1.3820388349514562e-05, "loss": 0.2474, "step": 7453 }, { "epoch": 72.27951807228915, "grad_norm": 5.042159080505371, "learning_rate": 1.3815533980582527e-05, "loss": 0.3304, "step": 7454 }, { "epoch": 72.28915662650603, "grad_norm": 5.8108720779418945, "learning_rate": 1.3810679611650488e-05, "loss": 0.328, "step": 7455 }, { "epoch": 72.29879518072289, "grad_norm": 3.063063383102417, "learning_rate": 1.3805825242718447e-05, "loss": 0.2315, "step": 7456 }, { "epoch": 72.30843373493975, "grad_norm": 7.643234729766846, "learning_rate": 1.3800970873786408e-05, "loss": 0.2317, "step": 7457 }, { "epoch": 72.31807228915663, "grad_norm": 6.3178863525390625, "learning_rate": 1.379611650485437e-05, "loss": 0.5254, "step": 7458 }, { "epoch": 72.3277108433735, "grad_norm": 3.7402822971343994, "learning_rate": 1.379126213592233e-05, "loss": 0.2759, "step": 7459 }, { "epoch": 72.33734939759036, "grad_norm": 1.9196549654006958, "learning_rate": 1.3786407766990292e-05, "loss": 0.0981, "step": 7460 }, { "epoch": 72.34698795180722, "grad_norm": 5.647222995758057, "learning_rate": 1.3781553398058253e-05, "loss": 0.3133, "step": 7461 }, { "epoch": 72.3566265060241, "grad_norm": 4.859006881713867, "learning_rate": 1.3776699029126214e-05, "loss": 0.3742, "step": 7462 }, { "epoch": 72.36626506024096, "grad_norm": 3.7933831214904785, "learning_rate": 1.3771844660194175e-05, "loss": 0.1138, "step": 7463 }, { "epoch": 72.37590361445783, "grad_norm": 4.1984171867370605, "learning_rate": 1.3766990291262136e-05, "loss": 0.1051, "step": 7464 }, { "epoch": 72.3855421686747, "grad_norm": 7.219234943389893, "learning_rate": 1.3762135922330096e-05, "loss": 0.3205, "step": 7465 }, { "epoch": 72.39518072289157, "grad_norm": 3.6462574005126953, "learning_rate": 1.3757281553398057e-05, "loss": 0.1637, "step": 7466 }, { "epoch": 72.40481927710843, "grad_norm": 5.465885162353516, "learning_rate": 1.3752427184466022e-05, "loss": 0.2793, "step": 7467 }, { "epoch": 72.41445783132531, "grad_norm": 9.586843490600586, "learning_rate": 1.3747572815533983e-05, "loss": 0.2889, "step": 7468 }, { "epoch": 72.42409638554217, "grad_norm": 3.913090229034424, "learning_rate": 1.3742718446601944e-05, "loss": 0.3928, "step": 7469 }, { "epoch": 72.43373493975903, "grad_norm": 2.9956729412078857, "learning_rate": 1.3737864077669903e-05, "loss": 0.1253, "step": 7470 }, { "epoch": 72.4433734939759, "grad_norm": 4.875590801239014, "learning_rate": 1.3733009708737864e-05, "loss": 0.2187, "step": 7471 }, { "epoch": 72.45301204819278, "grad_norm": 3.4632623195648193, "learning_rate": 1.3728155339805826e-05, "loss": 0.1961, "step": 7472 }, { "epoch": 72.46265060240964, "grad_norm": 9.062500953674316, "learning_rate": 1.3723300970873787e-05, "loss": 0.2811, "step": 7473 }, { "epoch": 72.4722891566265, "grad_norm": 4.0369954109191895, "learning_rate": 1.3718446601941748e-05, "loss": 0.2511, "step": 7474 }, { "epoch": 72.48192771084338, "grad_norm": 4.511277198791504, "learning_rate": 1.3713592233009709e-05, "loss": 0.1874, "step": 7475 }, { "epoch": 72.49156626506024, "grad_norm": 6.045956611633301, "learning_rate": 1.370873786407767e-05, "loss": 0.3798, "step": 7476 }, { "epoch": 72.5012048192771, "grad_norm": 6.497618675231934, "learning_rate": 1.3703883495145631e-05, "loss": 0.2598, "step": 7477 }, { "epoch": 72.51084337349397, "grad_norm": 4.279545307159424, "learning_rate": 1.3699029126213593e-05, "loss": 0.262, "step": 7478 }, { "epoch": 72.52048192771085, "grad_norm": 3.31215500831604, "learning_rate": 1.3694174757281555e-05, "loss": 0.1845, "step": 7479 }, { "epoch": 72.53012048192771, "grad_norm": 9.77278995513916, "learning_rate": 1.3689320388349517e-05, "loss": 0.1477, "step": 7480 }, { "epoch": 72.53975903614457, "grad_norm": 3.392622470855713, "learning_rate": 1.3684466019417478e-05, "loss": 0.1423, "step": 7481 }, { "epoch": 72.54939759036145, "grad_norm": 6.0442962646484375, "learning_rate": 1.3679611650485439e-05, "loss": 0.2708, "step": 7482 }, { "epoch": 72.55903614457831, "grad_norm": 4.317915916442871, "learning_rate": 1.36747572815534e-05, "loss": 0.1605, "step": 7483 }, { "epoch": 72.56867469879518, "grad_norm": 5.461977005004883, "learning_rate": 1.366990291262136e-05, "loss": 0.2668, "step": 7484 }, { "epoch": 72.57831325301204, "grad_norm": 4.268441200256348, "learning_rate": 1.366504854368932e-05, "loss": 0.3504, "step": 7485 }, { "epoch": 72.58795180722892, "grad_norm": 2.6209089756011963, "learning_rate": 1.3660194174757282e-05, "loss": 0.1112, "step": 7486 }, { "epoch": 72.59759036144578, "grad_norm": 5.377438545227051, "learning_rate": 1.3655339805825243e-05, "loss": 0.2501, "step": 7487 }, { "epoch": 72.60722891566265, "grad_norm": 7.902615547180176, "learning_rate": 1.3650485436893204e-05, "loss": 0.3213, "step": 7488 }, { "epoch": 72.61686746987952, "grad_norm": 5.862393856048584, "learning_rate": 1.3645631067961165e-05, "loss": 0.2109, "step": 7489 }, { "epoch": 72.62650602409639, "grad_norm": 6.30065393447876, "learning_rate": 1.3640776699029126e-05, "loss": 0.2015, "step": 7490 }, { "epoch": 72.63614457831325, "grad_norm": 8.757974624633789, "learning_rate": 1.3635922330097087e-05, "loss": 0.3332, "step": 7491 }, { "epoch": 72.64578313253013, "grad_norm": 8.494771957397461, "learning_rate": 1.363106796116505e-05, "loss": 0.4199, "step": 7492 }, { "epoch": 72.65542168674699, "grad_norm": 5.216258525848389, "learning_rate": 1.3626213592233011e-05, "loss": 0.2734, "step": 7493 }, { "epoch": 72.66506024096385, "grad_norm": 3.6596217155456543, "learning_rate": 1.3621359223300973e-05, "loss": 0.1835, "step": 7494 }, { "epoch": 72.67469879518072, "grad_norm": 5.3101677894592285, "learning_rate": 1.3616504854368934e-05, "loss": 0.3907, "step": 7495 }, { "epoch": 72.6843373493976, "grad_norm": 2.9942846298217773, "learning_rate": 1.3611650485436895e-05, "loss": 0.0809, "step": 7496 }, { "epoch": 72.69397590361446, "grad_norm": 8.730660438537598, "learning_rate": 1.3606796116504856e-05, "loss": 0.1905, "step": 7497 }, { "epoch": 72.70361445783132, "grad_norm": 7.648002624511719, "learning_rate": 1.3601941747572815e-05, "loss": 0.2902, "step": 7498 }, { "epoch": 72.7132530120482, "grad_norm": 2.2279317378997803, "learning_rate": 1.3597087378640777e-05, "loss": 0.1339, "step": 7499 }, { "epoch": 72.72289156626506, "grad_norm": 8.901542663574219, "learning_rate": 1.3592233009708738e-05, "loss": 0.2301, "step": 7500 }, { "epoch": 72.73253012048193, "grad_norm": 6.7071356773376465, "learning_rate": 1.3587378640776699e-05, "loss": 0.2811, "step": 7501 }, { "epoch": 72.74216867469879, "grad_norm": 8.754498481750488, "learning_rate": 1.358252427184466e-05, "loss": 0.2356, "step": 7502 }, { "epoch": 72.75180722891567, "grad_norm": 2.988360643386841, "learning_rate": 1.3577669902912621e-05, "loss": 0.1488, "step": 7503 }, { "epoch": 72.76144578313253, "grad_norm": 3.5394668579101562, "learning_rate": 1.3572815533980582e-05, "loss": 0.2664, "step": 7504 }, { "epoch": 72.7710843373494, "grad_norm": 8.633587837219238, "learning_rate": 1.3567961165048545e-05, "loss": 0.1347, "step": 7505 }, { "epoch": 72.78072289156627, "grad_norm": 3.580538511276245, "learning_rate": 1.3563106796116506e-05, "loss": 0.2183, "step": 7506 }, { "epoch": 72.79036144578313, "grad_norm": 2.5152931213378906, "learning_rate": 1.3558252427184467e-05, "loss": 0.0631, "step": 7507 }, { "epoch": 72.8, "grad_norm": 2.2305188179016113, "learning_rate": 1.3553398058252429e-05, "loss": 0.09, "step": 7508 }, { "epoch": 72.80963855421686, "grad_norm": 9.186384201049805, "learning_rate": 1.354854368932039e-05, "loss": 0.3452, "step": 7509 }, { "epoch": 72.81927710843374, "grad_norm": 7.7239203453063965, "learning_rate": 1.3543689320388351e-05, "loss": 0.301, "step": 7510 }, { "epoch": 72.8289156626506, "grad_norm": 6.093076229095459, "learning_rate": 1.3538834951456312e-05, "loss": 0.2109, "step": 7511 }, { "epoch": 72.83855421686746, "grad_norm": 3.7458674907684326, "learning_rate": 1.3533980582524271e-05, "loss": 0.1311, "step": 7512 }, { "epoch": 72.84819277108434, "grad_norm": 4.130919456481934, "learning_rate": 1.3529126213592233e-05, "loss": 0.3408, "step": 7513 }, { "epoch": 72.8578313253012, "grad_norm": 3.3200364112854004, "learning_rate": 1.3524271844660194e-05, "loss": 0.2381, "step": 7514 }, { "epoch": 72.86746987951807, "grad_norm": 2.1999294757843018, "learning_rate": 1.3519417475728155e-05, "loss": 0.101, "step": 7515 }, { "epoch": 72.87710843373495, "grad_norm": 1.8513624668121338, "learning_rate": 1.3514563106796116e-05, "loss": 0.1195, "step": 7516 }, { "epoch": 72.88674698795181, "grad_norm": 5.602113723754883, "learning_rate": 1.3509708737864079e-05, "loss": 0.2191, "step": 7517 }, { "epoch": 72.89638554216867, "grad_norm": 4.445949554443359, "learning_rate": 1.350485436893204e-05, "loss": 0.1973, "step": 7518 }, { "epoch": 72.90602409638554, "grad_norm": 5.319553852081299, "learning_rate": 1.3500000000000001e-05, "loss": 0.3745, "step": 7519 }, { "epoch": 72.91566265060241, "grad_norm": 7.414830207824707, "learning_rate": 1.3495145631067962e-05, "loss": 0.3795, "step": 7520 }, { "epoch": 72.92530120481928, "grad_norm": 3.5100269317626953, "learning_rate": 1.3490291262135923e-05, "loss": 0.1896, "step": 7521 }, { "epoch": 72.93493975903614, "grad_norm": 3.697843074798584, "learning_rate": 1.3485436893203885e-05, "loss": 0.3641, "step": 7522 }, { "epoch": 72.94457831325302, "grad_norm": 4.3679633140563965, "learning_rate": 1.3480582524271846e-05, "loss": 0.143, "step": 7523 }, { "epoch": 72.95421686746988, "grad_norm": 6.767796993255615, "learning_rate": 1.3475728155339807e-05, "loss": 0.2787, "step": 7524 }, { "epoch": 72.96385542168674, "grad_norm": 4.516509056091309, "learning_rate": 1.3470873786407766e-05, "loss": 0.1188, "step": 7525 }, { "epoch": 72.97349397590361, "grad_norm": 5.03796911239624, "learning_rate": 1.3466019417475728e-05, "loss": 0.2817, "step": 7526 }, { "epoch": 72.98313253012049, "grad_norm": 2.1255722045898438, "learning_rate": 1.3461165048543689e-05, "loss": 0.0786, "step": 7527 }, { "epoch": 72.99277108433735, "grad_norm": 11.57309341430664, "learning_rate": 1.345631067961165e-05, "loss": 0.3758, "step": 7528 }, { "epoch": 73.00843373493976, "grad_norm": 3.3636488914489746, "learning_rate": 1.3451456310679611e-05, "loss": 0.3185, "step": 7529 }, { "epoch": 73.01807228915662, "grad_norm": 3.473660707473755, "learning_rate": 1.3446601941747575e-05, "loss": 0.1861, "step": 7530 }, { "epoch": 73.0277108433735, "grad_norm": 22.792781829833984, "learning_rate": 1.3441747572815535e-05, "loss": 0.1987, "step": 7531 }, { "epoch": 73.03734939759036, "grad_norm": 12.483328819274902, "learning_rate": 1.3436893203883496e-05, "loss": 0.2512, "step": 7532 }, { "epoch": 73.04698795180722, "grad_norm": 13.357545852661133, "learning_rate": 1.3432038834951457e-05, "loss": 0.2902, "step": 7533 }, { "epoch": 73.0566265060241, "grad_norm": 10.726541519165039, "learning_rate": 1.3427184466019418e-05, "loss": 0.3046, "step": 7534 }, { "epoch": 73.06626506024097, "grad_norm": 7.250017166137695, "learning_rate": 1.342233009708738e-05, "loss": 0.4033, "step": 7535 }, { "epoch": 73.07590361445783, "grad_norm": 14.121593475341797, "learning_rate": 1.341747572815534e-05, "loss": 0.2421, "step": 7536 }, { "epoch": 73.08554216867469, "grad_norm": 1.1651151180267334, "learning_rate": 1.3412621359223302e-05, "loss": 0.3306, "step": 7537 }, { "epoch": 73.09518072289157, "grad_norm": 16.811738967895508, "learning_rate": 1.3407766990291263e-05, "loss": 0.2052, "step": 7538 }, { "epoch": 73.10481927710843, "grad_norm": 25.045955657958984, "learning_rate": 1.3402912621359222e-05, "loss": 0.3998, "step": 7539 }, { "epoch": 73.1144578313253, "grad_norm": 20.05167579650879, "learning_rate": 1.3398058252427184e-05, "loss": 0.2521, "step": 7540 }, { "epoch": 73.12409638554217, "grad_norm": 24.33039093017578, "learning_rate": 1.3393203883495145e-05, "loss": 0.1528, "step": 7541 }, { "epoch": 73.13373493975904, "grad_norm": 18.856300354003906, "learning_rate": 1.338834951456311e-05, "loss": 0.3117, "step": 7542 }, { "epoch": 73.1433734939759, "grad_norm": 4.571796894073486, "learning_rate": 1.338349514563107e-05, "loss": 0.2679, "step": 7543 }, { "epoch": 73.15301204819278, "grad_norm": 1.3625673055648804, "learning_rate": 1.3378640776699032e-05, "loss": 0.2555, "step": 7544 }, { "epoch": 73.16265060240964, "grad_norm": 17.429780960083008, "learning_rate": 1.3373786407766991e-05, "loss": 0.3652, "step": 7545 }, { "epoch": 73.1722891566265, "grad_norm": 45.836212158203125, "learning_rate": 1.3368932038834952e-05, "loss": 0.4431, "step": 7546 }, { "epoch": 73.18192771084337, "grad_norm": 7.281952857971191, "learning_rate": 1.3364077669902913e-05, "loss": 0.3358, "step": 7547 }, { "epoch": 73.19156626506025, "grad_norm": 8.374460220336914, "learning_rate": 1.3359223300970874e-05, "loss": 0.3198, "step": 7548 }, { "epoch": 73.20120481927711, "grad_norm": 7.4593892097473145, "learning_rate": 1.3354368932038836e-05, "loss": 0.1711, "step": 7549 }, { "epoch": 73.21084337349397, "grad_norm": 11.51608943939209, "learning_rate": 1.3349514563106797e-05, "loss": 0.1546, "step": 7550 }, { "epoch": 73.22048192771085, "grad_norm": 28.040321350097656, "learning_rate": 1.3344660194174758e-05, "loss": 0.2467, "step": 7551 }, { "epoch": 73.23012048192771, "grad_norm": 17.960264205932617, "learning_rate": 1.3339805825242719e-05, "loss": 0.2042, "step": 7552 }, { "epoch": 73.23975903614458, "grad_norm": 9.223358154296875, "learning_rate": 1.3334951456310678e-05, "loss": 0.0883, "step": 7553 }, { "epoch": 73.24939759036144, "grad_norm": 4.235716819763184, "learning_rate": 1.333009708737864e-05, "loss": 0.2067, "step": 7554 }, { "epoch": 73.25903614457832, "grad_norm": 35.948177337646484, "learning_rate": 1.3325242718446604e-05, "loss": 0.3727, "step": 7555 }, { "epoch": 73.26867469879518, "grad_norm": 3.3447017669677734, "learning_rate": 1.3320388349514565e-05, "loss": 0.1525, "step": 7556 }, { "epoch": 73.27831325301204, "grad_norm": 2.3007864952087402, "learning_rate": 1.3315533980582526e-05, "loss": 0.1545, "step": 7557 }, { "epoch": 73.28795180722892, "grad_norm": 17.348833084106445, "learning_rate": 1.3310679611650486e-05, "loss": 0.2992, "step": 7558 }, { "epoch": 73.29759036144578, "grad_norm": 18.940616607666016, "learning_rate": 1.3305825242718447e-05, "loss": 0.3256, "step": 7559 }, { "epoch": 73.30722891566265, "grad_norm": 16.100950241088867, "learning_rate": 1.3300970873786408e-05, "loss": 0.3012, "step": 7560 }, { "epoch": 73.31686746987951, "grad_norm": 2.3180572986602783, "learning_rate": 1.329611650485437e-05, "loss": 0.2582, "step": 7561 }, { "epoch": 73.32650602409639, "grad_norm": 3.6889045238494873, "learning_rate": 1.329126213592233e-05, "loss": 0.3331, "step": 7562 }, { "epoch": 73.33614457831325, "grad_norm": 33.85983657836914, "learning_rate": 1.3286407766990292e-05, "loss": 0.3258, "step": 7563 }, { "epoch": 73.34578313253012, "grad_norm": 8.565929412841797, "learning_rate": 1.3281553398058253e-05, "loss": 0.2192, "step": 7564 }, { "epoch": 73.355421686747, "grad_norm": 7.835470676422119, "learning_rate": 1.3276699029126214e-05, "loss": 0.2356, "step": 7565 }, { "epoch": 73.36506024096386, "grad_norm": 23.286888122558594, "learning_rate": 1.3271844660194175e-05, "loss": 0.3052, "step": 7566 }, { "epoch": 73.37469879518072, "grad_norm": 16.961017608642578, "learning_rate": 1.3266990291262134e-05, "loss": 0.1149, "step": 7567 }, { "epoch": 73.38433734939758, "grad_norm": 4.306958198547363, "learning_rate": 1.3262135922330099e-05, "loss": 0.2144, "step": 7568 }, { "epoch": 73.39397590361446, "grad_norm": 21.393550872802734, "learning_rate": 1.325728155339806e-05, "loss": 0.6334, "step": 7569 }, { "epoch": 73.40361445783132, "grad_norm": 8.550466537475586, "learning_rate": 1.3252427184466021e-05, "loss": 0.1668, "step": 7570 }, { "epoch": 73.41325301204819, "grad_norm": 5.817537784576416, "learning_rate": 1.3247572815533982e-05, "loss": 0.2338, "step": 7571 }, { "epoch": 73.42289156626506, "grad_norm": 7.827059745788574, "learning_rate": 1.3242718446601942e-05, "loss": 0.2493, "step": 7572 }, { "epoch": 73.43253012048193, "grad_norm": 16.69160270690918, "learning_rate": 1.3237864077669903e-05, "loss": 0.249, "step": 7573 }, { "epoch": 73.44216867469879, "grad_norm": 10.397263526916504, "learning_rate": 1.3233009708737864e-05, "loss": 0.2593, "step": 7574 }, { "epoch": 73.45180722891567, "grad_norm": 6.658772945404053, "learning_rate": 1.3228155339805825e-05, "loss": 0.2634, "step": 7575 }, { "epoch": 73.46144578313253, "grad_norm": 4.648955821990967, "learning_rate": 1.3223300970873786e-05, "loss": 0.1667, "step": 7576 }, { "epoch": 73.4710843373494, "grad_norm": 29.205434799194336, "learning_rate": 1.3218446601941748e-05, "loss": 0.1069, "step": 7577 }, { "epoch": 73.48072289156626, "grad_norm": 3.51043701171875, "learning_rate": 1.3213592233009709e-05, "loss": 0.1881, "step": 7578 }, { "epoch": 73.49036144578314, "grad_norm": 7.669178485870361, "learning_rate": 1.320873786407767e-05, "loss": 0.2218, "step": 7579 }, { "epoch": 73.5, "grad_norm": 16.310894012451172, "learning_rate": 1.3203883495145633e-05, "loss": 0.41, "step": 7580 }, { "epoch": 73.50963855421686, "grad_norm": 11.644830703735352, "learning_rate": 1.3199029126213594e-05, "loss": 0.3334, "step": 7581 }, { "epoch": 73.51927710843374, "grad_norm": 3.3468542098999023, "learning_rate": 1.3194174757281555e-05, "loss": 0.3293, "step": 7582 }, { "epoch": 73.5289156626506, "grad_norm": 11.771232604980469, "learning_rate": 1.3189320388349516e-05, "loss": 0.287, "step": 7583 }, { "epoch": 73.53855421686747, "grad_norm": 6.667688369750977, "learning_rate": 1.3184466019417477e-05, "loss": 0.4746, "step": 7584 }, { "epoch": 73.54819277108433, "grad_norm": 4.774615287780762, "learning_rate": 1.3179611650485439e-05, "loss": 0.1332, "step": 7585 }, { "epoch": 73.55783132530121, "grad_norm": 14.73287296295166, "learning_rate": 1.3174757281553398e-05, "loss": 0.1271, "step": 7586 }, { "epoch": 73.56746987951807, "grad_norm": 5.712788105010986, "learning_rate": 1.3169902912621359e-05, "loss": 0.2473, "step": 7587 }, { "epoch": 73.57710843373494, "grad_norm": 5.3336029052734375, "learning_rate": 1.316504854368932e-05, "loss": 0.3201, "step": 7588 }, { "epoch": 73.58674698795181, "grad_norm": 12.343122482299805, "learning_rate": 1.3160194174757281e-05, "loss": 0.1984, "step": 7589 }, { "epoch": 73.59638554216868, "grad_norm": 12.925705909729004, "learning_rate": 1.3155339805825243e-05, "loss": 0.2435, "step": 7590 }, { "epoch": 73.60602409638554, "grad_norm": 25.03518295288086, "learning_rate": 1.3150485436893204e-05, "loss": 0.1435, "step": 7591 }, { "epoch": 73.61566265060242, "grad_norm": 6.392588138580322, "learning_rate": 1.3145631067961165e-05, "loss": 0.1845, "step": 7592 }, { "epoch": 73.62530120481928, "grad_norm": 2.634286642074585, "learning_rate": 1.3140776699029128e-05, "loss": 0.2759, "step": 7593 }, { "epoch": 73.63493975903614, "grad_norm": 6.53139066696167, "learning_rate": 1.3135922330097089e-05, "loss": 0.1788, "step": 7594 }, { "epoch": 73.644578313253, "grad_norm": 11.474238395690918, "learning_rate": 1.313106796116505e-05, "loss": 0.1902, "step": 7595 }, { "epoch": 73.65421686746988, "grad_norm": 24.350296020507812, "learning_rate": 1.3126213592233011e-05, "loss": 0.3157, "step": 7596 }, { "epoch": 73.66385542168675, "grad_norm": 3.767177104949951, "learning_rate": 1.3121359223300972e-05, "loss": 0.1766, "step": 7597 }, { "epoch": 73.67349397590361, "grad_norm": 5.4997639656066895, "learning_rate": 1.3116504854368933e-05, "loss": 0.1911, "step": 7598 }, { "epoch": 73.68313253012049, "grad_norm": 24.035051345825195, "learning_rate": 1.3111650485436895e-05, "loss": 0.3419, "step": 7599 }, { "epoch": 73.69277108433735, "grad_norm": 0.5926496386528015, "learning_rate": 1.3106796116504854e-05, "loss": 0.1132, "step": 7600 }, { "epoch": 73.70240963855422, "grad_norm": 22.902734756469727, "learning_rate": 1.3101941747572815e-05, "loss": 0.1592, "step": 7601 }, { "epoch": 73.71204819277108, "grad_norm": 1.3406857252120972, "learning_rate": 1.3097087378640776e-05, "loss": 0.199, "step": 7602 }, { "epoch": 73.72168674698796, "grad_norm": 2.1507487297058105, "learning_rate": 1.3092233009708737e-05, "loss": 0.2106, "step": 7603 }, { "epoch": 73.73132530120482, "grad_norm": 5.884621620178223, "learning_rate": 1.3087378640776699e-05, "loss": 0.3094, "step": 7604 }, { "epoch": 73.74096385542168, "grad_norm": 4.920483589172363, "learning_rate": 1.308252427184466e-05, "loss": 0.1776, "step": 7605 }, { "epoch": 73.75060240963856, "grad_norm": 4.875829219818115, "learning_rate": 1.3077669902912623e-05, "loss": 0.2029, "step": 7606 }, { "epoch": 73.76024096385542, "grad_norm": 12.408075332641602, "learning_rate": 1.3072815533980584e-05, "loss": 0.3652, "step": 7607 }, { "epoch": 73.76987951807229, "grad_norm": 2.918118953704834, "learning_rate": 1.3067961165048545e-05, "loss": 0.2312, "step": 7608 }, { "epoch": 73.77951807228915, "grad_norm": 1.7333521842956543, "learning_rate": 1.3063106796116506e-05, "loss": 0.2407, "step": 7609 }, { "epoch": 73.78915662650603, "grad_norm": 12.894477844238281, "learning_rate": 1.3058252427184467e-05, "loss": 0.2881, "step": 7610 }, { "epoch": 73.79879518072289, "grad_norm": 7.0770697593688965, "learning_rate": 1.3053398058252428e-05, "loss": 0.1942, "step": 7611 }, { "epoch": 73.80843373493975, "grad_norm": 4.755352020263672, "learning_rate": 1.304854368932039e-05, "loss": 0.2793, "step": 7612 }, { "epoch": 73.81807228915663, "grad_norm": 0.9026758670806885, "learning_rate": 1.304368932038835e-05, "loss": 0.1429, "step": 7613 }, { "epoch": 73.8277108433735, "grad_norm": 24.9082088470459, "learning_rate": 1.303883495145631e-05, "loss": 0.2486, "step": 7614 }, { "epoch": 73.83734939759036, "grad_norm": 18.045991897583008, "learning_rate": 1.3033980582524271e-05, "loss": 0.1923, "step": 7615 }, { "epoch": 73.84698795180722, "grad_norm": 16.63937759399414, "learning_rate": 1.3029126213592232e-05, "loss": 0.1697, "step": 7616 }, { "epoch": 73.8566265060241, "grad_norm": 2.6803152561187744, "learning_rate": 1.3024271844660193e-05, "loss": 0.1136, "step": 7617 }, { "epoch": 73.86626506024096, "grad_norm": 16.76304817199707, "learning_rate": 1.3019417475728158e-05, "loss": 0.3382, "step": 7618 }, { "epoch": 73.87590361445783, "grad_norm": 1.8349380493164062, "learning_rate": 1.3014563106796117e-05, "loss": 0.2188, "step": 7619 }, { "epoch": 73.8855421686747, "grad_norm": 5.892763614654541, "learning_rate": 1.3009708737864079e-05, "loss": 0.269, "step": 7620 }, { "epoch": 73.89518072289157, "grad_norm": 15.926074028015137, "learning_rate": 1.300485436893204e-05, "loss": 0.1413, "step": 7621 }, { "epoch": 73.90481927710843, "grad_norm": 2.0480146408081055, "learning_rate": 1.3000000000000001e-05, "loss": 0.359, "step": 7622 }, { "epoch": 73.91445783132531, "grad_norm": 11.889345169067383, "learning_rate": 1.2995145631067962e-05, "loss": 0.4521, "step": 7623 }, { "epoch": 73.92409638554217, "grad_norm": 73.5718765258789, "learning_rate": 1.2990291262135923e-05, "loss": 0.2076, "step": 7624 }, { "epoch": 73.93373493975903, "grad_norm": 15.449206352233887, "learning_rate": 1.2985436893203884e-05, "loss": 0.1679, "step": 7625 }, { "epoch": 73.9433734939759, "grad_norm": 2.5306684970855713, "learning_rate": 1.2980582524271845e-05, "loss": 0.1245, "step": 7626 }, { "epoch": 73.95301204819278, "grad_norm": 8.652677536010742, "learning_rate": 1.2975728155339807e-05, "loss": 0.2106, "step": 7627 }, { "epoch": 73.96265060240964, "grad_norm": 1.754908800125122, "learning_rate": 1.2970873786407766e-05, "loss": 0.4124, "step": 7628 }, { "epoch": 73.9722891566265, "grad_norm": 13.444787979125977, "learning_rate": 1.2966019417475727e-05, "loss": 0.2227, "step": 7629 }, { "epoch": 73.98192771084338, "grad_norm": 19.837352752685547, "learning_rate": 1.2961165048543688e-05, "loss": 0.3621, "step": 7630 }, { "epoch": 73.99156626506024, "grad_norm": 2.0658514499664307, "learning_rate": 1.2956310679611653e-05, "loss": 0.1955, "step": 7631 }, { "epoch": 74.00722891566265, "grad_norm": 13.054939270019531, "learning_rate": 1.2951456310679614e-05, "loss": 0.1199, "step": 7632 }, { "epoch": 74.01686746987951, "grad_norm": 9.891912460327148, "learning_rate": 1.2946601941747573e-05, "loss": 0.1213, "step": 7633 }, { "epoch": 74.02650602409639, "grad_norm": 59.111480712890625, "learning_rate": 1.2941747572815535e-05, "loss": 0.2847, "step": 7634 }, { "epoch": 74.03614457831326, "grad_norm": 29.4600887298584, "learning_rate": 1.2936893203883496e-05, "loss": 0.1253, "step": 7635 }, { "epoch": 74.04578313253012, "grad_norm": 1.475768804550171, "learning_rate": 1.2932038834951457e-05, "loss": 0.2651, "step": 7636 }, { "epoch": 74.05542168674698, "grad_norm": 6.971964359283447, "learning_rate": 1.2927184466019418e-05, "loss": 0.3294, "step": 7637 }, { "epoch": 74.06506024096386, "grad_norm": 8.744260787963867, "learning_rate": 1.292233009708738e-05, "loss": 0.3942, "step": 7638 }, { "epoch": 74.07469879518072, "grad_norm": 3.5497257709503174, "learning_rate": 1.291747572815534e-05, "loss": 0.195, "step": 7639 }, { "epoch": 74.08433734939759, "grad_norm": 4.219114303588867, "learning_rate": 1.2912621359223302e-05, "loss": 0.1748, "step": 7640 }, { "epoch": 74.09397590361446, "grad_norm": 11.288854598999023, "learning_rate": 1.2907766990291261e-05, "loss": 0.2112, "step": 7641 }, { "epoch": 74.10361445783133, "grad_norm": 15.19611930847168, "learning_rate": 1.2902912621359222e-05, "loss": 0.3306, "step": 7642 }, { "epoch": 74.11325301204819, "grad_norm": 2.973956346511841, "learning_rate": 1.2898058252427187e-05, "loss": 0.129, "step": 7643 }, { "epoch": 74.12289156626505, "grad_norm": 9.713763236999512, "learning_rate": 1.2893203883495148e-05, "loss": 0.1872, "step": 7644 }, { "epoch": 74.13253012048193, "grad_norm": 32.715450286865234, "learning_rate": 1.2888349514563109e-05, "loss": 0.144, "step": 7645 }, { "epoch": 74.1421686746988, "grad_norm": 3.0642428398132324, "learning_rate": 1.288349514563107e-05, "loss": 0.2712, "step": 7646 }, { "epoch": 74.15180722891566, "grad_norm": 4.0329766273498535, "learning_rate": 1.287864077669903e-05, "loss": 0.3078, "step": 7647 }, { "epoch": 74.16144578313254, "grad_norm": 1.805753231048584, "learning_rate": 1.287378640776699e-05, "loss": 0.1024, "step": 7648 }, { "epoch": 74.1710843373494, "grad_norm": 9.27163028717041, "learning_rate": 1.2868932038834952e-05, "loss": 0.0703, "step": 7649 }, { "epoch": 74.18072289156626, "grad_norm": 9.585987091064453, "learning_rate": 1.2864077669902913e-05, "loss": 0.3534, "step": 7650 }, { "epoch": 74.19036144578314, "grad_norm": 32.59801483154297, "learning_rate": 1.2859223300970874e-05, "loss": 0.1807, "step": 7651 }, { "epoch": 74.2, "grad_norm": 2.894613027572632, "learning_rate": 1.2854368932038835e-05, "loss": 0.2925, "step": 7652 }, { "epoch": 74.20963855421687, "grad_norm": 8.633170127868652, "learning_rate": 1.2849514563106796e-05, "loss": 0.1805, "step": 7653 }, { "epoch": 74.21927710843373, "grad_norm": 5.298768997192383, "learning_rate": 1.2844660194174758e-05, "loss": 0.376, "step": 7654 }, { "epoch": 74.2289156626506, "grad_norm": 13.17257308959961, "learning_rate": 1.2839805825242717e-05, "loss": 0.2477, "step": 7655 }, { "epoch": 74.23855421686747, "grad_norm": 2.2429370880126953, "learning_rate": 1.2834951456310682e-05, "loss": 0.1985, "step": 7656 }, { "epoch": 74.24819277108433, "grad_norm": 17.550582885742188, "learning_rate": 1.2830097087378643e-05, "loss": 0.2663, "step": 7657 }, { "epoch": 74.25783132530121, "grad_norm": 9.625936508178711, "learning_rate": 1.2825242718446604e-05, "loss": 0.2577, "step": 7658 }, { "epoch": 74.26746987951807, "grad_norm": 20.936832427978516, "learning_rate": 1.2820388349514565e-05, "loss": 0.3384, "step": 7659 }, { "epoch": 74.27710843373494, "grad_norm": 13.39192008972168, "learning_rate": 1.2815533980582526e-05, "loss": 0.4161, "step": 7660 }, { "epoch": 74.2867469879518, "grad_norm": 9.784247398376465, "learning_rate": 1.2810679611650486e-05, "loss": 0.3999, "step": 7661 }, { "epoch": 74.29638554216868, "grad_norm": 15.605695724487305, "learning_rate": 1.2805825242718447e-05, "loss": 0.2225, "step": 7662 }, { "epoch": 74.30602409638554, "grad_norm": 4.068199634552002, "learning_rate": 1.2800970873786408e-05, "loss": 0.1896, "step": 7663 }, { "epoch": 74.3156626506024, "grad_norm": 4.792213439941406, "learning_rate": 1.2796116504854369e-05, "loss": 0.1662, "step": 7664 }, { "epoch": 74.32530120481928, "grad_norm": 7.332712173461914, "learning_rate": 1.279126213592233e-05, "loss": 0.1625, "step": 7665 }, { "epoch": 74.33493975903615, "grad_norm": 5.7127885818481445, "learning_rate": 1.2786407766990291e-05, "loss": 0.1651, "step": 7666 }, { "epoch": 74.34457831325301, "grad_norm": 3.6446545124053955, "learning_rate": 1.2781553398058252e-05, "loss": 0.2055, "step": 7667 }, { "epoch": 74.35421686746987, "grad_norm": 10.861620903015137, "learning_rate": 1.2776699029126214e-05, "loss": 0.3879, "step": 7668 }, { "epoch": 74.36385542168675, "grad_norm": 4.966390132904053, "learning_rate": 1.2771844660194176e-05, "loss": 0.1223, "step": 7669 }, { "epoch": 74.37349397590361, "grad_norm": 18.454864501953125, "learning_rate": 1.2766990291262138e-05, "loss": 0.3498, "step": 7670 }, { "epoch": 74.38313253012048, "grad_norm": 12.631815910339355, "learning_rate": 1.2762135922330099e-05, "loss": 0.2462, "step": 7671 }, { "epoch": 74.39277108433735, "grad_norm": 11.402878761291504, "learning_rate": 1.275728155339806e-05, "loss": 0.2368, "step": 7672 }, { "epoch": 74.40240963855422, "grad_norm": 33.132957458496094, "learning_rate": 1.2752427184466021e-05, "loss": 0.2617, "step": 7673 }, { "epoch": 74.41204819277108, "grad_norm": 2.863926649093628, "learning_rate": 1.274757281553398e-05, "loss": 0.2044, "step": 7674 }, { "epoch": 74.42168674698796, "grad_norm": 5.137744903564453, "learning_rate": 1.2742718446601942e-05, "loss": 0.2419, "step": 7675 }, { "epoch": 74.43132530120482, "grad_norm": 11.82606029510498, "learning_rate": 1.2737864077669903e-05, "loss": 0.206, "step": 7676 }, { "epoch": 74.44096385542169, "grad_norm": 6.5153045654296875, "learning_rate": 1.2733009708737864e-05, "loss": 0.2144, "step": 7677 }, { "epoch": 74.45060240963855, "grad_norm": 23.44772720336914, "learning_rate": 1.2728155339805825e-05, "loss": 0.2843, "step": 7678 }, { "epoch": 74.46024096385543, "grad_norm": 9.76302719116211, "learning_rate": 1.2723300970873786e-05, "loss": 0.4432, "step": 7679 }, { "epoch": 74.46987951807229, "grad_norm": 14.634160041809082, "learning_rate": 1.2718446601941747e-05, "loss": 0.3611, "step": 7680 }, { "epoch": 74.47951807228915, "grad_norm": 2.1628754138946533, "learning_rate": 1.271359223300971e-05, "loss": 0.2978, "step": 7681 }, { "epoch": 74.48915662650603, "grad_norm": 0.7195686101913452, "learning_rate": 1.2708737864077671e-05, "loss": 0.0923, "step": 7682 }, { "epoch": 74.4987951807229, "grad_norm": 4.318025588989258, "learning_rate": 1.2703883495145632e-05, "loss": 0.1441, "step": 7683 }, { "epoch": 74.50843373493976, "grad_norm": 9.54394245147705, "learning_rate": 1.2699029126213594e-05, "loss": 0.2366, "step": 7684 }, { "epoch": 74.51807228915662, "grad_norm": 23.736223220825195, "learning_rate": 1.2694174757281555e-05, "loss": 0.2822, "step": 7685 }, { "epoch": 74.5277108433735, "grad_norm": 0.6267825961112976, "learning_rate": 1.2689320388349516e-05, "loss": 0.2686, "step": 7686 }, { "epoch": 74.53734939759036, "grad_norm": 18.202438354492188, "learning_rate": 1.2684466019417477e-05, "loss": 0.2975, "step": 7687 }, { "epoch": 74.54698795180722, "grad_norm": 6.251936912536621, "learning_rate": 1.2679611650485437e-05, "loss": 0.3096, "step": 7688 }, { "epoch": 74.5566265060241, "grad_norm": 31.11107063293457, "learning_rate": 1.2674757281553398e-05, "loss": 0.2891, "step": 7689 }, { "epoch": 74.56626506024097, "grad_norm": 2.2481584548950195, "learning_rate": 1.2669902912621359e-05, "loss": 0.2802, "step": 7690 }, { "epoch": 74.57590361445783, "grad_norm": 3.002964735031128, "learning_rate": 1.266504854368932e-05, "loss": 0.2301, "step": 7691 }, { "epoch": 74.58554216867469, "grad_norm": 3.165102005004883, "learning_rate": 1.2660194174757281e-05, "loss": 0.108, "step": 7692 }, { "epoch": 74.59518072289157, "grad_norm": 9.25101375579834, "learning_rate": 1.2655339805825242e-05, "loss": 0.1952, "step": 7693 }, { "epoch": 74.60481927710843, "grad_norm": 4.5163774490356445, "learning_rate": 1.2650485436893205e-05, "loss": 0.2814, "step": 7694 }, { "epoch": 74.6144578313253, "grad_norm": 11.081367492675781, "learning_rate": 1.2645631067961166e-05, "loss": 0.436, "step": 7695 }, { "epoch": 74.62409638554217, "grad_norm": 3.2269859313964844, "learning_rate": 1.2640776699029127e-05, "loss": 0.2649, "step": 7696 }, { "epoch": 74.63373493975904, "grad_norm": 23.427061080932617, "learning_rate": 1.2635922330097089e-05, "loss": 0.2729, "step": 7697 }, { "epoch": 74.6433734939759, "grad_norm": 4.925484657287598, "learning_rate": 1.263106796116505e-05, "loss": 0.1953, "step": 7698 }, { "epoch": 74.65301204819278, "grad_norm": 7.182336807250977, "learning_rate": 1.262621359223301e-05, "loss": 0.2543, "step": 7699 }, { "epoch": 74.66265060240964, "grad_norm": 7.788825035095215, "learning_rate": 1.2621359223300972e-05, "loss": 0.0928, "step": 7700 }, { "epoch": 74.6722891566265, "grad_norm": 6.695478439331055, "learning_rate": 1.2616504854368933e-05, "loss": 0.2767, "step": 7701 }, { "epoch": 74.68192771084337, "grad_norm": 6.102154731750488, "learning_rate": 1.2611650485436893e-05, "loss": 0.1516, "step": 7702 }, { "epoch": 74.69156626506025, "grad_norm": 2.8916616439819336, "learning_rate": 1.2606796116504854e-05, "loss": 0.2886, "step": 7703 }, { "epoch": 74.70120481927711, "grad_norm": 2.5530951023101807, "learning_rate": 1.2601941747572815e-05, "loss": 0.1916, "step": 7704 }, { "epoch": 74.71084337349397, "grad_norm": 2.4116196632385254, "learning_rate": 1.2597087378640776e-05, "loss": 0.0643, "step": 7705 }, { "epoch": 74.72048192771085, "grad_norm": 5.3688178062438965, "learning_rate": 1.2592233009708737e-05, "loss": 0.3869, "step": 7706 }, { "epoch": 74.73012048192771, "grad_norm": 19.064865112304688, "learning_rate": 1.25873786407767e-05, "loss": 0.2564, "step": 7707 }, { "epoch": 74.73975903614458, "grad_norm": 3.1347813606262207, "learning_rate": 1.2582524271844661e-05, "loss": 0.134, "step": 7708 }, { "epoch": 74.74939759036144, "grad_norm": 2.8808364868164062, "learning_rate": 1.2577669902912622e-05, "loss": 0.2399, "step": 7709 }, { "epoch": 74.75903614457832, "grad_norm": 2.277327060699463, "learning_rate": 1.2572815533980583e-05, "loss": 0.2539, "step": 7710 }, { "epoch": 74.76867469879518, "grad_norm": 3.178346872329712, "learning_rate": 1.2567961165048545e-05, "loss": 0.153, "step": 7711 }, { "epoch": 74.77831325301204, "grad_norm": 9.263223648071289, "learning_rate": 1.2563106796116506e-05, "loss": 0.1739, "step": 7712 }, { "epoch": 74.78795180722892, "grad_norm": 7.326648235321045, "learning_rate": 1.2558252427184467e-05, "loss": 0.3476, "step": 7713 }, { "epoch": 74.79759036144578, "grad_norm": 7.884246826171875, "learning_rate": 1.2553398058252428e-05, "loss": 0.2678, "step": 7714 }, { "epoch": 74.80722891566265, "grad_norm": 1.8979697227478027, "learning_rate": 1.2548543689320389e-05, "loss": 0.0912, "step": 7715 }, { "epoch": 74.81686746987951, "grad_norm": 10.289167404174805, "learning_rate": 1.2543689320388349e-05, "loss": 0.2786, "step": 7716 }, { "epoch": 74.82650602409639, "grad_norm": 3.3154401779174805, "learning_rate": 1.253883495145631e-05, "loss": 0.0857, "step": 7717 }, { "epoch": 74.83614457831325, "grad_norm": 1.443986177444458, "learning_rate": 1.2533980582524271e-05, "loss": 0.1957, "step": 7718 }, { "epoch": 74.84578313253012, "grad_norm": 1.6138449907302856, "learning_rate": 1.2529126213592235e-05, "loss": 0.1587, "step": 7719 }, { "epoch": 74.855421686747, "grad_norm": 2.16637921333313, "learning_rate": 1.2524271844660197e-05, "loss": 0.1917, "step": 7720 }, { "epoch": 74.86506024096386, "grad_norm": 9.127096176147461, "learning_rate": 1.2519417475728156e-05, "loss": 0.2852, "step": 7721 }, { "epoch": 74.87469879518072, "grad_norm": 1.5920257568359375, "learning_rate": 1.2514563106796117e-05, "loss": 0.1055, "step": 7722 }, { "epoch": 74.88433734939758, "grad_norm": 17.05771827697754, "learning_rate": 1.2509708737864078e-05, "loss": 0.2544, "step": 7723 }, { "epoch": 74.89397590361446, "grad_norm": 2.5673882961273193, "learning_rate": 1.250485436893204e-05, "loss": 0.1959, "step": 7724 }, { "epoch": 74.90361445783132, "grad_norm": 3.1468913555145264, "learning_rate": 1.25e-05, "loss": 0.1767, "step": 7725 }, { "epoch": 74.91325301204819, "grad_norm": 3.7476513385772705, "learning_rate": 1.2495145631067962e-05, "loss": 0.2006, "step": 7726 }, { "epoch": 74.92289156626506, "grad_norm": 6.158455848693848, "learning_rate": 1.2490291262135923e-05, "loss": 0.4358, "step": 7727 }, { "epoch": 74.93253012048193, "grad_norm": 7.2584099769592285, "learning_rate": 1.2485436893203884e-05, "loss": 0.23, "step": 7728 }, { "epoch": 74.94216867469879, "grad_norm": 1.9774906635284424, "learning_rate": 1.2480582524271845e-05, "loss": 0.0623, "step": 7729 }, { "epoch": 74.95180722891567, "grad_norm": 8.565584182739258, "learning_rate": 1.2475728155339806e-05, "loss": 0.1941, "step": 7730 }, { "epoch": 74.96144578313253, "grad_norm": 4.208019733428955, "learning_rate": 1.2470873786407767e-05, "loss": 0.1788, "step": 7731 }, { "epoch": 74.9710843373494, "grad_norm": 30.853397369384766, "learning_rate": 1.2466019417475729e-05, "loss": 0.2615, "step": 7732 }, { "epoch": 74.98072289156626, "grad_norm": 1.802680253982544, "learning_rate": 1.246116504854369e-05, "loss": 0.0458, "step": 7733 }, { "epoch": 74.99036144578314, "grad_norm": 3.222984790802002, "learning_rate": 1.2456310679611651e-05, "loss": 0.2616, "step": 7734 }, { "epoch": 75.00602409638554, "grad_norm": 14.480806350708008, "learning_rate": 1.2451456310679612e-05, "loss": 0.4032, "step": 7735 }, { "epoch": 75.01566265060241, "grad_norm": 5.08269739151001, "learning_rate": 1.2446601941747573e-05, "loss": 0.3614, "step": 7736 }, { "epoch": 75.02530120481927, "grad_norm": 13.367281913757324, "learning_rate": 1.2441747572815534e-05, "loss": 0.2454, "step": 7737 }, { "epoch": 75.03493975903615, "grad_norm": 21.660673141479492, "learning_rate": 1.2436893203883495e-05, "loss": 0.2566, "step": 7738 }, { "epoch": 75.04457831325301, "grad_norm": 1.6003843545913696, "learning_rate": 1.2432038834951457e-05, "loss": 0.1255, "step": 7739 }, { "epoch": 75.05421686746988, "grad_norm": 31.757816314697266, "learning_rate": 1.2427184466019418e-05, "loss": 0.4338, "step": 7740 }, { "epoch": 75.06385542168675, "grad_norm": 24.726980209350586, "learning_rate": 1.242233009708738e-05, "loss": 0.3517, "step": 7741 }, { "epoch": 75.07349397590362, "grad_norm": 31.042774200439453, "learning_rate": 1.241747572815534e-05, "loss": 0.2049, "step": 7742 }, { "epoch": 75.08313253012048, "grad_norm": 11.592698097229004, "learning_rate": 1.2412621359223301e-05, "loss": 0.2292, "step": 7743 }, { "epoch": 75.09277108433734, "grad_norm": 2.854090690612793, "learning_rate": 1.2407766990291262e-05, "loss": 0.2046, "step": 7744 }, { "epoch": 75.10240963855422, "grad_norm": 13.198930740356445, "learning_rate": 1.2402912621359224e-05, "loss": 0.1979, "step": 7745 }, { "epoch": 75.11204819277108, "grad_norm": 7.380181789398193, "learning_rate": 1.2398058252427185e-05, "loss": 0.2943, "step": 7746 }, { "epoch": 75.12168674698795, "grad_norm": 4.110651016235352, "learning_rate": 1.2393203883495146e-05, "loss": 0.1711, "step": 7747 }, { "epoch": 75.13132530120482, "grad_norm": 3.129575252532959, "learning_rate": 1.2388349514563109e-05, "loss": 0.244, "step": 7748 }, { "epoch": 75.14096385542169, "grad_norm": 17.451038360595703, "learning_rate": 1.2383495145631068e-05, "loss": 0.232, "step": 7749 }, { "epoch": 75.15060240963855, "grad_norm": 2.3710763454437256, "learning_rate": 1.237864077669903e-05, "loss": 0.2838, "step": 7750 }, { "epoch": 75.16024096385541, "grad_norm": 14.223217964172363, "learning_rate": 1.237378640776699e-05, "loss": 0.2582, "step": 7751 }, { "epoch": 75.16987951807229, "grad_norm": 5.380396842956543, "learning_rate": 1.2368932038834952e-05, "loss": 0.1489, "step": 7752 }, { "epoch": 75.17951807228916, "grad_norm": 22.13960075378418, "learning_rate": 1.2364077669902913e-05, "loss": 0.6437, "step": 7753 }, { "epoch": 75.18915662650602, "grad_norm": 2.8790993690490723, "learning_rate": 1.2359223300970876e-05, "loss": 0.2018, "step": 7754 }, { "epoch": 75.1987951807229, "grad_norm": 10.368551254272461, "learning_rate": 1.2354368932038837e-05, "loss": 0.3715, "step": 7755 }, { "epoch": 75.20843373493976, "grad_norm": 2.2805004119873047, "learning_rate": 1.2349514563106796e-05, "loss": 0.2476, "step": 7756 }, { "epoch": 75.21807228915662, "grad_norm": 2.9973151683807373, "learning_rate": 1.2344660194174757e-05, "loss": 0.1984, "step": 7757 }, { "epoch": 75.2277108433735, "grad_norm": 2.7130677700042725, "learning_rate": 1.2339805825242718e-05, "loss": 0.1716, "step": 7758 }, { "epoch": 75.23734939759036, "grad_norm": 9.861053466796875, "learning_rate": 1.233495145631068e-05, "loss": 0.2323, "step": 7759 }, { "epoch": 75.24698795180723, "grad_norm": 10.219980239868164, "learning_rate": 1.2330097087378642e-05, "loss": 0.2021, "step": 7760 }, { "epoch": 75.25662650602409, "grad_norm": 7.159033298492432, "learning_rate": 1.2325242718446604e-05, "loss": 0.2612, "step": 7761 }, { "epoch": 75.26626506024097, "grad_norm": 3.229506254196167, "learning_rate": 1.2320388349514565e-05, "loss": 0.3427, "step": 7762 }, { "epoch": 75.27590361445783, "grad_norm": 11.867716789245605, "learning_rate": 1.2315533980582524e-05, "loss": 0.2393, "step": 7763 }, { "epoch": 75.2855421686747, "grad_norm": 13.66392993927002, "learning_rate": 1.2310679611650485e-05, "loss": 0.2329, "step": 7764 }, { "epoch": 75.29518072289157, "grad_norm": 2.826321840286255, "learning_rate": 1.2305825242718446e-05, "loss": 0.1729, "step": 7765 }, { "epoch": 75.30481927710844, "grad_norm": 5.9698405265808105, "learning_rate": 1.2300970873786408e-05, "loss": 0.3781, "step": 7766 }, { "epoch": 75.3144578313253, "grad_norm": 16.94877815246582, "learning_rate": 1.229611650485437e-05, "loss": 0.2555, "step": 7767 }, { "epoch": 75.32409638554216, "grad_norm": 16.061704635620117, "learning_rate": 1.2291262135922332e-05, "loss": 0.3455, "step": 7768 }, { "epoch": 75.33373493975904, "grad_norm": 3.6968250274658203, "learning_rate": 1.2286407766990293e-05, "loss": 0.2299, "step": 7769 }, { "epoch": 75.3433734939759, "grad_norm": 4.083518028259277, "learning_rate": 1.2281553398058252e-05, "loss": 0.3254, "step": 7770 }, { "epoch": 75.35301204819277, "grad_norm": 3.7682361602783203, "learning_rate": 1.2276699029126213e-05, "loss": 0.1427, "step": 7771 }, { "epoch": 75.36265060240964, "grad_norm": 2.581926107406616, "learning_rate": 1.2271844660194174e-05, "loss": 0.1502, "step": 7772 }, { "epoch": 75.37228915662651, "grad_norm": 5.550256252288818, "learning_rate": 1.2266990291262137e-05, "loss": 0.2266, "step": 7773 }, { "epoch": 75.38192771084337, "grad_norm": 13.324951171875, "learning_rate": 1.2262135922330098e-05, "loss": 0.2993, "step": 7774 }, { "epoch": 75.39156626506023, "grad_norm": 14.9302339553833, "learning_rate": 1.225728155339806e-05, "loss": 0.1702, "step": 7775 }, { "epoch": 75.40120481927711, "grad_norm": 9.798942565917969, "learning_rate": 1.225242718446602e-05, "loss": 0.2955, "step": 7776 }, { "epoch": 75.41084337349398, "grad_norm": 2.00837779045105, "learning_rate": 1.224757281553398e-05, "loss": 0.1194, "step": 7777 }, { "epoch": 75.42048192771084, "grad_norm": 11.3426513671875, "learning_rate": 1.2242718446601941e-05, "loss": 0.1235, "step": 7778 }, { "epoch": 75.43012048192772, "grad_norm": 4.013009548187256, "learning_rate": 1.2237864077669904e-05, "loss": 0.2798, "step": 7779 }, { "epoch": 75.43975903614458, "grad_norm": 9.842982292175293, "learning_rate": 1.2233009708737865e-05, "loss": 0.2328, "step": 7780 }, { "epoch": 75.44939759036144, "grad_norm": 3.049527406692505, "learning_rate": 1.2228155339805826e-05, "loss": 0.1958, "step": 7781 }, { "epoch": 75.45903614457832, "grad_norm": 22.453266143798828, "learning_rate": 1.2223300970873788e-05, "loss": 0.146, "step": 7782 }, { "epoch": 75.46867469879518, "grad_norm": 9.851238250732422, "learning_rate": 1.2218446601941747e-05, "loss": 0.4202, "step": 7783 }, { "epoch": 75.47831325301205, "grad_norm": 10.93753719329834, "learning_rate": 1.2213592233009708e-05, "loss": 0.1266, "step": 7784 }, { "epoch": 75.48795180722891, "grad_norm": 2.3148374557495117, "learning_rate": 1.2208737864077671e-05, "loss": 0.1271, "step": 7785 }, { "epoch": 75.49759036144579, "grad_norm": 2.676520586013794, "learning_rate": 1.2203883495145632e-05, "loss": 0.2502, "step": 7786 }, { "epoch": 75.50722891566265, "grad_norm": 5.224440097808838, "learning_rate": 1.2199029126213593e-05, "loss": 0.2073, "step": 7787 }, { "epoch": 75.51686746987951, "grad_norm": 9.121297836303711, "learning_rate": 1.2194174757281554e-05, "loss": 0.1986, "step": 7788 }, { "epoch": 75.52650602409639, "grad_norm": 4.366279602050781, "learning_rate": 1.2189320388349516e-05, "loss": 0.1563, "step": 7789 }, { "epoch": 75.53614457831326, "grad_norm": 2.8251540660858154, "learning_rate": 1.2184466019417475e-05, "loss": 0.2033, "step": 7790 }, { "epoch": 75.54578313253012, "grad_norm": 20.335145950317383, "learning_rate": 1.2179611650485436e-05, "loss": 0.2882, "step": 7791 }, { "epoch": 75.55542168674698, "grad_norm": 3.5718579292297363, "learning_rate": 1.2174757281553399e-05, "loss": 0.1073, "step": 7792 }, { "epoch": 75.56506024096386, "grad_norm": 16.13226890563965, "learning_rate": 1.216990291262136e-05, "loss": 0.1445, "step": 7793 }, { "epoch": 75.57469879518072, "grad_norm": 13.228642463684082, "learning_rate": 1.2165048543689321e-05, "loss": 0.3048, "step": 7794 }, { "epoch": 75.58433734939759, "grad_norm": 8.754866600036621, "learning_rate": 1.2160194174757282e-05, "loss": 0.1823, "step": 7795 }, { "epoch": 75.59397590361446, "grad_norm": 4.1953959465026855, "learning_rate": 1.2155339805825244e-05, "loss": 0.3033, "step": 7796 }, { "epoch": 75.60361445783133, "grad_norm": 16.351638793945312, "learning_rate": 1.2150485436893203e-05, "loss": 0.3147, "step": 7797 }, { "epoch": 75.61325301204819, "grad_norm": 3.0399246215820312, "learning_rate": 1.2145631067961166e-05, "loss": 0.2463, "step": 7798 }, { "epoch": 75.62289156626505, "grad_norm": 2.58821439743042, "learning_rate": 1.2140776699029127e-05, "loss": 0.3747, "step": 7799 }, { "epoch": 75.63253012048193, "grad_norm": 4.460848331451416, "learning_rate": 1.2135922330097088e-05, "loss": 0.2637, "step": 7800 }, { "epoch": 75.6421686746988, "grad_norm": 14.554647445678711, "learning_rate": 1.213106796116505e-05, "loss": 0.1978, "step": 7801 }, { "epoch": 75.65180722891566, "grad_norm": 3.7293708324432373, "learning_rate": 1.212621359223301e-05, "loss": 0.2768, "step": 7802 }, { "epoch": 75.66144578313254, "grad_norm": 2.600367307662964, "learning_rate": 1.2121359223300972e-05, "loss": 0.2955, "step": 7803 }, { "epoch": 75.6710843373494, "grad_norm": 12.135760307312012, "learning_rate": 1.2116504854368933e-05, "loss": 0.5491, "step": 7804 }, { "epoch": 75.68072289156626, "grad_norm": 3.9823601245880127, "learning_rate": 1.2111650485436894e-05, "loss": 0.1934, "step": 7805 }, { "epoch": 75.69036144578314, "grad_norm": 3.3720498085021973, "learning_rate": 1.2106796116504855e-05, "loss": 0.2402, "step": 7806 }, { "epoch": 75.7, "grad_norm": 14.449723243713379, "learning_rate": 1.2101941747572816e-05, "loss": 0.1299, "step": 7807 }, { "epoch": 75.70963855421687, "grad_norm": 6.763325214385986, "learning_rate": 1.2097087378640777e-05, "loss": 0.2951, "step": 7808 }, { "epoch": 75.71927710843373, "grad_norm": 1.8401535749435425, "learning_rate": 1.2092233009708739e-05, "loss": 0.2745, "step": 7809 }, { "epoch": 75.7289156626506, "grad_norm": 12.554854393005371, "learning_rate": 1.20873786407767e-05, "loss": 0.109, "step": 7810 }, { "epoch": 75.73855421686747, "grad_norm": 3.0334813594818115, "learning_rate": 1.208252427184466e-05, "loss": 0.3021, "step": 7811 }, { "epoch": 75.74819277108433, "grad_norm": 4.5790934562683105, "learning_rate": 1.2077669902912622e-05, "loss": 0.0842, "step": 7812 }, { "epoch": 75.75783132530121, "grad_norm": 16.51813507080078, "learning_rate": 1.2072815533980583e-05, "loss": 0.2233, "step": 7813 }, { "epoch": 75.76746987951807, "grad_norm": 7.864075660705566, "learning_rate": 1.2067961165048544e-05, "loss": 0.2848, "step": 7814 }, { "epoch": 75.77710843373494, "grad_norm": 3.879088878631592, "learning_rate": 1.2063106796116505e-05, "loss": 0.2718, "step": 7815 }, { "epoch": 75.7867469879518, "grad_norm": 3.942824125289917, "learning_rate": 1.2058252427184467e-05, "loss": 0.143, "step": 7816 }, { "epoch": 75.79638554216868, "grad_norm": 4.891573905944824, "learning_rate": 1.2053398058252428e-05, "loss": 0.2301, "step": 7817 }, { "epoch": 75.80602409638554, "grad_norm": 19.57550621032715, "learning_rate": 1.2048543689320389e-05, "loss": 0.4199, "step": 7818 }, { "epoch": 75.8156626506024, "grad_norm": 9.736475944519043, "learning_rate": 1.204368932038835e-05, "loss": 0.2071, "step": 7819 }, { "epoch": 75.82530120481928, "grad_norm": 4.155850887298584, "learning_rate": 1.2038834951456311e-05, "loss": 0.1431, "step": 7820 }, { "epoch": 75.83493975903615, "grad_norm": 20.20188331604004, "learning_rate": 1.2033980582524272e-05, "loss": 0.3181, "step": 7821 }, { "epoch": 75.84457831325301, "grad_norm": 21.94890785217285, "learning_rate": 1.2029126213592233e-05, "loss": 0.2595, "step": 7822 }, { "epoch": 75.85421686746987, "grad_norm": 2.9199280738830566, "learning_rate": 1.2024271844660195e-05, "loss": 0.2294, "step": 7823 }, { "epoch": 75.86385542168675, "grad_norm": 3.1532070636749268, "learning_rate": 1.2019417475728156e-05, "loss": 0.1821, "step": 7824 }, { "epoch": 75.87349397590361, "grad_norm": 6.033351898193359, "learning_rate": 1.2014563106796117e-05, "loss": 0.224, "step": 7825 }, { "epoch": 75.88313253012048, "grad_norm": 12.939510345458984, "learning_rate": 1.2009708737864078e-05, "loss": 0.2114, "step": 7826 }, { "epoch": 75.89277108433735, "grad_norm": 7.556155681610107, "learning_rate": 1.200485436893204e-05, "loss": 0.3424, "step": 7827 }, { "epoch": 75.90240963855422, "grad_norm": 14.985199928283691, "learning_rate": 1.2e-05, "loss": 0.1676, "step": 7828 }, { "epoch": 75.91204819277108, "grad_norm": 2.1180715560913086, "learning_rate": 1.1995145631067961e-05, "loss": 0.1003, "step": 7829 }, { "epoch": 75.92168674698796, "grad_norm": 1.0980054140090942, "learning_rate": 1.1990291262135923e-05, "loss": 0.1721, "step": 7830 }, { "epoch": 75.93132530120482, "grad_norm": 12.809814453125, "learning_rate": 1.1985436893203884e-05, "loss": 0.3076, "step": 7831 }, { "epoch": 75.94096385542169, "grad_norm": 3.633216381072998, "learning_rate": 1.1980582524271845e-05, "loss": 0.1072, "step": 7832 }, { "epoch": 75.95060240963855, "grad_norm": 9.412810325622559, "learning_rate": 1.1975728155339806e-05, "loss": 0.3177, "step": 7833 }, { "epoch": 75.96024096385543, "grad_norm": 15.787140846252441, "learning_rate": 1.1970873786407767e-05, "loss": 0.3486, "step": 7834 }, { "epoch": 75.96987951807229, "grad_norm": 6.050962924957275, "learning_rate": 1.1966019417475728e-05, "loss": 0.1812, "step": 7835 }, { "epoch": 75.97951807228915, "grad_norm": 23.025026321411133, "learning_rate": 1.1961165048543691e-05, "loss": 0.292, "step": 7836 }, { "epoch": 75.98915662650603, "grad_norm": 27.718645095825195, "learning_rate": 1.195631067961165e-05, "loss": 0.3047, "step": 7837 }, { "epoch": 76.00481927710844, "grad_norm": 2.780672073364258, "learning_rate": 1.1951456310679612e-05, "loss": 0.2809, "step": 7838 }, { "epoch": 76.0144578313253, "grad_norm": 9.576288223266602, "learning_rate": 1.1946601941747573e-05, "loss": 0.4894, "step": 7839 }, { "epoch": 76.02409638554217, "grad_norm": 2.8949074745178223, "learning_rate": 1.1941747572815534e-05, "loss": 0.2255, "step": 7840 }, { "epoch": 76.03373493975904, "grad_norm": 16.478572845458984, "learning_rate": 1.1936893203883495e-05, "loss": 0.278, "step": 7841 }, { "epoch": 76.0433734939759, "grad_norm": 5.5073041915893555, "learning_rate": 1.1932038834951458e-05, "loss": 0.2067, "step": 7842 }, { "epoch": 76.05301204819277, "grad_norm": 16.913549423217773, "learning_rate": 1.192718446601942e-05, "loss": 0.2858, "step": 7843 }, { "epoch": 76.06265060240963, "grad_norm": 18.60103988647461, "learning_rate": 1.1922330097087379e-05, "loss": 0.3141, "step": 7844 }, { "epoch": 76.07228915662651, "grad_norm": 9.402812957763672, "learning_rate": 1.191747572815534e-05, "loss": 0.3386, "step": 7845 }, { "epoch": 76.08192771084337, "grad_norm": 2.4603209495544434, "learning_rate": 1.1912621359223301e-05, "loss": 0.1141, "step": 7846 }, { "epoch": 76.09156626506024, "grad_norm": 1.7016280889511108, "learning_rate": 1.1907766990291262e-05, "loss": 0.1514, "step": 7847 }, { "epoch": 76.10120481927711, "grad_norm": 18.16265869140625, "learning_rate": 1.1902912621359223e-05, "loss": 0.2896, "step": 7848 }, { "epoch": 76.11084337349398, "grad_norm": 3.1364545822143555, "learning_rate": 1.1898058252427186e-05, "loss": 0.2353, "step": 7849 }, { "epoch": 76.12048192771084, "grad_norm": 11.420258522033691, "learning_rate": 1.1893203883495147e-05, "loss": 0.3607, "step": 7850 }, { "epoch": 76.1301204819277, "grad_norm": 1.3846080303192139, "learning_rate": 1.1888349514563107e-05, "loss": 0.141, "step": 7851 }, { "epoch": 76.13975903614458, "grad_norm": 13.60465145111084, "learning_rate": 1.1883495145631068e-05, "loss": 0.2017, "step": 7852 }, { "epoch": 76.14939759036145, "grad_norm": 1.622072458267212, "learning_rate": 1.1878640776699029e-05, "loss": 0.3187, "step": 7853 }, { "epoch": 76.15903614457831, "grad_norm": 3.299107074737549, "learning_rate": 1.187378640776699e-05, "loss": 0.1366, "step": 7854 }, { "epoch": 76.16867469879519, "grad_norm": 11.408197402954102, "learning_rate": 1.1868932038834953e-05, "loss": 0.233, "step": 7855 }, { "epoch": 76.17831325301205, "grad_norm": 6.4298601150512695, "learning_rate": 1.1864077669902914e-05, "loss": 0.2475, "step": 7856 }, { "epoch": 76.18795180722891, "grad_norm": 7.534939765930176, "learning_rate": 1.1859223300970875e-05, "loss": 0.1941, "step": 7857 }, { "epoch": 76.19759036144578, "grad_norm": 4.025649547576904, "learning_rate": 1.1854368932038835e-05, "loss": 0.1321, "step": 7858 }, { "epoch": 76.20722891566265, "grad_norm": 2.0551211833953857, "learning_rate": 1.1849514563106796e-05, "loss": 0.1749, "step": 7859 }, { "epoch": 76.21686746987952, "grad_norm": 4.684847831726074, "learning_rate": 1.1844660194174757e-05, "loss": 0.3059, "step": 7860 }, { "epoch": 76.22650602409638, "grad_norm": 6.84615421295166, "learning_rate": 1.183980582524272e-05, "loss": 0.2319, "step": 7861 }, { "epoch": 76.23614457831326, "grad_norm": 10.851419448852539, "learning_rate": 1.1834951456310681e-05, "loss": 0.2541, "step": 7862 }, { "epoch": 76.24578313253012, "grad_norm": 9.74658489227295, "learning_rate": 1.1830097087378642e-05, "loss": 0.3598, "step": 7863 }, { "epoch": 76.25542168674698, "grad_norm": 3.693387269973755, "learning_rate": 1.1825242718446603e-05, "loss": 0.2447, "step": 7864 }, { "epoch": 76.26506024096386, "grad_norm": 1.0012733936309814, "learning_rate": 1.1820388349514563e-05, "loss": 0.0815, "step": 7865 }, { "epoch": 76.27469879518073, "grad_norm": 4.319483280181885, "learning_rate": 1.1815533980582524e-05, "loss": 0.1115, "step": 7866 }, { "epoch": 76.28433734939759, "grad_norm": 32.547760009765625, "learning_rate": 1.1810679611650485e-05, "loss": 0.1319, "step": 7867 }, { "epoch": 76.29397590361445, "grad_norm": 2.6078200340270996, "learning_rate": 1.1805825242718448e-05, "loss": 0.2033, "step": 7868 }, { "epoch": 76.30361445783133, "grad_norm": 6.448188781738281, "learning_rate": 1.1800970873786409e-05, "loss": 0.593, "step": 7869 }, { "epoch": 76.3132530120482, "grad_norm": 2.372995138168335, "learning_rate": 1.179611650485437e-05, "loss": 0.0963, "step": 7870 }, { "epoch": 76.32289156626506, "grad_norm": 3.4634389877319336, "learning_rate": 1.1791262135922331e-05, "loss": 0.1757, "step": 7871 }, { "epoch": 76.33253012048193, "grad_norm": 5.5812811851501465, "learning_rate": 1.178640776699029e-05, "loss": 0.0961, "step": 7872 }, { "epoch": 76.3421686746988, "grad_norm": 7.471453666687012, "learning_rate": 1.1781553398058252e-05, "loss": 0.3468, "step": 7873 }, { "epoch": 76.35180722891566, "grad_norm": 2.3039047718048096, "learning_rate": 1.1776699029126215e-05, "loss": 0.1445, "step": 7874 }, { "epoch": 76.36144578313252, "grad_norm": 6.387101173400879, "learning_rate": 1.1771844660194176e-05, "loss": 0.1531, "step": 7875 }, { "epoch": 76.3710843373494, "grad_norm": 16.16396713256836, "learning_rate": 1.1766990291262137e-05, "loss": 0.3049, "step": 7876 }, { "epoch": 76.38072289156626, "grad_norm": 3.453948736190796, "learning_rate": 1.1762135922330098e-05, "loss": 0.0867, "step": 7877 }, { "epoch": 76.39036144578313, "grad_norm": 4.084175109863281, "learning_rate": 1.175728155339806e-05, "loss": 0.1353, "step": 7878 }, { "epoch": 76.4, "grad_norm": 4.471474647521973, "learning_rate": 1.1752427184466019e-05, "loss": 0.3068, "step": 7879 }, { "epoch": 76.40963855421687, "grad_norm": 4.2320475578308105, "learning_rate": 1.1747572815533982e-05, "loss": 0.3584, "step": 7880 }, { "epoch": 76.41927710843373, "grad_norm": 16.989255905151367, "learning_rate": 1.1742718446601943e-05, "loss": 0.1852, "step": 7881 }, { "epoch": 76.4289156626506, "grad_norm": 12.831215858459473, "learning_rate": 1.1737864077669904e-05, "loss": 0.2853, "step": 7882 }, { "epoch": 76.43855421686747, "grad_norm": 7.13157320022583, "learning_rate": 1.1733009708737865e-05, "loss": 0.3595, "step": 7883 }, { "epoch": 76.44819277108434, "grad_norm": 8.75289535522461, "learning_rate": 1.1728155339805826e-05, "loss": 0.337, "step": 7884 }, { "epoch": 76.4578313253012, "grad_norm": 9.866744041442871, "learning_rate": 1.1723300970873787e-05, "loss": 0.1435, "step": 7885 }, { "epoch": 76.46746987951808, "grad_norm": 5.594421863555908, "learning_rate": 1.1718446601941748e-05, "loss": 0.2337, "step": 7886 }, { "epoch": 76.47710843373494, "grad_norm": 20.44558334350586, "learning_rate": 1.171359223300971e-05, "loss": 0.3021, "step": 7887 }, { "epoch": 76.4867469879518, "grad_norm": 2.389986276626587, "learning_rate": 1.170873786407767e-05, "loss": 0.27, "step": 7888 }, { "epoch": 76.49638554216868, "grad_norm": 10.19232177734375, "learning_rate": 1.1703883495145632e-05, "loss": 0.2259, "step": 7889 }, { "epoch": 76.50602409638554, "grad_norm": 9.057320594787598, "learning_rate": 1.1699029126213593e-05, "loss": 0.2258, "step": 7890 }, { "epoch": 76.51566265060241, "grad_norm": 22.197376251220703, "learning_rate": 1.1694174757281554e-05, "loss": 0.2621, "step": 7891 }, { "epoch": 76.52530120481927, "grad_norm": 5.450077533721924, "learning_rate": 1.1689320388349515e-05, "loss": 0.1787, "step": 7892 }, { "epoch": 76.53493975903615, "grad_norm": 28.068708419799805, "learning_rate": 1.1684466019417476e-05, "loss": 0.2972, "step": 7893 }, { "epoch": 76.54457831325301, "grad_norm": 5.477574825286865, "learning_rate": 1.1679611650485438e-05, "loss": 0.1937, "step": 7894 }, { "epoch": 76.55421686746988, "grad_norm": 2.2767350673675537, "learning_rate": 1.1674757281553399e-05, "loss": 0.1059, "step": 7895 }, { "epoch": 76.56385542168675, "grad_norm": 4.575446128845215, "learning_rate": 1.166990291262136e-05, "loss": 0.1682, "step": 7896 }, { "epoch": 76.57349397590362, "grad_norm": 8.282829284667969, "learning_rate": 1.1665048543689321e-05, "loss": 0.2244, "step": 7897 }, { "epoch": 76.58313253012048, "grad_norm": 5.097293853759766, "learning_rate": 1.1660194174757282e-05, "loss": 0.1182, "step": 7898 }, { "epoch": 76.59277108433734, "grad_norm": 10.230706214904785, "learning_rate": 1.1655339805825243e-05, "loss": 0.1335, "step": 7899 }, { "epoch": 76.60240963855422, "grad_norm": 4.745948791503906, "learning_rate": 1.1650485436893204e-05, "loss": 0.1717, "step": 7900 }, { "epoch": 76.61204819277108, "grad_norm": 3.234022378921509, "learning_rate": 1.1645631067961166e-05, "loss": 0.1402, "step": 7901 }, { "epoch": 76.62168674698795, "grad_norm": 2.721261739730835, "learning_rate": 1.1640776699029127e-05, "loss": 0.0669, "step": 7902 }, { "epoch": 76.63132530120482, "grad_norm": 4.44167947769165, "learning_rate": 1.1635922330097088e-05, "loss": 0.419, "step": 7903 }, { "epoch": 76.64096385542169, "grad_norm": 8.197081565856934, "learning_rate": 1.1631067961165049e-05, "loss": 0.2364, "step": 7904 }, { "epoch": 76.65060240963855, "grad_norm": 4.645725250244141, "learning_rate": 1.162621359223301e-05, "loss": 0.4233, "step": 7905 }, { "epoch": 76.66024096385541, "grad_norm": 4.124464511871338, "learning_rate": 1.1621359223300971e-05, "loss": 0.2243, "step": 7906 }, { "epoch": 76.66987951807229, "grad_norm": 5.801762580871582, "learning_rate": 1.1616504854368933e-05, "loss": 0.2495, "step": 7907 }, { "epoch": 76.67951807228916, "grad_norm": 8.122299194335938, "learning_rate": 1.1611650485436894e-05, "loss": 0.1826, "step": 7908 }, { "epoch": 76.68915662650602, "grad_norm": 5.917198181152344, "learning_rate": 1.1606796116504855e-05, "loss": 0.2245, "step": 7909 }, { "epoch": 76.6987951807229, "grad_norm": 2.898120880126953, "learning_rate": 1.1601941747572816e-05, "loss": 0.2054, "step": 7910 }, { "epoch": 76.70843373493976, "grad_norm": 5.61959981918335, "learning_rate": 1.1597087378640777e-05, "loss": 0.1478, "step": 7911 }, { "epoch": 76.71807228915662, "grad_norm": 13.049211502075195, "learning_rate": 1.1592233009708738e-05, "loss": 0.3393, "step": 7912 }, { "epoch": 76.7277108433735, "grad_norm": 8.043731689453125, "learning_rate": 1.15873786407767e-05, "loss": 0.2806, "step": 7913 }, { "epoch": 76.73734939759036, "grad_norm": 3.027294397354126, "learning_rate": 1.158252427184466e-05, "loss": 0.264, "step": 7914 }, { "epoch": 76.74698795180723, "grad_norm": 15.731002807617188, "learning_rate": 1.1577669902912622e-05, "loss": 0.4067, "step": 7915 }, { "epoch": 76.75662650602409, "grad_norm": 7.200222969055176, "learning_rate": 1.1572815533980583e-05, "loss": 0.2969, "step": 7916 }, { "epoch": 76.76626506024097, "grad_norm": 1.945034146308899, "learning_rate": 1.1567961165048544e-05, "loss": 0.1456, "step": 7917 }, { "epoch": 76.77590361445783, "grad_norm": 3.0804800987243652, "learning_rate": 1.1563106796116507e-05, "loss": 0.1147, "step": 7918 }, { "epoch": 76.7855421686747, "grad_norm": 5.073786735534668, "learning_rate": 1.1558252427184466e-05, "loss": 0.2679, "step": 7919 }, { "epoch": 76.79518072289157, "grad_norm": 6.390594005584717, "learning_rate": 1.1553398058252427e-05, "loss": 0.2544, "step": 7920 }, { "epoch": 76.80481927710844, "grad_norm": 2.341226577758789, "learning_rate": 1.1548543689320389e-05, "loss": 0.2368, "step": 7921 }, { "epoch": 76.8144578313253, "grad_norm": 2.141303777694702, "learning_rate": 1.154368932038835e-05, "loss": 0.2128, "step": 7922 }, { "epoch": 76.82409638554216, "grad_norm": 4.23491907119751, "learning_rate": 1.153883495145631e-05, "loss": 0.3053, "step": 7923 }, { "epoch": 76.83373493975904, "grad_norm": 14.072453498840332, "learning_rate": 1.1533980582524274e-05, "loss": 0.3554, "step": 7924 }, { "epoch": 76.8433734939759, "grad_norm": 3.3626208305358887, "learning_rate": 1.1529126213592235e-05, "loss": 0.2883, "step": 7925 }, { "epoch": 76.85301204819277, "grad_norm": 12.358016967773438, "learning_rate": 1.1524271844660194e-05, "loss": 0.3425, "step": 7926 }, { "epoch": 76.86265060240964, "grad_norm": 11.367598533630371, "learning_rate": 1.1519417475728155e-05, "loss": 0.3366, "step": 7927 }, { "epoch": 76.87228915662651, "grad_norm": 7.388315200805664, "learning_rate": 1.1514563106796117e-05, "loss": 0.0798, "step": 7928 }, { "epoch": 76.88192771084337, "grad_norm": 9.839822769165039, "learning_rate": 1.1509708737864078e-05, "loss": 0.2367, "step": 7929 }, { "epoch": 76.89156626506023, "grad_norm": 8.258691787719727, "learning_rate": 1.1504854368932039e-05, "loss": 0.3136, "step": 7930 }, { "epoch": 76.90120481927711, "grad_norm": 3.8852176666259766, "learning_rate": 1.1500000000000002e-05, "loss": 0.2288, "step": 7931 }, { "epoch": 76.91084337349398, "grad_norm": 3.5607950687408447, "learning_rate": 1.1495145631067961e-05, "loss": 0.1429, "step": 7932 }, { "epoch": 76.92048192771084, "grad_norm": 1.9592620134353638, "learning_rate": 1.1490291262135922e-05, "loss": 0.1125, "step": 7933 }, { "epoch": 76.93012048192772, "grad_norm": 18.885700225830078, "learning_rate": 1.1485436893203883e-05, "loss": 0.35, "step": 7934 }, { "epoch": 76.93975903614458, "grad_norm": 7.698785305023193, "learning_rate": 1.1480582524271845e-05, "loss": 0.1402, "step": 7935 }, { "epoch": 76.94939759036144, "grad_norm": 16.38745880126953, "learning_rate": 1.1475728155339806e-05, "loss": 0.2733, "step": 7936 }, { "epoch": 76.95903614457832, "grad_norm": 2.752030849456787, "learning_rate": 1.1470873786407769e-05, "loss": 0.1822, "step": 7937 }, { "epoch": 76.96867469879518, "grad_norm": 5.463254451751709, "learning_rate": 1.146601941747573e-05, "loss": 0.292, "step": 7938 }, { "epoch": 76.97831325301205, "grad_norm": 2.9709951877593994, "learning_rate": 1.146116504854369e-05, "loss": 0.1662, "step": 7939 }, { "epoch": 76.98795180722891, "grad_norm": 30.195032119750977, "learning_rate": 1.145631067961165e-05, "loss": 0.3507, "step": 7940 }, { "epoch": 77.00361445783132, "grad_norm": 8.914795875549316, "learning_rate": 1.1451456310679611e-05, "loss": 0.1858, "step": 7941 }, { "epoch": 77.0132530120482, "grad_norm": 1.9635788202285767, "learning_rate": 1.1446601941747573e-05, "loss": 0.1383, "step": 7942 }, { "epoch": 77.02289156626506, "grad_norm": 4.455480098724365, "learning_rate": 1.1441747572815535e-05, "loss": 0.1581, "step": 7943 }, { "epoch": 77.03253012048192, "grad_norm": 7.694397926330566, "learning_rate": 1.1436893203883497e-05, "loss": 0.3327, "step": 7944 }, { "epoch": 77.0421686746988, "grad_norm": 10.4535493850708, "learning_rate": 1.1432038834951458e-05, "loss": 0.3197, "step": 7945 }, { "epoch": 77.05180722891566, "grad_norm": 4.88520622253418, "learning_rate": 1.1427184466019417e-05, "loss": 0.3317, "step": 7946 }, { "epoch": 77.06144578313253, "grad_norm": 3.596242666244507, "learning_rate": 1.1422330097087378e-05, "loss": 0.2946, "step": 7947 }, { "epoch": 77.0710843373494, "grad_norm": 6.495180130004883, "learning_rate": 1.141747572815534e-05, "loss": 0.0821, "step": 7948 }, { "epoch": 77.08072289156627, "grad_norm": 2.1064047813415527, "learning_rate": 1.14126213592233e-05, "loss": 0.1763, "step": 7949 }, { "epoch": 77.09036144578313, "grad_norm": 12.163755416870117, "learning_rate": 1.1407766990291263e-05, "loss": 0.2326, "step": 7950 }, { "epoch": 77.1, "grad_norm": 6.708062648773193, "learning_rate": 1.1402912621359225e-05, "loss": 0.1314, "step": 7951 }, { "epoch": 77.10963855421687, "grad_norm": 9.551826477050781, "learning_rate": 1.1398058252427186e-05, "loss": 0.3126, "step": 7952 }, { "epoch": 77.11927710843374, "grad_norm": 5.4259490966796875, "learning_rate": 1.1393203883495145e-05, "loss": 0.2887, "step": 7953 }, { "epoch": 77.1289156626506, "grad_norm": 11.865653038024902, "learning_rate": 1.1388349514563106e-05, "loss": 0.3042, "step": 7954 }, { "epoch": 77.13855421686748, "grad_norm": 4.223208904266357, "learning_rate": 1.1383495145631068e-05, "loss": 0.2968, "step": 7955 }, { "epoch": 77.14819277108434, "grad_norm": 9.756196022033691, "learning_rate": 1.137864077669903e-05, "loss": 0.2152, "step": 7956 }, { "epoch": 77.1578313253012, "grad_norm": 12.145014762878418, "learning_rate": 1.1373786407766991e-05, "loss": 0.3785, "step": 7957 }, { "epoch": 77.16746987951807, "grad_norm": 9.91189956665039, "learning_rate": 1.1368932038834953e-05, "loss": 0.2545, "step": 7958 }, { "epoch": 77.17710843373494, "grad_norm": 15.817997932434082, "learning_rate": 1.1364077669902914e-05, "loss": 0.2921, "step": 7959 }, { "epoch": 77.1867469879518, "grad_norm": 3.2535626888275146, "learning_rate": 1.1359223300970873e-05, "loss": 0.1425, "step": 7960 }, { "epoch": 77.19638554216867, "grad_norm": 1.5654401779174805, "learning_rate": 1.1354368932038834e-05, "loss": 0.04, "step": 7961 }, { "epoch": 77.20602409638555, "grad_norm": 2.5266823768615723, "learning_rate": 1.1349514563106797e-05, "loss": 0.157, "step": 7962 }, { "epoch": 77.21566265060241, "grad_norm": 11.647339820861816, "learning_rate": 1.1344660194174758e-05, "loss": 0.3553, "step": 7963 }, { "epoch": 77.22530120481927, "grad_norm": 3.900158405303955, "learning_rate": 1.133980582524272e-05, "loss": 0.3552, "step": 7964 }, { "epoch": 77.23493975903614, "grad_norm": 3.385779857635498, "learning_rate": 1.133495145631068e-05, "loss": 0.284, "step": 7965 }, { "epoch": 77.24457831325302, "grad_norm": 12.831918716430664, "learning_rate": 1.1330097087378642e-05, "loss": 0.1501, "step": 7966 }, { "epoch": 77.25421686746988, "grad_norm": 7.942318439483643, "learning_rate": 1.1325242718446601e-05, "loss": 0.526, "step": 7967 }, { "epoch": 77.26385542168674, "grad_norm": 3.585317850112915, "learning_rate": 1.1320388349514564e-05, "loss": 0.2036, "step": 7968 }, { "epoch": 77.27349397590362, "grad_norm": 5.489278793334961, "learning_rate": 1.1315533980582525e-05, "loss": 0.3424, "step": 7969 }, { "epoch": 77.28313253012048, "grad_norm": 4.116550445556641, "learning_rate": 1.1310679611650486e-05, "loss": 0.1516, "step": 7970 }, { "epoch": 77.29277108433735, "grad_norm": 4.2653937339782715, "learning_rate": 1.1305825242718448e-05, "loss": 0.1829, "step": 7971 }, { "epoch": 77.30240963855422, "grad_norm": 6.957610607147217, "learning_rate": 1.1300970873786409e-05, "loss": 0.3499, "step": 7972 }, { "epoch": 77.31204819277109, "grad_norm": 7.4699931144714355, "learning_rate": 1.129611650485437e-05, "loss": 0.3091, "step": 7973 }, { "epoch": 77.32168674698795, "grad_norm": 5.762004852294922, "learning_rate": 1.129126213592233e-05, "loss": 0.3452, "step": 7974 }, { "epoch": 77.33132530120481, "grad_norm": 5.74900484085083, "learning_rate": 1.1286407766990292e-05, "loss": 0.311, "step": 7975 }, { "epoch": 77.34096385542169, "grad_norm": 3.9299960136413574, "learning_rate": 1.1281553398058253e-05, "loss": 0.3397, "step": 7976 }, { "epoch": 77.35060240963855, "grad_norm": 5.732352256774902, "learning_rate": 1.1276699029126214e-05, "loss": 0.3591, "step": 7977 }, { "epoch": 77.36024096385542, "grad_norm": 4.375752925872803, "learning_rate": 1.1271844660194176e-05, "loss": 0.2571, "step": 7978 }, { "epoch": 77.3698795180723, "grad_norm": 16.11515235900879, "learning_rate": 1.1266990291262137e-05, "loss": 0.3082, "step": 7979 }, { "epoch": 77.37951807228916, "grad_norm": 20.42410659790039, "learning_rate": 1.1262135922330098e-05, "loss": 0.2402, "step": 7980 }, { "epoch": 77.38915662650602, "grad_norm": 5.1590189933776855, "learning_rate": 1.1257281553398059e-05, "loss": 0.1519, "step": 7981 }, { "epoch": 77.39879518072289, "grad_norm": 7.181189060211182, "learning_rate": 1.125242718446602e-05, "loss": 0.2987, "step": 7982 }, { "epoch": 77.40843373493976, "grad_norm": 2.8566930294036865, "learning_rate": 1.1247572815533981e-05, "loss": 0.123, "step": 7983 }, { "epoch": 77.41807228915663, "grad_norm": 4.394765853881836, "learning_rate": 1.1242718446601942e-05, "loss": 0.1355, "step": 7984 }, { "epoch": 77.42771084337349, "grad_norm": 4.675063133239746, "learning_rate": 1.1237864077669904e-05, "loss": 0.2589, "step": 7985 }, { "epoch": 77.43734939759037, "grad_norm": 6.606180191040039, "learning_rate": 1.1233009708737865e-05, "loss": 0.2785, "step": 7986 }, { "epoch": 77.44698795180723, "grad_norm": 8.913394927978516, "learning_rate": 1.1228155339805826e-05, "loss": 0.2932, "step": 7987 }, { "epoch": 77.4566265060241, "grad_norm": 3.0647659301757812, "learning_rate": 1.1223300970873787e-05, "loss": 0.1955, "step": 7988 }, { "epoch": 77.46626506024096, "grad_norm": 12.633268356323242, "learning_rate": 1.1218446601941748e-05, "loss": 0.2009, "step": 7989 }, { "epoch": 77.47590361445783, "grad_norm": 5.418723106384277, "learning_rate": 1.121359223300971e-05, "loss": 0.3097, "step": 7990 }, { "epoch": 77.4855421686747, "grad_norm": 15.470754623413086, "learning_rate": 1.120873786407767e-05, "loss": 0.3242, "step": 7991 }, { "epoch": 77.49518072289156, "grad_norm": 5.935530662536621, "learning_rate": 1.1203883495145632e-05, "loss": 0.2387, "step": 7992 }, { "epoch": 77.50481927710844, "grad_norm": 14.510820388793945, "learning_rate": 1.1199029126213593e-05, "loss": 0.3327, "step": 7993 }, { "epoch": 77.5144578313253, "grad_norm": 4.794726371765137, "learning_rate": 1.1194174757281554e-05, "loss": 0.0937, "step": 7994 }, { "epoch": 77.52409638554217, "grad_norm": 9.54537296295166, "learning_rate": 1.1189320388349515e-05, "loss": 0.4385, "step": 7995 }, { "epoch": 77.53373493975904, "grad_norm": 5.0150628089904785, "learning_rate": 1.1184466019417476e-05, "loss": 0.2577, "step": 7996 }, { "epoch": 77.5433734939759, "grad_norm": 2.590106964111328, "learning_rate": 1.1179611650485437e-05, "loss": 0.0712, "step": 7997 }, { "epoch": 77.55301204819277, "grad_norm": 7.007582187652588, "learning_rate": 1.1174757281553398e-05, "loss": 0.3037, "step": 7998 }, { "epoch": 77.56265060240963, "grad_norm": 5.6483306884765625, "learning_rate": 1.116990291262136e-05, "loss": 0.1884, "step": 7999 }, { "epoch": 77.57228915662651, "grad_norm": 4.357292652130127, "learning_rate": 1.116504854368932e-05, "loss": 0.316, "step": 8000 }, { "epoch": 77.58192771084337, "grad_norm": 8.102478981018066, "learning_rate": 1.1160194174757282e-05, "loss": 0.3085, "step": 8001 }, { "epoch": 77.59156626506024, "grad_norm": 3.6312270164489746, "learning_rate": 1.1155339805825243e-05, "loss": 0.0984, "step": 8002 }, { "epoch": 77.60120481927711, "grad_norm": 2.352238416671753, "learning_rate": 1.1150485436893204e-05, "loss": 0.2666, "step": 8003 }, { "epoch": 77.61084337349398, "grad_norm": 4.217928409576416, "learning_rate": 1.1145631067961165e-05, "loss": 0.1925, "step": 8004 }, { "epoch": 77.62048192771084, "grad_norm": 2.945528268814087, "learning_rate": 1.1140776699029126e-05, "loss": 0.1817, "step": 8005 }, { "epoch": 77.6301204819277, "grad_norm": 2.72294020652771, "learning_rate": 1.113592233009709e-05, "loss": 0.1159, "step": 8006 }, { "epoch": 77.63975903614458, "grad_norm": 69.3137435913086, "learning_rate": 1.1131067961165049e-05, "loss": 0.3, "step": 8007 }, { "epoch": 77.64939759036145, "grad_norm": 4.5378336906433105, "learning_rate": 1.112621359223301e-05, "loss": 0.3715, "step": 8008 }, { "epoch": 77.65903614457831, "grad_norm": 1.9045451879501343, "learning_rate": 1.1121359223300971e-05, "loss": 0.1573, "step": 8009 }, { "epoch": 77.66867469879519, "grad_norm": 2.724374294281006, "learning_rate": 1.1116504854368932e-05, "loss": 0.119, "step": 8010 }, { "epoch": 77.67831325301205, "grad_norm": 7.287959098815918, "learning_rate": 1.1111650485436893e-05, "loss": 0.2173, "step": 8011 }, { "epoch": 77.68795180722891, "grad_norm": 5.338148593902588, "learning_rate": 1.1106796116504855e-05, "loss": 0.3433, "step": 8012 }, { "epoch": 77.69759036144578, "grad_norm": 10.856697082519531, "learning_rate": 1.1101941747572817e-05, "loss": 0.2848, "step": 8013 }, { "epoch": 77.70722891566265, "grad_norm": 3.1494064331054688, "learning_rate": 1.1097087378640777e-05, "loss": 0.1754, "step": 8014 }, { "epoch": 77.71686746987952, "grad_norm": 9.438427925109863, "learning_rate": 1.1092233009708738e-05, "loss": 0.2674, "step": 8015 }, { "epoch": 77.72650602409638, "grad_norm": 13.062240600585938, "learning_rate": 1.1087378640776699e-05, "loss": 0.2303, "step": 8016 }, { "epoch": 77.73614457831326, "grad_norm": 6.082959175109863, "learning_rate": 1.108252427184466e-05, "loss": 0.5711, "step": 8017 }, { "epoch": 77.74578313253012, "grad_norm": 3.235393762588501, "learning_rate": 1.1077669902912621e-05, "loss": 0.1579, "step": 8018 }, { "epoch": 77.75542168674698, "grad_norm": 3.131571054458618, "learning_rate": 1.1072815533980584e-05, "loss": 0.2258, "step": 8019 }, { "epoch": 77.76506024096386, "grad_norm": 3.65519118309021, "learning_rate": 1.1067961165048545e-05, "loss": 0.124, "step": 8020 }, { "epoch": 77.77469879518073, "grad_norm": 3.923445701599121, "learning_rate": 1.1063106796116505e-05, "loss": 0.1352, "step": 8021 }, { "epoch": 77.78433734939759, "grad_norm": 3.7818350791931152, "learning_rate": 1.1058252427184466e-05, "loss": 0.3012, "step": 8022 }, { "epoch": 77.79397590361445, "grad_norm": 14.180514335632324, "learning_rate": 1.1053398058252427e-05, "loss": 0.2964, "step": 8023 }, { "epoch": 77.80361445783133, "grad_norm": 4.005202293395996, "learning_rate": 1.1048543689320388e-05, "loss": 0.1489, "step": 8024 }, { "epoch": 77.8132530120482, "grad_norm": 1.7397199869155884, "learning_rate": 1.1043689320388351e-05, "loss": 0.2426, "step": 8025 }, { "epoch": 77.82289156626506, "grad_norm": 3.3046562671661377, "learning_rate": 1.1038834951456312e-05, "loss": 0.1989, "step": 8026 }, { "epoch": 77.83253012048193, "grad_norm": 14.399526596069336, "learning_rate": 1.1033980582524273e-05, "loss": 0.4649, "step": 8027 }, { "epoch": 77.8421686746988, "grad_norm": 3.0284805297851562, "learning_rate": 1.1029126213592233e-05, "loss": 0.1456, "step": 8028 }, { "epoch": 77.85180722891566, "grad_norm": 8.547562599182129, "learning_rate": 1.1024271844660194e-05, "loss": 0.3384, "step": 8029 }, { "epoch": 77.86144578313252, "grad_norm": 4.5300798416137695, "learning_rate": 1.1019417475728155e-05, "loss": 0.2001, "step": 8030 }, { "epoch": 77.8710843373494, "grad_norm": 2.9030282497406006, "learning_rate": 1.1014563106796116e-05, "loss": 0.1855, "step": 8031 }, { "epoch": 77.88072289156626, "grad_norm": 3.7725484371185303, "learning_rate": 1.1009708737864079e-05, "loss": 0.3212, "step": 8032 }, { "epoch": 77.89036144578313, "grad_norm": 3.7813878059387207, "learning_rate": 1.100485436893204e-05, "loss": 0.1972, "step": 8033 }, { "epoch": 77.9, "grad_norm": 7.097784519195557, "learning_rate": 1.1000000000000001e-05, "loss": 0.2412, "step": 8034 }, { "epoch": 77.90963855421687, "grad_norm": 10.171703338623047, "learning_rate": 1.0995145631067961e-05, "loss": 0.4104, "step": 8035 }, { "epoch": 77.91927710843373, "grad_norm": 4.554628849029541, "learning_rate": 1.0990291262135922e-05, "loss": 0.1805, "step": 8036 }, { "epoch": 77.9289156626506, "grad_norm": 9.188939094543457, "learning_rate": 1.0985436893203883e-05, "loss": 0.2531, "step": 8037 }, { "epoch": 77.93855421686747, "grad_norm": 5.476193428039551, "learning_rate": 1.0980582524271846e-05, "loss": 0.3466, "step": 8038 }, { "epoch": 77.94819277108434, "grad_norm": 3.0332412719726562, "learning_rate": 1.0975728155339807e-05, "loss": 0.0829, "step": 8039 }, { "epoch": 77.9578313253012, "grad_norm": 6.403132438659668, "learning_rate": 1.0970873786407768e-05, "loss": 0.422, "step": 8040 }, { "epoch": 77.96746987951808, "grad_norm": 4.844537734985352, "learning_rate": 1.096601941747573e-05, "loss": 0.2291, "step": 8041 }, { "epoch": 77.97710843373494, "grad_norm": 3.0033998489379883, "learning_rate": 1.0961165048543689e-05, "loss": 0.2675, "step": 8042 }, { "epoch": 77.9867469879518, "grad_norm": 11.71489143371582, "learning_rate": 1.095631067961165e-05, "loss": 0.2303, "step": 8043 }, { "epoch": 78.00240963855421, "grad_norm": 5.627169609069824, "learning_rate": 1.0951456310679613e-05, "loss": 0.2937, "step": 8044 }, { "epoch": 78.01204819277109, "grad_norm": 8.871598243713379, "learning_rate": 1.0946601941747574e-05, "loss": 0.4295, "step": 8045 }, { "epoch": 78.02168674698795, "grad_norm": 3.7408246994018555, "learning_rate": 1.0941747572815535e-05, "loss": 0.1655, "step": 8046 }, { "epoch": 78.03132530120482, "grad_norm": 11.250763893127441, "learning_rate": 1.0936893203883496e-05, "loss": 0.3063, "step": 8047 }, { "epoch": 78.04096385542168, "grad_norm": 5.2554545402526855, "learning_rate": 1.0932038834951456e-05, "loss": 0.1377, "step": 8048 }, { "epoch": 78.05060240963856, "grad_norm": 12.877429008483887, "learning_rate": 1.0927184466019417e-05, "loss": 0.1812, "step": 8049 }, { "epoch": 78.06024096385542, "grad_norm": 12.371763229370117, "learning_rate": 1.0922330097087378e-05, "loss": 0.2184, "step": 8050 }, { "epoch": 78.06987951807228, "grad_norm": 5.57973051071167, "learning_rate": 1.0917475728155341e-05, "loss": 0.2563, "step": 8051 }, { "epoch": 78.07951807228916, "grad_norm": 2.568394660949707, "learning_rate": 1.0912621359223302e-05, "loss": 0.1145, "step": 8052 }, { "epoch": 78.08915662650602, "grad_norm": 3.3784961700439453, "learning_rate": 1.0907766990291263e-05, "loss": 0.2491, "step": 8053 }, { "epoch": 78.09879518072289, "grad_norm": 4.9276442527771, "learning_rate": 1.0902912621359224e-05, "loss": 0.2489, "step": 8054 }, { "epoch": 78.10843373493977, "grad_norm": 7.374259948730469, "learning_rate": 1.0898058252427184e-05, "loss": 0.4049, "step": 8055 }, { "epoch": 78.11807228915663, "grad_norm": 10.684683799743652, "learning_rate": 1.0893203883495145e-05, "loss": 0.2671, "step": 8056 }, { "epoch": 78.12771084337349, "grad_norm": 6.708935737609863, "learning_rate": 1.0888349514563108e-05, "loss": 0.4678, "step": 8057 }, { "epoch": 78.13734939759036, "grad_norm": 3.898582935333252, "learning_rate": 1.0883495145631069e-05, "loss": 0.1637, "step": 8058 }, { "epoch": 78.14698795180723, "grad_norm": 3.6775476932525635, "learning_rate": 1.087864077669903e-05, "loss": 0.2687, "step": 8059 }, { "epoch": 78.1566265060241, "grad_norm": 11.891292572021484, "learning_rate": 1.0873786407766991e-05, "loss": 0.1597, "step": 8060 }, { "epoch": 78.16626506024096, "grad_norm": 2.802706480026245, "learning_rate": 1.0868932038834952e-05, "loss": 0.1909, "step": 8061 }, { "epoch": 78.17590361445784, "grad_norm": 4.60139274597168, "learning_rate": 1.0864077669902912e-05, "loss": 0.2141, "step": 8062 }, { "epoch": 78.1855421686747, "grad_norm": 6.23764705657959, "learning_rate": 1.0859223300970875e-05, "loss": 0.311, "step": 8063 }, { "epoch": 78.19518072289156, "grad_norm": 11.078167915344238, "learning_rate": 1.0854368932038836e-05, "loss": 0.2773, "step": 8064 }, { "epoch": 78.20481927710843, "grad_norm": 7.550094127655029, "learning_rate": 1.0849514563106797e-05, "loss": 0.209, "step": 8065 }, { "epoch": 78.2144578313253, "grad_norm": 5.608719348907471, "learning_rate": 1.0844660194174758e-05, "loss": 0.2268, "step": 8066 }, { "epoch": 78.22409638554217, "grad_norm": 6.2266693115234375, "learning_rate": 1.083980582524272e-05, "loss": 0.5662, "step": 8067 }, { "epoch": 78.23373493975903, "grad_norm": 3.432152032852173, "learning_rate": 1.083495145631068e-05, "loss": 0.2302, "step": 8068 }, { "epoch": 78.24337349397591, "grad_norm": 13.679170608520508, "learning_rate": 1.0830097087378642e-05, "loss": 0.4032, "step": 8069 }, { "epoch": 78.25301204819277, "grad_norm": 16.68718910217285, "learning_rate": 1.0825242718446603e-05, "loss": 0.24, "step": 8070 }, { "epoch": 78.26265060240964, "grad_norm": 7.036933898925781, "learning_rate": 1.0820388349514564e-05, "loss": 0.2643, "step": 8071 }, { "epoch": 78.2722891566265, "grad_norm": 7.175605773925781, "learning_rate": 1.0815533980582525e-05, "loss": 0.398, "step": 8072 }, { "epoch": 78.28192771084338, "grad_norm": 5.720911026000977, "learning_rate": 1.0810679611650486e-05, "loss": 0.2484, "step": 8073 }, { "epoch": 78.29156626506024, "grad_norm": 3.295226812362671, "learning_rate": 1.0805825242718447e-05, "loss": 0.1822, "step": 8074 }, { "epoch": 78.3012048192771, "grad_norm": 4.622705936431885, "learning_rate": 1.0800970873786408e-05, "loss": 0.2249, "step": 8075 }, { "epoch": 78.31084337349398, "grad_norm": 4.823983669281006, "learning_rate": 1.079611650485437e-05, "loss": 0.2336, "step": 8076 }, { "epoch": 78.32048192771084, "grad_norm": 6.022970199584961, "learning_rate": 1.079126213592233e-05, "loss": 0.0806, "step": 8077 }, { "epoch": 78.33012048192771, "grad_norm": 8.326544761657715, "learning_rate": 1.0786407766990292e-05, "loss": 0.3142, "step": 8078 }, { "epoch": 78.33975903614459, "grad_norm": 5.052234172821045, "learning_rate": 1.0781553398058253e-05, "loss": 0.2627, "step": 8079 }, { "epoch": 78.34939759036145, "grad_norm": 3.6208064556121826, "learning_rate": 1.0776699029126214e-05, "loss": 0.2143, "step": 8080 }, { "epoch": 78.35903614457831, "grad_norm": 6.364253044128418, "learning_rate": 1.0771844660194175e-05, "loss": 0.4324, "step": 8081 }, { "epoch": 78.36867469879518, "grad_norm": 4.4692583084106445, "learning_rate": 1.0766990291262136e-05, "loss": 0.1199, "step": 8082 }, { "epoch": 78.37831325301205, "grad_norm": 15.56157112121582, "learning_rate": 1.0762135922330098e-05, "loss": 0.2949, "step": 8083 }, { "epoch": 78.38795180722892, "grad_norm": 9.54498291015625, "learning_rate": 1.0757281553398059e-05, "loss": 0.2164, "step": 8084 }, { "epoch": 78.39759036144578, "grad_norm": 1.6405442953109741, "learning_rate": 1.075242718446602e-05, "loss": 0.0416, "step": 8085 }, { "epoch": 78.40722891566266, "grad_norm": 5.45937967300415, "learning_rate": 1.0747572815533981e-05, "loss": 0.2996, "step": 8086 }, { "epoch": 78.41686746987952, "grad_norm": 3.4709110260009766, "learning_rate": 1.0742718446601942e-05, "loss": 0.2442, "step": 8087 }, { "epoch": 78.42650602409638, "grad_norm": 7.84548807144165, "learning_rate": 1.0737864077669903e-05, "loss": 0.5448, "step": 8088 }, { "epoch": 78.43614457831325, "grad_norm": 14.036880493164062, "learning_rate": 1.0733009708737864e-05, "loss": 0.284, "step": 8089 }, { "epoch": 78.44578313253012, "grad_norm": 5.468597412109375, "learning_rate": 1.0728155339805826e-05, "loss": 0.2035, "step": 8090 }, { "epoch": 78.45542168674699, "grad_norm": 4.430537700653076, "learning_rate": 1.0723300970873787e-05, "loss": 0.221, "step": 8091 }, { "epoch": 78.46506024096385, "grad_norm": 1.783774733543396, "learning_rate": 1.0718446601941748e-05, "loss": 0.1418, "step": 8092 }, { "epoch": 78.47469879518073, "grad_norm": 4.0393967628479, "learning_rate": 1.0713592233009709e-05, "loss": 0.2458, "step": 8093 }, { "epoch": 78.48433734939759, "grad_norm": 7.647762775421143, "learning_rate": 1.070873786407767e-05, "loss": 0.3362, "step": 8094 }, { "epoch": 78.49397590361446, "grad_norm": 15.22749137878418, "learning_rate": 1.0703883495145631e-05, "loss": 0.4949, "step": 8095 }, { "epoch": 78.50361445783132, "grad_norm": 9.414151191711426, "learning_rate": 1.0699029126213592e-05, "loss": 0.2117, "step": 8096 }, { "epoch": 78.5132530120482, "grad_norm": 21.526649475097656, "learning_rate": 1.0694174757281554e-05, "loss": 0.2781, "step": 8097 }, { "epoch": 78.52289156626506, "grad_norm": 7.631666660308838, "learning_rate": 1.0689320388349515e-05, "loss": 0.2639, "step": 8098 }, { "epoch": 78.53253012048192, "grad_norm": 6.4146223068237305, "learning_rate": 1.0684466019417476e-05, "loss": 0.1839, "step": 8099 }, { "epoch": 78.5421686746988, "grad_norm": 5.022739410400391, "learning_rate": 1.0679611650485437e-05, "loss": 0.226, "step": 8100 }, { "epoch": 78.55180722891566, "grad_norm": 4.2991108894348145, "learning_rate": 1.06747572815534e-05, "loss": 0.2401, "step": 8101 }, { "epoch": 78.56144578313253, "grad_norm": 11.690465927124023, "learning_rate": 1.066990291262136e-05, "loss": 0.2365, "step": 8102 }, { "epoch": 78.5710843373494, "grad_norm": 9.346451759338379, "learning_rate": 1.066504854368932e-05, "loss": 0.1486, "step": 8103 }, { "epoch": 78.58072289156627, "grad_norm": 4.144191741943359, "learning_rate": 1.0660194174757282e-05, "loss": 0.3109, "step": 8104 }, { "epoch": 78.59036144578313, "grad_norm": 3.0721848011016846, "learning_rate": 1.0655339805825243e-05, "loss": 0.215, "step": 8105 }, { "epoch": 78.6, "grad_norm": 4.211848258972168, "learning_rate": 1.0650485436893204e-05, "loss": 0.337, "step": 8106 }, { "epoch": 78.60963855421687, "grad_norm": 3.431436538696289, "learning_rate": 1.0645631067961167e-05, "loss": 0.2653, "step": 8107 }, { "epoch": 78.61927710843374, "grad_norm": 18.40499496459961, "learning_rate": 1.0640776699029128e-05, "loss": 0.342, "step": 8108 }, { "epoch": 78.6289156626506, "grad_norm": 4.941100120544434, "learning_rate": 1.0635922330097087e-05, "loss": 0.3514, "step": 8109 }, { "epoch": 78.63855421686748, "grad_norm": 6.058129787445068, "learning_rate": 1.0631067961165048e-05, "loss": 0.1843, "step": 8110 }, { "epoch": 78.64819277108434, "grad_norm": 5.629212379455566, "learning_rate": 1.062621359223301e-05, "loss": 0.273, "step": 8111 }, { "epoch": 78.6578313253012, "grad_norm": 3.0543551445007324, "learning_rate": 1.062135922330097e-05, "loss": 0.1268, "step": 8112 }, { "epoch": 78.66746987951807, "grad_norm": 5.393294334411621, "learning_rate": 1.0616504854368932e-05, "loss": 0.1797, "step": 8113 }, { "epoch": 78.67710843373494, "grad_norm": 5.09151029586792, "learning_rate": 1.0611650485436895e-05, "loss": 0.2203, "step": 8114 }, { "epoch": 78.6867469879518, "grad_norm": 47.47139358520508, "learning_rate": 1.0606796116504856e-05, "loss": 0.1704, "step": 8115 }, { "epoch": 78.69638554216867, "grad_norm": 3.2207908630371094, "learning_rate": 1.0601941747572815e-05, "loss": 0.12, "step": 8116 }, { "epoch": 78.70602409638555, "grad_norm": 7.431762218475342, "learning_rate": 1.0597087378640777e-05, "loss": 0.2064, "step": 8117 }, { "epoch": 78.71566265060241, "grad_norm": 2.6259477138519287, "learning_rate": 1.0592233009708738e-05, "loss": 0.218, "step": 8118 }, { "epoch": 78.72530120481927, "grad_norm": 3.3253517150878906, "learning_rate": 1.0587378640776699e-05, "loss": 0.1337, "step": 8119 }, { "epoch": 78.73493975903614, "grad_norm": 4.463534832000732, "learning_rate": 1.0582524271844662e-05, "loss": 0.3624, "step": 8120 }, { "epoch": 78.74457831325302, "grad_norm": 6.458263397216797, "learning_rate": 1.0577669902912623e-05, "loss": 0.292, "step": 8121 }, { "epoch": 78.75421686746988, "grad_norm": 2.052274227142334, "learning_rate": 1.0572815533980584e-05, "loss": 0.1478, "step": 8122 }, { "epoch": 78.76385542168674, "grad_norm": 6.094509601593018, "learning_rate": 1.0567961165048543e-05, "loss": 0.2355, "step": 8123 }, { "epoch": 78.77349397590362, "grad_norm": 3.340299367904663, "learning_rate": 1.0563106796116505e-05, "loss": 0.1965, "step": 8124 }, { "epoch": 78.78313253012048, "grad_norm": 2.7650206089019775, "learning_rate": 1.0558252427184466e-05, "loss": 0.2403, "step": 8125 }, { "epoch": 78.79277108433735, "grad_norm": 2.0705604553222656, "learning_rate": 1.0553398058252429e-05, "loss": 0.1433, "step": 8126 }, { "epoch": 78.80240963855422, "grad_norm": 3.17543888092041, "learning_rate": 1.054854368932039e-05, "loss": 0.1559, "step": 8127 }, { "epoch": 78.81204819277109, "grad_norm": 6.7730183601379395, "learning_rate": 1.054368932038835e-05, "loss": 0.212, "step": 8128 }, { "epoch": 78.82168674698795, "grad_norm": 2.2574527263641357, "learning_rate": 1.0538834951456312e-05, "loss": 0.0551, "step": 8129 }, { "epoch": 78.83132530120481, "grad_norm": 3.106782913208008, "learning_rate": 1.0533980582524271e-05, "loss": 0.1868, "step": 8130 }, { "epoch": 78.84096385542169, "grad_norm": 16.627344131469727, "learning_rate": 1.0529126213592233e-05, "loss": 0.2799, "step": 8131 }, { "epoch": 78.85060240963855, "grad_norm": 8.252582550048828, "learning_rate": 1.0524271844660194e-05, "loss": 0.2431, "step": 8132 }, { "epoch": 78.86024096385542, "grad_norm": 3.587237596511841, "learning_rate": 1.0519417475728157e-05, "loss": 0.1276, "step": 8133 }, { "epoch": 78.8698795180723, "grad_norm": 5.936932563781738, "learning_rate": 1.0514563106796118e-05, "loss": 0.1067, "step": 8134 }, { "epoch": 78.87951807228916, "grad_norm": 4.079349994659424, "learning_rate": 1.0509708737864079e-05, "loss": 0.2862, "step": 8135 }, { "epoch": 78.88915662650602, "grad_norm": 4.707653522491455, "learning_rate": 1.050485436893204e-05, "loss": 0.2352, "step": 8136 }, { "epoch": 78.89879518072289, "grad_norm": 9.81981086730957, "learning_rate": 1.05e-05, "loss": 0.3203, "step": 8137 }, { "epoch": 78.90843373493976, "grad_norm": 6.294023036956787, "learning_rate": 1.049514563106796e-05, "loss": 0.2513, "step": 8138 }, { "epoch": 78.91807228915663, "grad_norm": 14.115490913391113, "learning_rate": 1.0490291262135923e-05, "loss": 0.3683, "step": 8139 }, { "epoch": 78.92771084337349, "grad_norm": 4.457093715667725, "learning_rate": 1.0485436893203885e-05, "loss": 0.2837, "step": 8140 }, { "epoch": 78.93734939759037, "grad_norm": 2.032036304473877, "learning_rate": 1.0480582524271846e-05, "loss": 0.1027, "step": 8141 }, { "epoch": 78.94698795180723, "grad_norm": 3.3431038856506348, "learning_rate": 1.0475728155339807e-05, "loss": 0.1706, "step": 8142 }, { "epoch": 78.9566265060241, "grad_norm": 3.109645128250122, "learning_rate": 1.0470873786407768e-05, "loss": 0.112, "step": 8143 }, { "epoch": 78.96626506024096, "grad_norm": 6.013323783874512, "learning_rate": 1.0466019417475727e-05, "loss": 0.2178, "step": 8144 }, { "epoch": 78.97590361445783, "grad_norm": 18.786428451538086, "learning_rate": 1.046116504854369e-05, "loss": 0.117, "step": 8145 }, { "epoch": 78.9855421686747, "grad_norm": 5.0629496574401855, "learning_rate": 1.0456310679611651e-05, "loss": 0.2831, "step": 8146 }, { "epoch": 79.0012048192771, "grad_norm": 3.6003990173339844, "learning_rate": 1.0451456310679613e-05, "loss": 0.3715, "step": 8147 }, { "epoch": 79.01084337349397, "grad_norm": 3.288641929626465, "learning_rate": 1.0446601941747574e-05, "loss": 0.1792, "step": 8148 }, { "epoch": 79.02048192771085, "grad_norm": 7.171281337738037, "learning_rate": 1.0441747572815535e-05, "loss": 0.3029, "step": 8149 }, { "epoch": 79.03012048192771, "grad_norm": 4.632438659667969, "learning_rate": 1.0436893203883496e-05, "loss": 0.2402, "step": 8150 }, { "epoch": 79.03975903614457, "grad_norm": 8.872804641723633, "learning_rate": 1.0432038834951455e-05, "loss": 0.3408, "step": 8151 }, { "epoch": 79.04939759036145, "grad_norm": 11.069814682006836, "learning_rate": 1.0427184466019418e-05, "loss": 0.2726, "step": 8152 }, { "epoch": 79.05903614457831, "grad_norm": 3.9480514526367188, "learning_rate": 1.042233009708738e-05, "loss": 0.4259, "step": 8153 }, { "epoch": 79.06867469879518, "grad_norm": 3.703315258026123, "learning_rate": 1.041747572815534e-05, "loss": 0.194, "step": 8154 }, { "epoch": 79.07831325301204, "grad_norm": 4.897498607635498, "learning_rate": 1.0412621359223302e-05, "loss": 0.1488, "step": 8155 }, { "epoch": 79.08795180722892, "grad_norm": 9.447126388549805, "learning_rate": 1.0407766990291263e-05, "loss": 0.3776, "step": 8156 }, { "epoch": 79.09759036144578, "grad_norm": 4.146397113800049, "learning_rate": 1.0402912621359222e-05, "loss": 0.2756, "step": 8157 }, { "epoch": 79.10722891566265, "grad_norm": 1.775147795677185, "learning_rate": 1.0398058252427185e-05, "loss": 0.104, "step": 8158 }, { "epoch": 79.11686746987952, "grad_norm": 3.0618667602539062, "learning_rate": 1.0393203883495146e-05, "loss": 0.193, "step": 8159 }, { "epoch": 79.12650602409639, "grad_norm": 4.116703510284424, "learning_rate": 1.0388349514563107e-05, "loss": 0.2071, "step": 8160 }, { "epoch": 79.13614457831325, "grad_norm": 7.283061504364014, "learning_rate": 1.0383495145631069e-05, "loss": 0.1571, "step": 8161 }, { "epoch": 79.14578313253013, "grad_norm": 3.7163772583007812, "learning_rate": 1.037864077669903e-05, "loss": 0.1794, "step": 8162 }, { "epoch": 79.15542168674699, "grad_norm": 20.764490127563477, "learning_rate": 1.0373786407766991e-05, "loss": 0.4073, "step": 8163 }, { "epoch": 79.16506024096385, "grad_norm": 3.944088935852051, "learning_rate": 1.0368932038834952e-05, "loss": 0.1217, "step": 8164 }, { "epoch": 79.17469879518072, "grad_norm": 4.581742763519287, "learning_rate": 1.0364077669902913e-05, "loss": 0.1643, "step": 8165 }, { "epoch": 79.1843373493976, "grad_norm": 3.2044832706451416, "learning_rate": 1.0359223300970874e-05, "loss": 0.1108, "step": 8166 }, { "epoch": 79.19397590361446, "grad_norm": 5.565596580505371, "learning_rate": 1.0354368932038835e-05, "loss": 0.504, "step": 8167 }, { "epoch": 79.20361445783132, "grad_norm": 6.46153450012207, "learning_rate": 1.0349514563106797e-05, "loss": 0.3769, "step": 8168 }, { "epoch": 79.2132530120482, "grad_norm": 4.172042369842529, "learning_rate": 1.0344660194174758e-05, "loss": 0.2077, "step": 8169 }, { "epoch": 79.22289156626506, "grad_norm": 11.1555757522583, "learning_rate": 1.0339805825242719e-05, "loss": 0.2233, "step": 8170 }, { "epoch": 79.23253012048193, "grad_norm": 5.151177406311035, "learning_rate": 1.033495145631068e-05, "loss": 0.2246, "step": 8171 }, { "epoch": 79.24216867469879, "grad_norm": 7.362760543823242, "learning_rate": 1.0330097087378641e-05, "loss": 0.2792, "step": 8172 }, { "epoch": 79.25180722891567, "grad_norm": 3.2436463832855225, "learning_rate": 1.0325242718446602e-05, "loss": 0.3812, "step": 8173 }, { "epoch": 79.26144578313253, "grad_norm": 6.18414306640625, "learning_rate": 1.0320388349514564e-05, "loss": 0.2205, "step": 8174 }, { "epoch": 79.2710843373494, "grad_norm": 3.5779616832733154, "learning_rate": 1.0315533980582525e-05, "loss": 0.193, "step": 8175 }, { "epoch": 79.28072289156627, "grad_norm": 4.125744819641113, "learning_rate": 1.0310679611650486e-05, "loss": 0.4062, "step": 8176 }, { "epoch": 79.29036144578313, "grad_norm": 3.3138973712921143, "learning_rate": 1.0305825242718447e-05, "loss": 0.2193, "step": 8177 }, { "epoch": 79.3, "grad_norm": 7.446119785308838, "learning_rate": 1.0300970873786408e-05, "loss": 0.4692, "step": 8178 }, { "epoch": 79.30963855421686, "grad_norm": 4.1768035888671875, "learning_rate": 1.029611650485437e-05, "loss": 0.3454, "step": 8179 }, { "epoch": 79.31927710843374, "grad_norm": 5.656885147094727, "learning_rate": 1.029126213592233e-05, "loss": 0.3131, "step": 8180 }, { "epoch": 79.3289156626506, "grad_norm": 23.38660430908203, "learning_rate": 1.0286407766990292e-05, "loss": 0.2551, "step": 8181 }, { "epoch": 79.33855421686746, "grad_norm": 12.85179615020752, "learning_rate": 1.0281553398058253e-05, "loss": 0.4218, "step": 8182 }, { "epoch": 79.34819277108434, "grad_norm": 2.648590326309204, "learning_rate": 1.0276699029126216e-05, "loss": 0.0844, "step": 8183 }, { "epoch": 79.3578313253012, "grad_norm": 4.958404541015625, "learning_rate": 1.0271844660194175e-05, "loss": 0.3501, "step": 8184 }, { "epoch": 79.36746987951807, "grad_norm": 3.5158846378326416, "learning_rate": 1.0266990291262136e-05, "loss": 0.0802, "step": 8185 }, { "epoch": 79.37710843373495, "grad_norm": 3.918539047241211, "learning_rate": 1.0262135922330097e-05, "loss": 0.2355, "step": 8186 }, { "epoch": 79.38674698795181, "grad_norm": 5.646690845489502, "learning_rate": 1.0257281553398058e-05, "loss": 0.183, "step": 8187 }, { "epoch": 79.39638554216867, "grad_norm": 2.462772846221924, "learning_rate": 1.025242718446602e-05, "loss": 0.2705, "step": 8188 }, { "epoch": 79.40602409638554, "grad_norm": 7.168371200561523, "learning_rate": 1.0247572815533982e-05, "loss": 0.3128, "step": 8189 }, { "epoch": 79.41566265060241, "grad_norm": 14.026119232177734, "learning_rate": 1.0242718446601944e-05, "loss": 0.4801, "step": 8190 }, { "epoch": 79.42530120481928, "grad_norm": 10.740985870361328, "learning_rate": 1.0237864077669903e-05, "loss": 0.4327, "step": 8191 }, { "epoch": 79.43493975903614, "grad_norm": 6.012925148010254, "learning_rate": 1.0233009708737864e-05, "loss": 0.3503, "step": 8192 }, { "epoch": 79.44457831325302, "grad_norm": 4.383010387420654, "learning_rate": 1.0228155339805825e-05, "loss": 0.1715, "step": 8193 }, { "epoch": 79.45421686746988, "grad_norm": 3.9923436641693115, "learning_rate": 1.0223300970873786e-05, "loss": 0.1333, "step": 8194 }, { "epoch": 79.46385542168674, "grad_norm": 4.7572126388549805, "learning_rate": 1.0218446601941748e-05, "loss": 0.3006, "step": 8195 }, { "epoch": 79.47349397590361, "grad_norm": 4.253437042236328, "learning_rate": 1.021359223300971e-05, "loss": 0.2222, "step": 8196 }, { "epoch": 79.48313253012049, "grad_norm": 3.2751269340515137, "learning_rate": 1.020873786407767e-05, "loss": 0.1788, "step": 8197 }, { "epoch": 79.49277108433735, "grad_norm": 4.8893585205078125, "learning_rate": 1.0203883495145631e-05, "loss": 0.2163, "step": 8198 }, { "epoch": 79.50240963855421, "grad_norm": 3.3732545375823975, "learning_rate": 1.0199029126213592e-05, "loss": 0.1933, "step": 8199 }, { "epoch": 79.51204819277109, "grad_norm": 15.597807884216309, "learning_rate": 1.0194174757281553e-05, "loss": 0.3639, "step": 8200 }, { "epoch": 79.52168674698795, "grad_norm": 4.1612982749938965, "learning_rate": 1.0189320388349514e-05, "loss": 0.2982, "step": 8201 }, { "epoch": 79.53132530120482, "grad_norm": 4.874331474304199, "learning_rate": 1.0184466019417477e-05, "loss": 0.3145, "step": 8202 }, { "epoch": 79.54096385542168, "grad_norm": 7.8563055992126465, "learning_rate": 1.0179611650485438e-05, "loss": 0.2527, "step": 8203 }, { "epoch": 79.55060240963856, "grad_norm": 4.566743850708008, "learning_rate": 1.0174757281553398e-05, "loss": 0.2331, "step": 8204 }, { "epoch": 79.56024096385542, "grad_norm": 1.6426022052764893, "learning_rate": 1.0169902912621359e-05, "loss": 0.075, "step": 8205 }, { "epoch": 79.56987951807228, "grad_norm": 5.953847885131836, "learning_rate": 1.016504854368932e-05, "loss": 0.5817, "step": 8206 }, { "epoch": 79.57951807228916, "grad_norm": 3.221167802810669, "learning_rate": 1.0160194174757281e-05, "loss": 0.1449, "step": 8207 }, { "epoch": 79.58915662650602, "grad_norm": 5.348805904388428, "learning_rate": 1.0155339805825244e-05, "loss": 0.1269, "step": 8208 }, { "epoch": 79.59879518072289, "grad_norm": 3.181718587875366, "learning_rate": 1.0150485436893205e-05, "loss": 0.1396, "step": 8209 }, { "epoch": 79.60843373493977, "grad_norm": 3.4434773921966553, "learning_rate": 1.0145631067961166e-05, "loss": 0.2039, "step": 8210 }, { "epoch": 79.61807228915663, "grad_norm": 4.722810745239258, "learning_rate": 1.0140776699029126e-05, "loss": 0.1219, "step": 8211 }, { "epoch": 79.62771084337349, "grad_norm": 7.634931564331055, "learning_rate": 1.0135922330097087e-05, "loss": 0.2869, "step": 8212 }, { "epoch": 79.63734939759036, "grad_norm": 21.11284828186035, "learning_rate": 1.0131067961165048e-05, "loss": 0.342, "step": 8213 }, { "epoch": 79.64698795180723, "grad_norm": 8.529373168945312, "learning_rate": 1.012621359223301e-05, "loss": 0.2768, "step": 8214 }, { "epoch": 79.6566265060241, "grad_norm": 3.4386065006256104, "learning_rate": 1.0121359223300972e-05, "loss": 0.1634, "step": 8215 }, { "epoch": 79.66626506024096, "grad_norm": 6.984149932861328, "learning_rate": 1.0116504854368933e-05, "loss": 0.4141, "step": 8216 }, { "epoch": 79.67590361445784, "grad_norm": 11.46200942993164, "learning_rate": 1.0111650485436894e-05, "loss": 0.2265, "step": 8217 }, { "epoch": 79.6855421686747, "grad_norm": 2.4697000980377197, "learning_rate": 1.0106796116504854e-05, "loss": 0.1597, "step": 8218 }, { "epoch": 79.69518072289156, "grad_norm": 4.132386684417725, "learning_rate": 1.0101941747572815e-05, "loss": 0.2184, "step": 8219 }, { "epoch": 79.70481927710843, "grad_norm": 27.1852970123291, "learning_rate": 1.0097087378640776e-05, "loss": 0.3889, "step": 8220 }, { "epoch": 79.7144578313253, "grad_norm": 4.0137038230896, "learning_rate": 1.0092233009708739e-05, "loss": 0.2981, "step": 8221 }, { "epoch": 79.72409638554217, "grad_norm": 5.215389251708984, "learning_rate": 1.00873786407767e-05, "loss": 0.1542, "step": 8222 }, { "epoch": 79.73373493975903, "grad_norm": 7.2493133544921875, "learning_rate": 1.0082524271844661e-05, "loss": 0.2491, "step": 8223 }, { "epoch": 79.74337349397591, "grad_norm": 5.339035511016846, "learning_rate": 1.0077669902912622e-05, "loss": 0.1746, "step": 8224 }, { "epoch": 79.75301204819277, "grad_norm": 11.015357971191406, "learning_rate": 1.0072815533980582e-05, "loss": 0.2343, "step": 8225 }, { "epoch": 79.76265060240964, "grad_norm": 5.287997722625732, "learning_rate": 1.0067961165048543e-05, "loss": 0.2677, "step": 8226 }, { "epoch": 79.7722891566265, "grad_norm": 5.532532215118408, "learning_rate": 1.0063106796116506e-05, "loss": 0.3243, "step": 8227 }, { "epoch": 79.78192771084338, "grad_norm": 4.4476423263549805, "learning_rate": 1.0058252427184467e-05, "loss": 0.1809, "step": 8228 }, { "epoch": 79.79156626506024, "grad_norm": 6.9415178298950195, "learning_rate": 1.0053398058252428e-05, "loss": 0.186, "step": 8229 }, { "epoch": 79.8012048192771, "grad_norm": 5.5784711837768555, "learning_rate": 1.004854368932039e-05, "loss": 0.1227, "step": 8230 }, { "epoch": 79.81084337349398, "grad_norm": 4.246029853820801, "learning_rate": 1.004368932038835e-05, "loss": 0.2244, "step": 8231 }, { "epoch": 79.82048192771084, "grad_norm": 8.147411346435547, "learning_rate": 1.003883495145631e-05, "loss": 0.1836, "step": 8232 }, { "epoch": 79.83012048192771, "grad_norm": 5.121731281280518, "learning_rate": 1.0033980582524271e-05, "loss": 0.2063, "step": 8233 }, { "epoch": 79.83975903614459, "grad_norm": 4.0179219245910645, "learning_rate": 1.0029126213592234e-05, "loss": 0.2087, "step": 8234 }, { "epoch": 79.84939759036145, "grad_norm": 6.300220489501953, "learning_rate": 1.0024271844660195e-05, "loss": 0.2409, "step": 8235 }, { "epoch": 79.85903614457831, "grad_norm": 4.8645124435424805, "learning_rate": 1.0019417475728156e-05, "loss": 0.2851, "step": 8236 }, { "epoch": 79.86867469879518, "grad_norm": 3.3371925354003906, "learning_rate": 1.0014563106796117e-05, "loss": 0.2481, "step": 8237 }, { "epoch": 79.87831325301205, "grad_norm": 3.316214084625244, "learning_rate": 1.0009708737864079e-05, "loss": 0.0822, "step": 8238 }, { "epoch": 79.88795180722892, "grad_norm": 2.55379581451416, "learning_rate": 1.0004854368932038e-05, "loss": 0.1539, "step": 8239 }, { "epoch": 79.89759036144578, "grad_norm": 2.576063871383667, "learning_rate": 1e-05, "loss": 0.1663, "step": 8240 }, { "epoch": 79.90722891566266, "grad_norm": 5.335185527801514, "learning_rate": 9.995145631067962e-06, "loss": 0.2755, "step": 8241 }, { "epoch": 79.91686746987952, "grad_norm": 4.359859466552734, "learning_rate": 9.990291262135923e-06, "loss": 0.1317, "step": 8242 }, { "epoch": 79.92650602409638, "grad_norm": 4.094494342803955, "learning_rate": 9.985436893203884e-06, "loss": 0.3005, "step": 8243 }, { "epoch": 79.93614457831325, "grad_norm": 3.702623128890991, "learning_rate": 9.980582524271845e-06, "loss": 0.26, "step": 8244 }, { "epoch": 79.94578313253012, "grad_norm": 2.668525218963623, "learning_rate": 9.975728155339807e-06, "loss": 0.2097, "step": 8245 }, { "epoch": 79.95542168674699, "grad_norm": 2.2337076663970947, "learning_rate": 9.970873786407768e-06, "loss": 0.1628, "step": 8246 }, { "epoch": 79.96506024096385, "grad_norm": 3.875018358230591, "learning_rate": 9.966019417475729e-06, "loss": 0.1738, "step": 8247 }, { "epoch": 79.97469879518073, "grad_norm": 7.751603126525879, "learning_rate": 9.96116504854369e-06, "loss": 0.2718, "step": 8248 }, { "epoch": 79.98433734939759, "grad_norm": 2.661510467529297, "learning_rate": 9.956310679611651e-06, "loss": 0.2242, "step": 8249 }, { "epoch": 79.99397590361446, "grad_norm": 3.0753228664398193, "learning_rate": 9.951456310679612e-06, "loss": 0.117, "step": 8250 }, { "epoch": 80.00963855421686, "grad_norm": 7.634326934814453, "learning_rate": 9.946601941747573e-06, "loss": 0.3433, "step": 8251 }, { "epoch": 80.01927710843374, "grad_norm": 3.927312135696411, "learning_rate": 9.941747572815535e-06, "loss": 0.1905, "step": 8252 }, { "epoch": 80.0289156626506, "grad_norm": 3.852287530899048, "learning_rate": 9.936893203883496e-06, "loss": 0.2493, "step": 8253 }, { "epoch": 80.03855421686747, "grad_norm": 4.703474998474121, "learning_rate": 9.932038834951457e-06, "loss": 0.164, "step": 8254 }, { "epoch": 80.04819277108433, "grad_norm": 3.8975980281829834, "learning_rate": 9.927184466019418e-06, "loss": 0.1507, "step": 8255 }, { "epoch": 80.05783132530121, "grad_norm": 5.285161018371582, "learning_rate": 9.922330097087379e-06, "loss": 0.2, "step": 8256 }, { "epoch": 80.06746987951807, "grad_norm": 5.979661464691162, "learning_rate": 9.91747572815534e-06, "loss": 0.2966, "step": 8257 }, { "epoch": 80.07710843373494, "grad_norm": 11.76790714263916, "learning_rate": 9.912621359223301e-06, "loss": 0.2178, "step": 8258 }, { "epoch": 80.08674698795181, "grad_norm": 4.2500810623168945, "learning_rate": 9.907766990291263e-06, "loss": 0.1407, "step": 8259 }, { "epoch": 80.09638554216868, "grad_norm": 5.793179988861084, "learning_rate": 9.902912621359224e-06, "loss": 0.1835, "step": 8260 }, { "epoch": 80.10602409638554, "grad_norm": 5.871501922607422, "learning_rate": 9.898058252427185e-06, "loss": 0.2814, "step": 8261 }, { "epoch": 80.1156626506024, "grad_norm": 6.247763156890869, "learning_rate": 9.893203883495146e-06, "loss": 0.4378, "step": 8262 }, { "epoch": 80.12530120481928, "grad_norm": 2.88136625289917, "learning_rate": 9.888349514563107e-06, "loss": 0.1432, "step": 8263 }, { "epoch": 80.13493975903614, "grad_norm": 3.096651554107666, "learning_rate": 9.883495145631068e-06, "loss": 0.0799, "step": 8264 }, { "epoch": 80.144578313253, "grad_norm": 7.230511665344238, "learning_rate": 9.87864077669903e-06, "loss": 0.1838, "step": 8265 }, { "epoch": 80.15421686746988, "grad_norm": 3.9123899936676025, "learning_rate": 9.87378640776699e-06, "loss": 0.1793, "step": 8266 }, { "epoch": 80.16385542168675, "grad_norm": 6.9654717445373535, "learning_rate": 9.868932038834952e-06, "loss": 0.4323, "step": 8267 }, { "epoch": 80.17349397590361, "grad_norm": 6.093751430511475, "learning_rate": 9.864077669902913e-06, "loss": 0.3783, "step": 8268 }, { "epoch": 80.18313253012049, "grad_norm": 6.113009929656982, "learning_rate": 9.859223300970874e-06, "loss": 0.2867, "step": 8269 }, { "epoch": 80.19277108433735, "grad_norm": 4.160780429840088, "learning_rate": 9.854368932038835e-06, "loss": 0.2568, "step": 8270 }, { "epoch": 80.20240963855422, "grad_norm": 4.27778959274292, "learning_rate": 9.849514563106798e-06, "loss": 0.2185, "step": 8271 }, { "epoch": 80.21204819277108, "grad_norm": 5.034385681152344, "learning_rate": 9.844660194174757e-06, "loss": 0.3155, "step": 8272 }, { "epoch": 80.22168674698796, "grad_norm": 5.240912437438965, "learning_rate": 9.839805825242719e-06, "loss": 0.2537, "step": 8273 }, { "epoch": 80.23132530120482, "grad_norm": 6.828060150146484, "learning_rate": 9.83495145631068e-06, "loss": 0.2879, "step": 8274 }, { "epoch": 80.24096385542168, "grad_norm": 5.466142654418945, "learning_rate": 9.830097087378641e-06, "loss": 0.2505, "step": 8275 }, { "epoch": 80.25060240963856, "grad_norm": 7.563738822937012, "learning_rate": 9.825242718446602e-06, "loss": 0.1967, "step": 8276 }, { "epoch": 80.26024096385542, "grad_norm": 16.637832641601562, "learning_rate": 9.820388349514563e-06, "loss": 0.2213, "step": 8277 }, { "epoch": 80.26987951807229, "grad_norm": 4.066741943359375, "learning_rate": 9.815533980582526e-06, "loss": 0.1376, "step": 8278 }, { "epoch": 80.27951807228915, "grad_norm": 4.628542423248291, "learning_rate": 9.810679611650486e-06, "loss": 0.2688, "step": 8279 }, { "epoch": 80.28915662650603, "grad_norm": 4.586380958557129, "learning_rate": 9.805825242718447e-06, "loss": 0.2875, "step": 8280 }, { "epoch": 80.29879518072289, "grad_norm": 5.043645858764648, "learning_rate": 9.800970873786408e-06, "loss": 0.2442, "step": 8281 }, { "epoch": 80.30843373493975, "grad_norm": 6.967073440551758, "learning_rate": 9.796116504854369e-06, "loss": 0.1643, "step": 8282 }, { "epoch": 80.31807228915663, "grad_norm": 4.7602715492248535, "learning_rate": 9.79126213592233e-06, "loss": 0.0931, "step": 8283 }, { "epoch": 80.3277108433735, "grad_norm": 4.7427496910095215, "learning_rate": 9.786407766990293e-06, "loss": 0.261, "step": 8284 }, { "epoch": 80.33734939759036, "grad_norm": 6.136763572692871, "learning_rate": 9.781553398058254e-06, "loss": 0.1762, "step": 8285 }, { "epoch": 80.34698795180722, "grad_norm": 5.30985689163208, "learning_rate": 9.776699029126214e-06, "loss": 0.2899, "step": 8286 }, { "epoch": 80.3566265060241, "grad_norm": 2.4252002239227295, "learning_rate": 9.771844660194175e-06, "loss": 0.1591, "step": 8287 }, { "epoch": 80.36626506024096, "grad_norm": 4.86883020401001, "learning_rate": 9.766990291262136e-06, "loss": 0.2169, "step": 8288 }, { "epoch": 80.37590361445783, "grad_norm": 10.509451866149902, "learning_rate": 9.762135922330097e-06, "loss": 0.3498, "step": 8289 }, { "epoch": 80.3855421686747, "grad_norm": 5.740994453430176, "learning_rate": 9.75728155339806e-06, "loss": 0.2102, "step": 8290 }, { "epoch": 80.39518072289157, "grad_norm": 5.679507732391357, "learning_rate": 9.752427184466021e-06, "loss": 0.1218, "step": 8291 }, { "epoch": 80.40481927710843, "grad_norm": 3.896580696105957, "learning_rate": 9.747572815533982e-06, "loss": 0.2595, "step": 8292 }, { "epoch": 80.41445783132531, "grad_norm": 5.966920375823975, "learning_rate": 9.742718446601942e-06, "loss": 0.267, "step": 8293 }, { "epoch": 80.42409638554217, "grad_norm": 6.384464263916016, "learning_rate": 9.737864077669903e-06, "loss": 0.2053, "step": 8294 }, { "epoch": 80.43373493975903, "grad_norm": 5.4510178565979, "learning_rate": 9.733009708737864e-06, "loss": 0.3164, "step": 8295 }, { "epoch": 80.4433734939759, "grad_norm": 9.702303886413574, "learning_rate": 9.728155339805825e-06, "loss": 0.2464, "step": 8296 }, { "epoch": 80.45301204819278, "grad_norm": 6.060948848724365, "learning_rate": 9.723300970873788e-06, "loss": 0.1379, "step": 8297 }, { "epoch": 80.46265060240964, "grad_norm": 3.8976891040802, "learning_rate": 9.718446601941749e-06, "loss": 0.2063, "step": 8298 }, { "epoch": 80.4722891566265, "grad_norm": 7.964089870452881, "learning_rate": 9.71359223300971e-06, "loss": 0.2689, "step": 8299 }, { "epoch": 80.48192771084338, "grad_norm": 3.1308038234710693, "learning_rate": 9.70873786407767e-06, "loss": 0.0827, "step": 8300 }, { "epoch": 80.49156626506024, "grad_norm": 4.361719608306885, "learning_rate": 9.70388349514563e-06, "loss": 0.4151, "step": 8301 }, { "epoch": 80.5012048192771, "grad_norm": 5.2600908279418945, "learning_rate": 9.699029126213592e-06, "loss": 0.2193, "step": 8302 }, { "epoch": 80.51084337349397, "grad_norm": 9.664650917053223, "learning_rate": 9.694174757281555e-06, "loss": 0.2581, "step": 8303 }, { "epoch": 80.52048192771085, "grad_norm": 5.529742240905762, "learning_rate": 9.689320388349516e-06, "loss": 0.1743, "step": 8304 }, { "epoch": 80.53012048192771, "grad_norm": 6.093108177185059, "learning_rate": 9.684466019417477e-06, "loss": 0.2801, "step": 8305 }, { "epoch": 80.53975903614457, "grad_norm": 11.816067695617676, "learning_rate": 9.679611650485436e-06, "loss": 0.4695, "step": 8306 }, { "epoch": 80.54939759036145, "grad_norm": 4.005472660064697, "learning_rate": 9.674757281553398e-06, "loss": 0.242, "step": 8307 }, { "epoch": 80.55903614457831, "grad_norm": 5.0937628746032715, "learning_rate": 9.669902912621359e-06, "loss": 0.2609, "step": 8308 }, { "epoch": 80.56867469879518, "grad_norm": 9.615945816040039, "learning_rate": 9.665048543689322e-06, "loss": 0.2521, "step": 8309 }, { "epoch": 80.57831325301204, "grad_norm": 2.246129035949707, "learning_rate": 9.660194174757283e-06, "loss": 0.1372, "step": 8310 }, { "epoch": 80.58795180722892, "grad_norm": 6.111739158630371, "learning_rate": 9.655339805825244e-06, "loss": 0.1604, "step": 8311 }, { "epoch": 80.59759036144578, "grad_norm": 6.562386512756348, "learning_rate": 9.650485436893205e-06, "loss": 0.4219, "step": 8312 }, { "epoch": 80.60722891566265, "grad_norm": 7.9265899658203125, "learning_rate": 9.645631067961164e-06, "loss": 0.4581, "step": 8313 }, { "epoch": 80.61686746987952, "grad_norm": 2.9972856044769287, "learning_rate": 9.640776699029126e-06, "loss": 0.1527, "step": 8314 }, { "epoch": 80.62650602409639, "grad_norm": 5.670166969299316, "learning_rate": 9.635922330097087e-06, "loss": 0.2008, "step": 8315 }, { "epoch": 80.63614457831325, "grad_norm": 3.2616381645202637, "learning_rate": 9.63106796116505e-06, "loss": 0.1154, "step": 8316 }, { "epoch": 80.64578313253013, "grad_norm": 4.585093975067139, "learning_rate": 9.62621359223301e-06, "loss": 0.2137, "step": 8317 }, { "epoch": 80.65542168674699, "grad_norm": 5.506716251373291, "learning_rate": 9.621359223300972e-06, "loss": 0.4012, "step": 8318 }, { "epoch": 80.66506024096385, "grad_norm": 3.1682608127593994, "learning_rate": 9.616504854368933e-06, "loss": 0.1456, "step": 8319 }, { "epoch": 80.67469879518072, "grad_norm": 4.664010524749756, "learning_rate": 9.611650485436892e-06, "loss": 0.141, "step": 8320 }, { "epoch": 80.6843373493976, "grad_norm": 5.788939476013184, "learning_rate": 9.606796116504854e-06, "loss": 0.211, "step": 8321 }, { "epoch": 80.69397590361446, "grad_norm": 2.0256848335266113, "learning_rate": 9.601941747572816e-06, "loss": 0.1646, "step": 8322 }, { "epoch": 80.70361445783132, "grad_norm": 18.238460540771484, "learning_rate": 9.597087378640778e-06, "loss": 0.2336, "step": 8323 }, { "epoch": 80.7132530120482, "grad_norm": 6.119390487670898, "learning_rate": 9.592233009708739e-06, "loss": 0.2173, "step": 8324 }, { "epoch": 80.72289156626506, "grad_norm": 10.69161319732666, "learning_rate": 9.5873786407767e-06, "loss": 0.1569, "step": 8325 }, { "epoch": 80.73253012048193, "grad_norm": 3.352313756942749, "learning_rate": 9.582524271844661e-06, "loss": 0.1899, "step": 8326 }, { "epoch": 80.74216867469879, "grad_norm": 4.485864639282227, "learning_rate": 9.57766990291262e-06, "loss": 0.228, "step": 8327 }, { "epoch": 80.75180722891567, "grad_norm": 7.078031063079834, "learning_rate": 9.572815533980583e-06, "loss": 0.1665, "step": 8328 }, { "epoch": 80.76144578313253, "grad_norm": 5.7333831787109375, "learning_rate": 9.567961165048544e-06, "loss": 0.1815, "step": 8329 }, { "epoch": 80.7710843373494, "grad_norm": 6.620950698852539, "learning_rate": 9.563106796116506e-06, "loss": 0.3737, "step": 8330 }, { "epoch": 80.78072289156627, "grad_norm": 4.0599541664123535, "learning_rate": 9.558252427184467e-06, "loss": 0.2706, "step": 8331 }, { "epoch": 80.79036144578313, "grad_norm": 4.65987491607666, "learning_rate": 9.553398058252428e-06, "loss": 0.1683, "step": 8332 }, { "epoch": 80.8, "grad_norm": 5.146819114685059, "learning_rate": 9.548543689320389e-06, "loss": 0.1762, "step": 8333 }, { "epoch": 80.80963855421686, "grad_norm": 5.385499477386475, "learning_rate": 9.543689320388349e-06, "loss": 0.2299, "step": 8334 }, { "epoch": 80.81927710843374, "grad_norm": 4.400372505187988, "learning_rate": 9.538834951456311e-06, "loss": 0.2011, "step": 8335 }, { "epoch": 80.8289156626506, "grad_norm": 4.137125015258789, "learning_rate": 9.533980582524273e-06, "loss": 0.2042, "step": 8336 }, { "epoch": 80.83855421686746, "grad_norm": 6.702450752258301, "learning_rate": 9.529126213592234e-06, "loss": 0.1592, "step": 8337 }, { "epoch": 80.84819277108434, "grad_norm": 11.344932556152344, "learning_rate": 9.524271844660195e-06, "loss": 0.2958, "step": 8338 }, { "epoch": 80.8578313253012, "grad_norm": 5.7635955810546875, "learning_rate": 9.519417475728156e-06, "loss": 0.2398, "step": 8339 }, { "epoch": 80.86746987951807, "grad_norm": 1.7630761861801147, "learning_rate": 9.514563106796117e-06, "loss": 0.1016, "step": 8340 }, { "epoch": 80.87710843373495, "grad_norm": 7.961099147796631, "learning_rate": 9.509708737864078e-06, "loss": 0.5344, "step": 8341 }, { "epoch": 80.88674698795181, "grad_norm": 5.9422383308410645, "learning_rate": 9.50485436893204e-06, "loss": 0.2932, "step": 8342 }, { "epoch": 80.89638554216867, "grad_norm": 4.017196178436279, "learning_rate": 9.5e-06, "loss": 0.1107, "step": 8343 }, { "epoch": 80.90602409638554, "grad_norm": 5.175506114959717, "learning_rate": 9.495145631067962e-06, "loss": 0.1429, "step": 8344 }, { "epoch": 80.91566265060241, "grad_norm": 4.222281455993652, "learning_rate": 9.490291262135923e-06, "loss": 0.148, "step": 8345 }, { "epoch": 80.92530120481928, "grad_norm": 4.962661266326904, "learning_rate": 9.485436893203884e-06, "loss": 0.1777, "step": 8346 }, { "epoch": 80.93493975903614, "grad_norm": 12.005940437316895, "learning_rate": 9.480582524271845e-06, "loss": 0.1608, "step": 8347 }, { "epoch": 80.94457831325302, "grad_norm": 5.722222328186035, "learning_rate": 9.475728155339806e-06, "loss": 0.1563, "step": 8348 }, { "epoch": 80.95421686746988, "grad_norm": 8.274589538574219, "learning_rate": 9.470873786407767e-06, "loss": 0.1861, "step": 8349 }, { "epoch": 80.96385542168674, "grad_norm": 4.819894790649414, "learning_rate": 9.466019417475729e-06, "loss": 0.1091, "step": 8350 }, { "epoch": 80.97349397590361, "grad_norm": 1.6539700031280518, "learning_rate": 9.46116504854369e-06, "loss": 0.066, "step": 8351 }, { "epoch": 80.98313253012049, "grad_norm": 5.366960048675537, "learning_rate": 9.45631067961165e-06, "loss": 0.1909, "step": 8352 }, { "epoch": 80.99277108433735, "grad_norm": 5.282814025878906, "learning_rate": 9.451456310679612e-06, "loss": 0.4246, "step": 8353 }, { "epoch": 81.00843373493976, "grad_norm": 3.6023824214935303, "learning_rate": 9.446601941747573e-06, "loss": 0.0974, "step": 8354 }, { "epoch": 81.01807228915662, "grad_norm": 7.192405700683594, "learning_rate": 9.441747572815534e-06, "loss": 0.1902, "step": 8355 }, { "epoch": 81.0277108433735, "grad_norm": 15.557638168334961, "learning_rate": 9.436893203883495e-06, "loss": 0.1966, "step": 8356 }, { "epoch": 81.03734939759036, "grad_norm": 4.368383407592773, "learning_rate": 9.432038834951457e-06, "loss": 0.2179, "step": 8357 }, { "epoch": 81.04698795180722, "grad_norm": 14.957863807678223, "learning_rate": 9.427184466019418e-06, "loss": 0.1928, "step": 8358 }, { "epoch": 81.0566265060241, "grad_norm": 9.930399894714355, "learning_rate": 9.422330097087379e-06, "loss": 0.4355, "step": 8359 }, { "epoch": 81.06626506024097, "grad_norm": 3.2460031509399414, "learning_rate": 9.41747572815534e-06, "loss": 0.3241, "step": 8360 }, { "epoch": 81.07590361445783, "grad_norm": 11.186789512634277, "learning_rate": 9.412621359223301e-06, "loss": 0.1443, "step": 8361 }, { "epoch": 81.08554216867469, "grad_norm": 4.26484489440918, "learning_rate": 9.407766990291262e-06, "loss": 0.2218, "step": 8362 }, { "epoch": 81.09518072289157, "grad_norm": 22.039783477783203, "learning_rate": 9.402912621359223e-06, "loss": 0.3253, "step": 8363 }, { "epoch": 81.10481927710843, "grad_norm": 7.690617084503174, "learning_rate": 9.398058252427185e-06, "loss": 0.4511, "step": 8364 }, { "epoch": 81.1144578313253, "grad_norm": 41.133766174316406, "learning_rate": 9.393203883495146e-06, "loss": 0.3794, "step": 8365 }, { "epoch": 81.12409638554217, "grad_norm": 14.02897834777832, "learning_rate": 9.388349514563109e-06, "loss": 0.275, "step": 8366 }, { "epoch": 81.13373493975904, "grad_norm": 2.785370349884033, "learning_rate": 9.383495145631068e-06, "loss": 0.1921, "step": 8367 }, { "epoch": 81.1433734939759, "grad_norm": 1.105489730834961, "learning_rate": 9.37864077669903e-06, "loss": 0.0924, "step": 8368 }, { "epoch": 81.15301204819278, "grad_norm": 5.964929103851318, "learning_rate": 9.37378640776699e-06, "loss": 0.1582, "step": 8369 }, { "epoch": 81.16265060240964, "grad_norm": 5.545677661895752, "learning_rate": 9.368932038834951e-06, "loss": 0.1994, "step": 8370 }, { "epoch": 81.1722891566265, "grad_norm": 15.876132011413574, "learning_rate": 9.364077669902913e-06, "loss": 0.0847, "step": 8371 }, { "epoch": 81.18192771084337, "grad_norm": 18.92386245727539, "learning_rate": 9.359223300970875e-06, "loss": 0.3899, "step": 8372 }, { "epoch": 81.19156626506025, "grad_norm": 10.682351112365723, "learning_rate": 9.354368932038837e-06, "loss": 0.2483, "step": 8373 }, { "epoch": 81.20120481927711, "grad_norm": 3.247563362121582, "learning_rate": 9.349514563106796e-06, "loss": 0.25, "step": 8374 }, { "epoch": 81.21084337349397, "grad_norm": 8.061240196228027, "learning_rate": 9.344660194174757e-06, "loss": 0.2659, "step": 8375 }, { "epoch": 81.22048192771085, "grad_norm": 18.146703720092773, "learning_rate": 9.339805825242718e-06, "loss": 0.4271, "step": 8376 }, { "epoch": 81.23012048192771, "grad_norm": 2.274855852127075, "learning_rate": 9.33495145631068e-06, "loss": 0.1338, "step": 8377 }, { "epoch": 81.23975903614458, "grad_norm": 6.944721221923828, "learning_rate": 9.33009708737864e-06, "loss": 0.1291, "step": 8378 }, { "epoch": 81.24939759036144, "grad_norm": 9.96254825592041, "learning_rate": 9.325242718446603e-06, "loss": 0.276, "step": 8379 }, { "epoch": 81.25903614457832, "grad_norm": 8.449057579040527, "learning_rate": 9.320388349514565e-06, "loss": 0.1573, "step": 8380 }, { "epoch": 81.26867469879518, "grad_norm": 2.676598072052002, "learning_rate": 9.315533980582524e-06, "loss": 0.0922, "step": 8381 }, { "epoch": 81.27831325301204, "grad_norm": 6.547378063201904, "learning_rate": 9.310679611650485e-06, "loss": 0.1233, "step": 8382 }, { "epoch": 81.28795180722892, "grad_norm": 10.9637451171875, "learning_rate": 9.305825242718446e-06, "loss": 0.308, "step": 8383 }, { "epoch": 81.29759036144578, "grad_norm": 18.19309425354004, "learning_rate": 9.300970873786407e-06, "loss": 0.0833, "step": 8384 }, { "epoch": 81.30722891566265, "grad_norm": 19.302831649780273, "learning_rate": 9.29611650485437e-06, "loss": 0.2116, "step": 8385 }, { "epoch": 81.31686746987951, "grad_norm": 1.4254310131072998, "learning_rate": 9.291262135922331e-06, "loss": 0.2199, "step": 8386 }, { "epoch": 81.32650602409639, "grad_norm": 11.09190845489502, "learning_rate": 9.286407766990293e-06, "loss": 0.2127, "step": 8387 }, { "epoch": 81.33614457831325, "grad_norm": 15.883523941040039, "learning_rate": 9.281553398058252e-06, "loss": 0.2884, "step": 8388 }, { "epoch": 81.34578313253012, "grad_norm": 15.769649505615234, "learning_rate": 9.276699029126213e-06, "loss": 0.5046, "step": 8389 }, { "epoch": 81.355421686747, "grad_norm": 4.2813639640808105, "learning_rate": 9.271844660194174e-06, "loss": 0.1582, "step": 8390 }, { "epoch": 81.36506024096386, "grad_norm": 2.1484711170196533, "learning_rate": 9.266990291262137e-06, "loss": 0.1027, "step": 8391 }, { "epoch": 81.37469879518072, "grad_norm": 11.107769966125488, "learning_rate": 9.262135922330098e-06, "loss": 0.2815, "step": 8392 }, { "epoch": 81.38433734939758, "grad_norm": 2.454982280731201, "learning_rate": 9.25728155339806e-06, "loss": 0.212, "step": 8393 }, { "epoch": 81.39397590361446, "grad_norm": 2.7400968074798584, "learning_rate": 9.25242718446602e-06, "loss": 0.1596, "step": 8394 }, { "epoch": 81.40361445783132, "grad_norm": 4.136708736419678, "learning_rate": 9.24757281553398e-06, "loss": 0.0996, "step": 8395 }, { "epoch": 81.41325301204819, "grad_norm": 14.051470756530762, "learning_rate": 9.242718446601941e-06, "loss": 0.213, "step": 8396 }, { "epoch": 81.42289156626506, "grad_norm": 6.362167835235596, "learning_rate": 9.237864077669902e-06, "loss": 0.4194, "step": 8397 }, { "epoch": 81.43253012048193, "grad_norm": 5.760159492492676, "learning_rate": 9.233009708737865e-06, "loss": 0.2244, "step": 8398 }, { "epoch": 81.44216867469879, "grad_norm": 3.1528501510620117, "learning_rate": 9.228155339805826e-06, "loss": 0.2393, "step": 8399 }, { "epoch": 81.45180722891567, "grad_norm": 4.286283016204834, "learning_rate": 9.223300970873788e-06, "loss": 0.3816, "step": 8400 }, { "epoch": 81.46144578313253, "grad_norm": 5.615791320800781, "learning_rate": 9.218446601941749e-06, "loss": 0.1397, "step": 8401 }, { "epoch": 81.4710843373494, "grad_norm": 1.6862367391586304, "learning_rate": 9.213592233009708e-06, "loss": 0.1506, "step": 8402 }, { "epoch": 81.48072289156626, "grad_norm": 11.247086524963379, "learning_rate": 9.20873786407767e-06, "loss": 0.303, "step": 8403 }, { "epoch": 81.49036144578314, "grad_norm": 5.88152551651001, "learning_rate": 9.203883495145632e-06, "loss": 0.3238, "step": 8404 }, { "epoch": 81.5, "grad_norm": 5.212777614593506, "learning_rate": 9.199029126213593e-06, "loss": 0.0926, "step": 8405 }, { "epoch": 81.50963855421686, "grad_norm": 10.73431396484375, "learning_rate": 9.194174757281554e-06, "loss": 0.1406, "step": 8406 }, { "epoch": 81.51927710843374, "grad_norm": 2.119425058364868, "learning_rate": 9.189320388349516e-06, "loss": 0.1809, "step": 8407 }, { "epoch": 81.5289156626506, "grad_norm": 8.781450271606445, "learning_rate": 9.184466019417477e-06, "loss": 0.168, "step": 8408 }, { "epoch": 81.53855421686747, "grad_norm": 5.330880165100098, "learning_rate": 9.179611650485436e-06, "loss": 0.1942, "step": 8409 }, { "epoch": 81.54819277108433, "grad_norm": 2.9195199012756348, "learning_rate": 9.174757281553399e-06, "loss": 0.1115, "step": 8410 }, { "epoch": 81.55783132530121, "grad_norm": 10.049982070922852, "learning_rate": 9.16990291262136e-06, "loss": 0.4151, "step": 8411 }, { "epoch": 81.56746987951807, "grad_norm": 2.466557502746582, "learning_rate": 9.165048543689321e-06, "loss": 0.163, "step": 8412 }, { "epoch": 81.57710843373494, "grad_norm": 17.544315338134766, "learning_rate": 9.160194174757282e-06, "loss": 0.2815, "step": 8413 }, { "epoch": 81.58674698795181, "grad_norm": 3.8025825023651123, "learning_rate": 9.155339805825244e-06, "loss": 0.2539, "step": 8414 }, { "epoch": 81.59638554216868, "grad_norm": 16.51182746887207, "learning_rate": 9.150485436893205e-06, "loss": 0.1985, "step": 8415 }, { "epoch": 81.60602409638554, "grad_norm": 6.628283977508545, "learning_rate": 9.145631067961164e-06, "loss": 0.2204, "step": 8416 }, { "epoch": 81.61566265060242, "grad_norm": 16.762983322143555, "learning_rate": 9.140776699029127e-06, "loss": 0.4486, "step": 8417 }, { "epoch": 81.62530120481928, "grad_norm": 9.249780654907227, "learning_rate": 9.135922330097088e-06, "loss": 0.4623, "step": 8418 }, { "epoch": 81.63493975903614, "grad_norm": 4.65339994430542, "learning_rate": 9.13106796116505e-06, "loss": 0.1449, "step": 8419 }, { "epoch": 81.644578313253, "grad_norm": 15.410870552062988, "learning_rate": 9.12621359223301e-06, "loss": 0.1542, "step": 8420 }, { "epoch": 81.65421686746988, "grad_norm": 6.497442245483398, "learning_rate": 9.121359223300972e-06, "loss": 0.1117, "step": 8421 }, { "epoch": 81.66385542168675, "grad_norm": 7.067667484283447, "learning_rate": 9.116504854368931e-06, "loss": 0.212, "step": 8422 }, { "epoch": 81.67349397590361, "grad_norm": 3.0840280055999756, "learning_rate": 9.111650485436894e-06, "loss": 0.2082, "step": 8423 }, { "epoch": 81.68313253012049, "grad_norm": 3.4893109798431396, "learning_rate": 9.106796116504855e-06, "loss": 0.2688, "step": 8424 }, { "epoch": 81.69277108433735, "grad_norm": 15.785407066345215, "learning_rate": 9.101941747572816e-06, "loss": 0.1275, "step": 8425 }, { "epoch": 81.70240963855422, "grad_norm": 10.21713638305664, "learning_rate": 9.097087378640777e-06, "loss": 0.2377, "step": 8426 }, { "epoch": 81.71204819277108, "grad_norm": 2.3387045860290527, "learning_rate": 9.092233009708738e-06, "loss": 0.107, "step": 8427 }, { "epoch": 81.72168674698796, "grad_norm": 3.716979742050171, "learning_rate": 9.0873786407767e-06, "loss": 0.4672, "step": 8428 }, { "epoch": 81.73132530120482, "grad_norm": 8.13883113861084, "learning_rate": 9.08252427184466e-06, "loss": 0.4148, "step": 8429 }, { "epoch": 81.74096385542168, "grad_norm": 3.513099193572998, "learning_rate": 9.077669902912622e-06, "loss": 0.1382, "step": 8430 }, { "epoch": 81.75060240963856, "grad_norm": 2.564511775970459, "learning_rate": 9.072815533980583e-06, "loss": 0.1501, "step": 8431 }, { "epoch": 81.76024096385542, "grad_norm": 12.684806823730469, "learning_rate": 9.067961165048544e-06, "loss": 0.2164, "step": 8432 }, { "epoch": 81.76987951807229, "grad_norm": 1.0730119943618774, "learning_rate": 9.063106796116505e-06, "loss": 0.3093, "step": 8433 }, { "epoch": 81.77951807228915, "grad_norm": 11.05924129486084, "learning_rate": 9.058252427184466e-06, "loss": 0.2376, "step": 8434 }, { "epoch": 81.78915662650603, "grad_norm": 7.404289722442627, "learning_rate": 9.053398058252428e-06, "loss": 0.0936, "step": 8435 }, { "epoch": 81.79879518072289, "grad_norm": 3.562257766723633, "learning_rate": 9.048543689320389e-06, "loss": 0.1942, "step": 8436 }, { "epoch": 81.80843373493975, "grad_norm": 1.8420904874801636, "learning_rate": 9.04368932038835e-06, "loss": 0.0805, "step": 8437 }, { "epoch": 81.81807228915663, "grad_norm": 68.59202575683594, "learning_rate": 9.038834951456311e-06, "loss": 0.2738, "step": 8438 }, { "epoch": 81.8277108433735, "grad_norm": 3.5411410331726074, "learning_rate": 9.033980582524272e-06, "loss": 0.2218, "step": 8439 }, { "epoch": 81.83734939759036, "grad_norm": 25.544416427612305, "learning_rate": 9.029126213592233e-06, "loss": 0.277, "step": 8440 }, { "epoch": 81.84698795180722, "grad_norm": 1.951387643814087, "learning_rate": 9.024271844660194e-06, "loss": 0.2962, "step": 8441 }, { "epoch": 81.8566265060241, "grad_norm": 3.988605260848999, "learning_rate": 9.019417475728156e-06, "loss": 0.2387, "step": 8442 }, { "epoch": 81.86626506024096, "grad_norm": 3.81500244140625, "learning_rate": 9.014563106796117e-06, "loss": 0.1774, "step": 8443 }, { "epoch": 81.87590361445783, "grad_norm": 9.004777908325195, "learning_rate": 9.009708737864078e-06, "loss": 0.2228, "step": 8444 }, { "epoch": 81.8855421686747, "grad_norm": 22.806123733520508, "learning_rate": 9.004854368932039e-06, "loss": 0.1795, "step": 8445 }, { "epoch": 81.89518072289157, "grad_norm": 2.7355289459228516, "learning_rate": 9e-06, "loss": 0.3746, "step": 8446 }, { "epoch": 81.90481927710843, "grad_norm": 9.25251579284668, "learning_rate": 8.995145631067961e-06, "loss": 0.1829, "step": 8447 }, { "epoch": 81.91445783132531, "grad_norm": 5.539860725402832, "learning_rate": 8.990291262135924e-06, "loss": 0.275, "step": 8448 }, { "epoch": 81.92409638554217, "grad_norm": 6.571382522583008, "learning_rate": 8.985436893203884e-06, "loss": 0.2623, "step": 8449 }, { "epoch": 81.93373493975903, "grad_norm": 11.498503684997559, "learning_rate": 8.980582524271845e-06, "loss": 0.4278, "step": 8450 }, { "epoch": 81.9433734939759, "grad_norm": 5.810333728790283, "learning_rate": 8.975728155339806e-06, "loss": 0.3813, "step": 8451 }, { "epoch": 81.95301204819278, "grad_norm": 8.533224105834961, "learning_rate": 8.970873786407767e-06, "loss": 0.1892, "step": 8452 }, { "epoch": 81.96265060240964, "grad_norm": 10.395757675170898, "learning_rate": 8.966019417475728e-06, "loss": 0.1527, "step": 8453 }, { "epoch": 81.9722891566265, "grad_norm": 7.096504211425781, "learning_rate": 8.961165048543691e-06, "loss": 0.2146, "step": 8454 }, { "epoch": 81.98192771084338, "grad_norm": 4.809026718139648, "learning_rate": 8.95631067961165e-06, "loss": 0.2768, "step": 8455 }, { "epoch": 81.99156626506024, "grad_norm": 5.8129143714904785, "learning_rate": 8.951456310679612e-06, "loss": 0.1126, "step": 8456 }, { "epoch": 82.00722891566265, "grad_norm": 21.09406089782715, "learning_rate": 8.946601941747573e-06, "loss": 0.1897, "step": 8457 }, { "epoch": 82.01686746987951, "grad_norm": 3.82377552986145, "learning_rate": 8.941747572815534e-06, "loss": 0.1406, "step": 8458 }, { "epoch": 82.02650602409639, "grad_norm": 6.606848239898682, "learning_rate": 8.936893203883495e-06, "loss": 0.1671, "step": 8459 }, { "epoch": 82.03614457831326, "grad_norm": 11.254494667053223, "learning_rate": 8.932038834951456e-06, "loss": 0.3802, "step": 8460 }, { "epoch": 82.04578313253012, "grad_norm": 25.41483497619629, "learning_rate": 8.927184466019419e-06, "loss": 0.3991, "step": 8461 }, { "epoch": 82.05542168674698, "grad_norm": 4.583070278167725, "learning_rate": 8.922330097087379e-06, "loss": 0.2513, "step": 8462 }, { "epoch": 82.06506024096386, "grad_norm": 4.293338298797607, "learning_rate": 8.91747572815534e-06, "loss": 0.2126, "step": 8463 }, { "epoch": 82.07469879518072, "grad_norm": 7.7957987785339355, "learning_rate": 8.912621359223301e-06, "loss": 0.2218, "step": 8464 }, { "epoch": 82.08433734939759, "grad_norm": 4.554274082183838, "learning_rate": 8.907766990291262e-06, "loss": 0.0872, "step": 8465 }, { "epoch": 82.09397590361446, "grad_norm": 0.7574566602706909, "learning_rate": 8.902912621359223e-06, "loss": 0.157, "step": 8466 }, { "epoch": 82.10361445783133, "grad_norm": 2.063736915588379, "learning_rate": 8.898058252427186e-06, "loss": 0.1504, "step": 8467 }, { "epoch": 82.11325301204819, "grad_norm": 2.744490385055542, "learning_rate": 8.893203883495147e-06, "loss": 0.0779, "step": 8468 }, { "epoch": 82.12289156626505, "grad_norm": 19.878435134887695, "learning_rate": 8.888349514563107e-06, "loss": 0.1832, "step": 8469 }, { "epoch": 82.13253012048193, "grad_norm": 4.05751371383667, "learning_rate": 8.883495145631068e-06, "loss": 0.1702, "step": 8470 }, { "epoch": 82.1421686746988, "grad_norm": 4.97549295425415, "learning_rate": 8.878640776699029e-06, "loss": 0.1321, "step": 8471 }, { "epoch": 82.15180722891566, "grad_norm": 9.981078147888184, "learning_rate": 8.87378640776699e-06, "loss": 0.35, "step": 8472 }, { "epoch": 82.16144578313254, "grad_norm": 6.964618682861328, "learning_rate": 8.868932038834953e-06, "loss": 0.3949, "step": 8473 }, { "epoch": 82.1710843373494, "grad_norm": 9.435388565063477, "learning_rate": 8.864077669902914e-06, "loss": 0.1806, "step": 8474 }, { "epoch": 82.18072289156626, "grad_norm": 2.2613680362701416, "learning_rate": 8.859223300970875e-06, "loss": 0.0985, "step": 8475 }, { "epoch": 82.19036144578314, "grad_norm": 13.116402626037598, "learning_rate": 8.854368932038835e-06, "loss": 0.3109, "step": 8476 }, { "epoch": 82.2, "grad_norm": 6.148343086242676, "learning_rate": 8.849514563106796e-06, "loss": 0.2434, "step": 8477 }, { "epoch": 82.20963855421687, "grad_norm": 11.525871276855469, "learning_rate": 8.844660194174757e-06, "loss": 0.2056, "step": 8478 }, { "epoch": 82.21927710843373, "grad_norm": 5.111670970916748, "learning_rate": 8.839805825242718e-06, "loss": 0.2682, "step": 8479 }, { "epoch": 82.2289156626506, "grad_norm": 2.6666512489318848, "learning_rate": 8.834951456310681e-06, "loss": 0.1836, "step": 8480 }, { "epoch": 82.23855421686747, "grad_norm": 2.8047828674316406, "learning_rate": 8.830097087378642e-06, "loss": 0.1684, "step": 8481 }, { "epoch": 82.24819277108433, "grad_norm": 9.845353126525879, "learning_rate": 8.825242718446603e-06, "loss": 0.134, "step": 8482 }, { "epoch": 82.25783132530121, "grad_norm": 1.9633530378341675, "learning_rate": 8.820388349514563e-06, "loss": 0.1357, "step": 8483 }, { "epoch": 82.26746987951807, "grad_norm": 3.5076510906219482, "learning_rate": 8.815533980582524e-06, "loss": 0.1005, "step": 8484 }, { "epoch": 82.27710843373494, "grad_norm": 8.87814712524414, "learning_rate": 8.810679611650485e-06, "loss": 0.1922, "step": 8485 }, { "epoch": 82.2867469879518, "grad_norm": 3.5954067707061768, "learning_rate": 8.805825242718448e-06, "loss": 0.0412, "step": 8486 }, { "epoch": 82.29638554216868, "grad_norm": 5.329345703125, "learning_rate": 8.800970873786409e-06, "loss": 0.2821, "step": 8487 }, { "epoch": 82.30602409638554, "grad_norm": 17.21076011657715, "learning_rate": 8.79611650485437e-06, "loss": 0.2813, "step": 8488 }, { "epoch": 82.3156626506024, "grad_norm": 32.11248016357422, "learning_rate": 8.791262135922331e-06, "loss": 0.223, "step": 8489 }, { "epoch": 82.32530120481928, "grad_norm": 2.464977502822876, "learning_rate": 8.78640776699029e-06, "loss": 0.2842, "step": 8490 }, { "epoch": 82.33493975903615, "grad_norm": 11.887269973754883, "learning_rate": 8.781553398058252e-06, "loss": 0.3091, "step": 8491 }, { "epoch": 82.34457831325301, "grad_norm": 13.532197952270508, "learning_rate": 8.776699029126215e-06, "loss": 0.1777, "step": 8492 }, { "epoch": 82.35421686746987, "grad_norm": 11.450349807739258, "learning_rate": 8.771844660194176e-06, "loss": 0.194, "step": 8493 }, { "epoch": 82.36385542168675, "grad_norm": 1.927236795425415, "learning_rate": 8.766990291262137e-06, "loss": 0.1407, "step": 8494 }, { "epoch": 82.37349397590361, "grad_norm": 8.540337562561035, "learning_rate": 8.762135922330098e-06, "loss": 0.3211, "step": 8495 }, { "epoch": 82.38313253012048, "grad_norm": 1.3189520835876465, "learning_rate": 8.75728155339806e-06, "loss": 0.2039, "step": 8496 }, { "epoch": 82.39277108433735, "grad_norm": 10.37840747833252, "learning_rate": 8.752427184466019e-06, "loss": 0.3305, "step": 8497 }, { "epoch": 82.40240963855422, "grad_norm": 1.4174748659133911, "learning_rate": 8.74757281553398e-06, "loss": 0.1109, "step": 8498 }, { "epoch": 82.41204819277108, "grad_norm": 5.327950477600098, "learning_rate": 8.742718446601943e-06, "loss": 0.1817, "step": 8499 }, { "epoch": 82.42168674698796, "grad_norm": 7.111686706542969, "learning_rate": 8.737864077669904e-06, "loss": 0.2051, "step": 8500 }, { "epoch": 82.43132530120482, "grad_norm": 13.103438377380371, "learning_rate": 8.733009708737865e-06, "loss": 0.1878, "step": 8501 }, { "epoch": 82.44096385542169, "grad_norm": 2.0577425956726074, "learning_rate": 8.728155339805826e-06, "loss": 0.1651, "step": 8502 }, { "epoch": 82.45060240963855, "grad_norm": 3.2531776428222656, "learning_rate": 8.723300970873787e-06, "loss": 0.1214, "step": 8503 }, { "epoch": 82.46024096385543, "grad_norm": 2.3940072059631348, "learning_rate": 8.718446601941747e-06, "loss": 0.1305, "step": 8504 }, { "epoch": 82.46987951807229, "grad_norm": 2.1956846714019775, "learning_rate": 8.71359223300971e-06, "loss": 0.2856, "step": 8505 }, { "epoch": 82.47951807228915, "grad_norm": 5.353875637054443, "learning_rate": 8.70873786407767e-06, "loss": 0.1369, "step": 8506 }, { "epoch": 82.48915662650603, "grad_norm": 5.585976600646973, "learning_rate": 8.703883495145632e-06, "loss": 0.1249, "step": 8507 }, { "epoch": 82.4987951807229, "grad_norm": 2.9031405448913574, "learning_rate": 8.699029126213593e-06, "loss": 0.1851, "step": 8508 }, { "epoch": 82.50843373493976, "grad_norm": 15.336003303527832, "learning_rate": 8.694174757281554e-06, "loss": 0.3359, "step": 8509 }, { "epoch": 82.51807228915662, "grad_norm": 9.488112449645996, "learning_rate": 8.689320388349515e-06, "loss": 0.1336, "step": 8510 }, { "epoch": 82.5277108433735, "grad_norm": 4.688533306121826, "learning_rate": 8.684466019417476e-06, "loss": 0.1312, "step": 8511 }, { "epoch": 82.53734939759036, "grad_norm": 5.961021900177002, "learning_rate": 8.679611650485438e-06, "loss": 0.1682, "step": 8512 }, { "epoch": 82.54698795180722, "grad_norm": 11.33554458618164, "learning_rate": 8.674757281553399e-06, "loss": 0.322, "step": 8513 }, { "epoch": 82.5566265060241, "grad_norm": 1.8398687839508057, "learning_rate": 8.66990291262136e-06, "loss": 0.1857, "step": 8514 }, { "epoch": 82.56626506024097, "grad_norm": 27.00824546813965, "learning_rate": 8.665048543689321e-06, "loss": 0.2686, "step": 8515 }, { "epoch": 82.57590361445783, "grad_norm": 4.665283679962158, "learning_rate": 8.660194174757282e-06, "loss": 0.2375, "step": 8516 }, { "epoch": 82.58554216867469, "grad_norm": 4.324519634246826, "learning_rate": 8.655339805825243e-06, "loss": 0.334, "step": 8517 }, { "epoch": 82.59518072289157, "grad_norm": 15.618755340576172, "learning_rate": 8.650485436893204e-06, "loss": 0.1812, "step": 8518 }, { "epoch": 82.60481927710843, "grad_norm": 2.2736124992370605, "learning_rate": 8.645631067961166e-06, "loss": 0.0748, "step": 8519 }, { "epoch": 82.6144578313253, "grad_norm": 23.856359481811523, "learning_rate": 8.640776699029127e-06, "loss": 0.1479, "step": 8520 }, { "epoch": 82.62409638554217, "grad_norm": 16.28910255432129, "learning_rate": 8.635922330097088e-06, "loss": 0.2995, "step": 8521 }, { "epoch": 82.63373493975904, "grad_norm": 20.42687225341797, "learning_rate": 8.631067961165049e-06, "loss": 0.3746, "step": 8522 }, { "epoch": 82.6433734939759, "grad_norm": 20.052433013916016, "learning_rate": 8.62621359223301e-06, "loss": 0.3035, "step": 8523 }, { "epoch": 82.65301204819278, "grad_norm": 17.60202980041504, "learning_rate": 8.621359223300971e-06, "loss": 0.2548, "step": 8524 }, { "epoch": 82.66265060240964, "grad_norm": 10.270172119140625, "learning_rate": 8.616504854368932e-06, "loss": 0.2893, "step": 8525 }, { "epoch": 82.6722891566265, "grad_norm": 1.2716221809387207, "learning_rate": 8.611650485436894e-06, "loss": 0.1954, "step": 8526 }, { "epoch": 82.68192771084337, "grad_norm": 3.5915567874908447, "learning_rate": 8.606796116504855e-06, "loss": 0.1558, "step": 8527 }, { "epoch": 82.69156626506025, "grad_norm": 3.9113457202911377, "learning_rate": 8.601941747572816e-06, "loss": 0.2339, "step": 8528 }, { "epoch": 82.70120481927711, "grad_norm": 2.8827311992645264, "learning_rate": 8.597087378640777e-06, "loss": 0.132, "step": 8529 }, { "epoch": 82.71084337349397, "grad_norm": 8.383031845092773, "learning_rate": 8.592233009708738e-06, "loss": 0.23, "step": 8530 }, { "epoch": 82.72048192771085, "grad_norm": 9.624371528625488, "learning_rate": 8.5873786407767e-06, "loss": 0.0807, "step": 8531 }, { "epoch": 82.73012048192771, "grad_norm": 3.2646050453186035, "learning_rate": 8.58252427184466e-06, "loss": 0.1358, "step": 8532 }, { "epoch": 82.73975903614458, "grad_norm": 35.03271484375, "learning_rate": 8.577669902912622e-06, "loss": 0.1963, "step": 8533 }, { "epoch": 82.74939759036144, "grad_norm": 9.025707244873047, "learning_rate": 8.572815533980583e-06, "loss": 0.2026, "step": 8534 }, { "epoch": 82.75903614457832, "grad_norm": 3.4961841106414795, "learning_rate": 8.567961165048544e-06, "loss": 0.3666, "step": 8535 }, { "epoch": 82.76867469879518, "grad_norm": 3.6602635383605957, "learning_rate": 8.563106796116507e-06, "loss": 0.3226, "step": 8536 }, { "epoch": 82.77831325301204, "grad_norm": 8.850235939025879, "learning_rate": 8.558252427184466e-06, "loss": 0.1745, "step": 8537 }, { "epoch": 82.78795180722892, "grad_norm": 7.798396587371826, "learning_rate": 8.553398058252427e-06, "loss": 0.4367, "step": 8538 }, { "epoch": 82.79759036144578, "grad_norm": 9.489347457885742, "learning_rate": 8.548543689320388e-06, "loss": 0.4018, "step": 8539 }, { "epoch": 82.80722891566265, "grad_norm": 11.3488130569458, "learning_rate": 8.54368932038835e-06, "loss": 0.3074, "step": 8540 }, { "epoch": 82.81686746987951, "grad_norm": 4.127200126647949, "learning_rate": 8.53883495145631e-06, "loss": 0.1438, "step": 8541 }, { "epoch": 82.82650602409639, "grad_norm": 7.795212268829346, "learning_rate": 8.533980582524272e-06, "loss": 0.3072, "step": 8542 }, { "epoch": 82.83614457831325, "grad_norm": 32.681549072265625, "learning_rate": 8.529126213592235e-06, "loss": 0.247, "step": 8543 }, { "epoch": 82.84578313253012, "grad_norm": 4.306479454040527, "learning_rate": 8.524271844660194e-06, "loss": 0.2978, "step": 8544 }, { "epoch": 82.855421686747, "grad_norm": 14.73320484161377, "learning_rate": 8.519417475728155e-06, "loss": 0.262, "step": 8545 }, { "epoch": 82.86506024096386, "grad_norm": 5.24418306350708, "learning_rate": 8.514563106796116e-06, "loss": 0.2749, "step": 8546 }, { "epoch": 82.87469879518072, "grad_norm": 9.623123168945312, "learning_rate": 8.509708737864078e-06, "loss": 0.3552, "step": 8547 }, { "epoch": 82.88433734939758, "grad_norm": 18.89712142944336, "learning_rate": 8.504854368932039e-06, "loss": 0.168, "step": 8548 }, { "epoch": 82.89397590361446, "grad_norm": 5.091564178466797, "learning_rate": 8.500000000000002e-06, "loss": 0.3049, "step": 8549 }, { "epoch": 82.90361445783132, "grad_norm": 4.277631759643555, "learning_rate": 8.495145631067963e-06, "loss": 0.2132, "step": 8550 }, { "epoch": 82.91325301204819, "grad_norm": 20.395227432250977, "learning_rate": 8.490291262135922e-06, "loss": 0.2095, "step": 8551 }, { "epoch": 82.92289156626506, "grad_norm": 15.753437995910645, "learning_rate": 8.485436893203883e-06, "loss": 0.1452, "step": 8552 }, { "epoch": 82.93253012048193, "grad_norm": 2.6902778148651123, "learning_rate": 8.480582524271845e-06, "loss": 0.134, "step": 8553 }, { "epoch": 82.94216867469879, "grad_norm": 21.59983253479004, "learning_rate": 8.475728155339806e-06, "loss": 0.4466, "step": 8554 }, { "epoch": 82.95180722891567, "grad_norm": 3.5648608207702637, "learning_rate": 8.470873786407769e-06, "loss": 0.2817, "step": 8555 }, { "epoch": 82.96144578313253, "grad_norm": 12.257619857788086, "learning_rate": 8.46601941747573e-06, "loss": 0.2211, "step": 8556 }, { "epoch": 82.9710843373494, "grad_norm": 7.8044114112854, "learning_rate": 8.46116504854369e-06, "loss": 0.2358, "step": 8557 }, { "epoch": 82.98072289156626, "grad_norm": 2.6340386867523193, "learning_rate": 8.45631067961165e-06, "loss": 0.4166, "step": 8558 }, { "epoch": 82.99036144578314, "grad_norm": 5.8239874839782715, "learning_rate": 8.451456310679611e-06, "loss": 0.2274, "step": 8559 }, { "epoch": 83.00602409638554, "grad_norm": 4.338447570800781, "learning_rate": 8.446601941747573e-06, "loss": 0.1367, "step": 8560 }, { "epoch": 83.01566265060241, "grad_norm": 4.4681596755981445, "learning_rate": 8.441747572815534e-06, "loss": 0.2869, "step": 8561 }, { "epoch": 83.02530120481927, "grad_norm": 9.090775489807129, "learning_rate": 8.436893203883497e-06, "loss": 0.3782, "step": 8562 }, { "epoch": 83.03493975903615, "grad_norm": 21.422632217407227, "learning_rate": 8.432038834951458e-06, "loss": 0.3153, "step": 8563 }, { "epoch": 83.04457831325301, "grad_norm": 4.314651012420654, "learning_rate": 8.427184466019419e-06, "loss": 0.1973, "step": 8564 }, { "epoch": 83.05421686746988, "grad_norm": 11.187982559204102, "learning_rate": 8.422330097087378e-06, "loss": 0.2393, "step": 8565 }, { "epoch": 83.06385542168675, "grad_norm": 11.568060874938965, "learning_rate": 8.41747572815534e-06, "loss": 0.2636, "step": 8566 }, { "epoch": 83.07349397590362, "grad_norm": 20.879060745239258, "learning_rate": 8.4126213592233e-06, "loss": 0.5028, "step": 8567 }, { "epoch": 83.08313253012048, "grad_norm": 35.394107818603516, "learning_rate": 8.407766990291263e-06, "loss": 0.2865, "step": 8568 }, { "epoch": 83.09277108433734, "grad_norm": 3.6136276721954346, "learning_rate": 8.402912621359225e-06, "loss": 0.1491, "step": 8569 }, { "epoch": 83.10240963855422, "grad_norm": 2.0858724117279053, "learning_rate": 8.398058252427186e-06, "loss": 0.2163, "step": 8570 }, { "epoch": 83.11204819277108, "grad_norm": 1.6191308498382568, "learning_rate": 8.393203883495145e-06, "loss": 0.1102, "step": 8571 }, { "epoch": 83.12168674698795, "grad_norm": 0.9746342301368713, "learning_rate": 8.388349514563106e-06, "loss": 0.1651, "step": 8572 }, { "epoch": 83.13132530120482, "grad_norm": 3.454157829284668, "learning_rate": 8.383495145631067e-06, "loss": 0.1169, "step": 8573 }, { "epoch": 83.14096385542169, "grad_norm": 3.4030916690826416, "learning_rate": 8.37864077669903e-06, "loss": 0.2825, "step": 8574 }, { "epoch": 83.15060240963855, "grad_norm": 17.608642578125, "learning_rate": 8.373786407766991e-06, "loss": 0.3439, "step": 8575 }, { "epoch": 83.16024096385541, "grad_norm": 5.388158321380615, "learning_rate": 8.368932038834953e-06, "loss": 0.2623, "step": 8576 }, { "epoch": 83.16987951807229, "grad_norm": 7.4443039894104, "learning_rate": 8.364077669902914e-06, "loss": 0.2791, "step": 8577 }, { "epoch": 83.17951807228916, "grad_norm": 12.522856712341309, "learning_rate": 8.359223300970873e-06, "loss": 0.2243, "step": 8578 }, { "epoch": 83.18915662650602, "grad_norm": 6.076354026794434, "learning_rate": 8.354368932038834e-06, "loss": 0.1996, "step": 8579 }, { "epoch": 83.1987951807229, "grad_norm": 1.0250729322433472, "learning_rate": 8.349514563106795e-06, "loss": 0.2374, "step": 8580 }, { "epoch": 83.20843373493976, "grad_norm": 5.1223063468933105, "learning_rate": 8.344660194174758e-06, "loss": 0.2052, "step": 8581 }, { "epoch": 83.21807228915662, "grad_norm": 25.475435256958008, "learning_rate": 8.33980582524272e-06, "loss": 0.3327, "step": 8582 }, { "epoch": 83.2277108433735, "grad_norm": 9.501846313476562, "learning_rate": 8.33495145631068e-06, "loss": 0.4726, "step": 8583 }, { "epoch": 83.23734939759036, "grad_norm": 3.4577341079711914, "learning_rate": 8.330097087378642e-06, "loss": 0.2824, "step": 8584 }, { "epoch": 83.24698795180723, "grad_norm": 6.198853492736816, "learning_rate": 8.325242718446601e-06, "loss": 0.1569, "step": 8585 }, { "epoch": 83.25662650602409, "grad_norm": 15.168728828430176, "learning_rate": 8.320388349514562e-06, "loss": 0.5904, "step": 8586 }, { "epoch": 83.26626506024097, "grad_norm": 10.845004081726074, "learning_rate": 8.315533980582525e-06, "loss": 0.2192, "step": 8587 }, { "epoch": 83.27590361445783, "grad_norm": 20.563922882080078, "learning_rate": 8.310679611650486e-06, "loss": 0.2571, "step": 8588 }, { "epoch": 83.2855421686747, "grad_norm": 15.695964813232422, "learning_rate": 8.305825242718447e-06, "loss": 0.3139, "step": 8589 }, { "epoch": 83.29518072289157, "grad_norm": 4.4110493659973145, "learning_rate": 8.300970873786409e-06, "loss": 0.0819, "step": 8590 }, { "epoch": 83.30481927710844, "grad_norm": 1.9479280710220337, "learning_rate": 8.29611650485437e-06, "loss": 0.1394, "step": 8591 }, { "epoch": 83.3144578313253, "grad_norm": 4.465966701507568, "learning_rate": 8.29126213592233e-06, "loss": 0.1867, "step": 8592 }, { "epoch": 83.32409638554216, "grad_norm": 5.729354381561279, "learning_rate": 8.286407766990292e-06, "loss": 0.2554, "step": 8593 }, { "epoch": 83.33373493975904, "grad_norm": 7.936323165893555, "learning_rate": 8.281553398058253e-06, "loss": 0.1647, "step": 8594 }, { "epoch": 83.3433734939759, "grad_norm": 2.388909101486206, "learning_rate": 8.276699029126214e-06, "loss": 0.2798, "step": 8595 }, { "epoch": 83.35301204819277, "grad_norm": 5.50264835357666, "learning_rate": 8.271844660194175e-06, "loss": 0.2564, "step": 8596 }, { "epoch": 83.36265060240964, "grad_norm": 2.680763006210327, "learning_rate": 8.266990291262137e-06, "loss": 0.2751, "step": 8597 }, { "epoch": 83.37228915662651, "grad_norm": 3.541557788848877, "learning_rate": 8.262135922330098e-06, "loss": 0.108, "step": 8598 }, { "epoch": 83.38192771084337, "grad_norm": 3.779405117034912, "learning_rate": 8.257281553398057e-06, "loss": 0.2544, "step": 8599 }, { "epoch": 83.39156626506023, "grad_norm": 1.3294014930725098, "learning_rate": 8.25242718446602e-06, "loss": 0.1018, "step": 8600 }, { "epoch": 83.40120481927711, "grad_norm": 5.118149757385254, "learning_rate": 8.247572815533981e-06, "loss": 0.2987, "step": 8601 }, { "epoch": 83.41084337349398, "grad_norm": 17.104175567626953, "learning_rate": 8.242718446601942e-06, "loss": 0.2245, "step": 8602 }, { "epoch": 83.42048192771084, "grad_norm": 16.856388092041016, "learning_rate": 8.237864077669903e-06, "loss": 0.2381, "step": 8603 }, { "epoch": 83.43012048192772, "grad_norm": 3.861004590988159, "learning_rate": 8.233009708737865e-06, "loss": 0.2725, "step": 8604 }, { "epoch": 83.43975903614458, "grad_norm": 7.485888481140137, "learning_rate": 8.228155339805826e-06, "loss": 0.3289, "step": 8605 }, { "epoch": 83.44939759036144, "grad_norm": 5.596003532409668, "learning_rate": 8.223300970873787e-06, "loss": 0.1706, "step": 8606 }, { "epoch": 83.45903614457832, "grad_norm": 6.733198642730713, "learning_rate": 8.218446601941748e-06, "loss": 0.2446, "step": 8607 }, { "epoch": 83.46867469879518, "grad_norm": 3.553225040435791, "learning_rate": 8.21359223300971e-06, "loss": 0.2263, "step": 8608 }, { "epoch": 83.47831325301205, "grad_norm": 5.086806774139404, "learning_rate": 8.20873786407767e-06, "loss": 0.2342, "step": 8609 }, { "epoch": 83.48795180722891, "grad_norm": 3.448906421661377, "learning_rate": 8.203883495145632e-06, "loss": 0.158, "step": 8610 }, { "epoch": 83.49759036144579, "grad_norm": 1.1494386196136475, "learning_rate": 8.199029126213593e-06, "loss": 0.135, "step": 8611 }, { "epoch": 83.50722891566265, "grad_norm": 7.523488521575928, "learning_rate": 8.194174757281554e-06, "loss": 0.1733, "step": 8612 }, { "epoch": 83.51686746987951, "grad_norm": 2.855605125427246, "learning_rate": 8.189320388349515e-06, "loss": 0.258, "step": 8613 }, { "epoch": 83.52650602409639, "grad_norm": 2.247659921646118, "learning_rate": 8.184466019417476e-06, "loss": 0.1808, "step": 8614 }, { "epoch": 83.53614457831326, "grad_norm": 9.699551582336426, "learning_rate": 8.179611650485437e-06, "loss": 0.1815, "step": 8615 }, { "epoch": 83.54578313253012, "grad_norm": 8.19952392578125, "learning_rate": 8.174757281553398e-06, "loss": 0.1816, "step": 8616 }, { "epoch": 83.55542168674698, "grad_norm": 3.731407880783081, "learning_rate": 8.16990291262136e-06, "loss": 0.1941, "step": 8617 }, { "epoch": 83.56506024096386, "grad_norm": 2.1543893814086914, "learning_rate": 8.16504854368932e-06, "loss": 0.111, "step": 8618 }, { "epoch": 83.57469879518072, "grad_norm": 10.713313102722168, "learning_rate": 8.160194174757282e-06, "loss": 0.1723, "step": 8619 }, { "epoch": 83.58433734939759, "grad_norm": 13.477102279663086, "learning_rate": 8.155339805825243e-06, "loss": 0.2486, "step": 8620 }, { "epoch": 83.59397590361446, "grad_norm": 26.1234130859375, "learning_rate": 8.150485436893204e-06, "loss": 0.2614, "step": 8621 }, { "epoch": 83.60361445783133, "grad_norm": 1.8659766912460327, "learning_rate": 8.145631067961165e-06, "loss": 0.2273, "step": 8622 }, { "epoch": 83.61325301204819, "grad_norm": 4.732008934020996, "learning_rate": 8.140776699029126e-06, "loss": 0.2243, "step": 8623 }, { "epoch": 83.62289156626505, "grad_norm": 9.335100173950195, "learning_rate": 8.135922330097088e-06, "loss": 0.1583, "step": 8624 }, { "epoch": 83.63253012048193, "grad_norm": 3.997619152069092, "learning_rate": 8.131067961165049e-06, "loss": 0.1695, "step": 8625 }, { "epoch": 83.6421686746988, "grad_norm": 5.2244696617126465, "learning_rate": 8.12621359223301e-06, "loss": 0.0906, "step": 8626 }, { "epoch": 83.65180722891566, "grad_norm": 12.669160842895508, "learning_rate": 8.121359223300971e-06, "loss": 0.2122, "step": 8627 }, { "epoch": 83.66144578313254, "grad_norm": 2.0146141052246094, "learning_rate": 8.116504854368932e-06, "loss": 0.1243, "step": 8628 }, { "epoch": 83.6710843373494, "grad_norm": 21.151187896728516, "learning_rate": 8.111650485436893e-06, "loss": 0.1795, "step": 8629 }, { "epoch": 83.68072289156626, "grad_norm": 3.8896281719207764, "learning_rate": 8.106796116504854e-06, "loss": 0.1718, "step": 8630 }, { "epoch": 83.69036144578314, "grad_norm": 4.517177104949951, "learning_rate": 8.101941747572817e-06, "loss": 0.4004, "step": 8631 }, { "epoch": 83.7, "grad_norm": 8.428537368774414, "learning_rate": 8.097087378640777e-06, "loss": 0.1535, "step": 8632 }, { "epoch": 83.70963855421687, "grad_norm": 20.81406021118164, "learning_rate": 8.092233009708738e-06, "loss": 0.1952, "step": 8633 }, { "epoch": 83.71927710843373, "grad_norm": 19.19021224975586, "learning_rate": 8.087378640776699e-06, "loss": 0.1672, "step": 8634 }, { "epoch": 83.7289156626506, "grad_norm": 2.5603864192962646, "learning_rate": 8.08252427184466e-06, "loss": 0.2038, "step": 8635 }, { "epoch": 83.73855421686747, "grad_norm": 5.767894744873047, "learning_rate": 8.077669902912621e-06, "loss": 0.1411, "step": 8636 }, { "epoch": 83.74819277108433, "grad_norm": 14.344307899475098, "learning_rate": 8.072815533980584e-06, "loss": 0.1567, "step": 8637 }, { "epoch": 83.75783132530121, "grad_norm": 3.4427273273468018, "learning_rate": 8.067961165048545e-06, "loss": 0.3201, "step": 8638 }, { "epoch": 83.76746987951807, "grad_norm": 12.011984825134277, "learning_rate": 8.063106796116505e-06, "loss": 0.2138, "step": 8639 }, { "epoch": 83.77710843373494, "grad_norm": 4.117263317108154, "learning_rate": 8.058252427184466e-06, "loss": 0.1967, "step": 8640 }, { "epoch": 83.7867469879518, "grad_norm": 8.871482849121094, "learning_rate": 8.053398058252427e-06, "loss": 0.2109, "step": 8641 }, { "epoch": 83.79638554216868, "grad_norm": 3.618748426437378, "learning_rate": 8.048543689320388e-06, "loss": 0.2279, "step": 8642 }, { "epoch": 83.80602409638554, "grad_norm": 11.025774955749512, "learning_rate": 8.04368932038835e-06, "loss": 0.2195, "step": 8643 }, { "epoch": 83.8156626506024, "grad_norm": 32.540977478027344, "learning_rate": 8.038834951456312e-06, "loss": 0.7278, "step": 8644 }, { "epoch": 83.82530120481928, "grad_norm": 4.6733903884887695, "learning_rate": 8.033980582524273e-06, "loss": 0.2247, "step": 8645 }, { "epoch": 83.83493975903615, "grad_norm": 5.265789985656738, "learning_rate": 8.029126213592233e-06, "loss": 0.1291, "step": 8646 }, { "epoch": 83.84457831325301, "grad_norm": 5.342320442199707, "learning_rate": 8.024271844660194e-06, "loss": 0.3294, "step": 8647 }, { "epoch": 83.85421686746987, "grad_norm": 7.555809020996094, "learning_rate": 8.019417475728155e-06, "loss": 0.2026, "step": 8648 }, { "epoch": 83.86385542168675, "grad_norm": 7.59323263168335, "learning_rate": 8.014563106796116e-06, "loss": 0.291, "step": 8649 }, { "epoch": 83.87349397590361, "grad_norm": 2.5293290615081787, "learning_rate": 8.009708737864079e-06, "loss": 0.2359, "step": 8650 }, { "epoch": 83.88313253012048, "grad_norm": 8.133174896240234, "learning_rate": 8.00485436893204e-06, "loss": 0.4847, "step": 8651 }, { "epoch": 83.89277108433735, "grad_norm": 1.299669861793518, "learning_rate": 8.000000000000001e-06, "loss": 0.1453, "step": 8652 }, { "epoch": 83.90240963855422, "grad_norm": 21.515613555908203, "learning_rate": 7.99514563106796e-06, "loss": 0.2203, "step": 8653 }, { "epoch": 83.91204819277108, "grad_norm": 8.799643516540527, "learning_rate": 7.990291262135922e-06, "loss": 0.2904, "step": 8654 }, { "epoch": 83.92168674698796, "grad_norm": 22.738235473632812, "learning_rate": 7.985436893203883e-06, "loss": 0.2886, "step": 8655 }, { "epoch": 83.93132530120482, "grad_norm": 25.209604263305664, "learning_rate": 7.980582524271846e-06, "loss": 0.1992, "step": 8656 }, { "epoch": 83.94096385542169, "grad_norm": 2.801968574523926, "learning_rate": 7.975728155339807e-06, "loss": 0.1417, "step": 8657 }, { "epoch": 83.95060240963855, "grad_norm": 1.5475196838378906, "learning_rate": 7.970873786407768e-06, "loss": 0.0777, "step": 8658 }, { "epoch": 83.96024096385543, "grad_norm": 5.296849250793457, "learning_rate": 7.96601941747573e-06, "loss": 0.232, "step": 8659 }, { "epoch": 83.96987951807229, "grad_norm": 6.840432643890381, "learning_rate": 7.961165048543689e-06, "loss": 0.1295, "step": 8660 }, { "epoch": 83.97951807228915, "grad_norm": 23.071229934692383, "learning_rate": 7.95631067961165e-06, "loss": 0.2352, "step": 8661 }, { "epoch": 83.98915662650603, "grad_norm": 11.624582290649414, "learning_rate": 7.951456310679611e-06, "loss": 0.3842, "step": 8662 }, { "epoch": 84.00481927710844, "grad_norm": 10.438249588012695, "learning_rate": 7.946601941747574e-06, "loss": 0.115, "step": 8663 }, { "epoch": 84.0144578313253, "grad_norm": 11.922714233398438, "learning_rate": 7.941747572815535e-06, "loss": 0.0993, "step": 8664 }, { "epoch": 84.02409638554217, "grad_norm": 2.823395252227783, "learning_rate": 7.936893203883496e-06, "loss": 0.1017, "step": 8665 }, { "epoch": 84.03373493975904, "grad_norm": 49.070716857910156, "learning_rate": 7.932038834951457e-06, "loss": 0.2481, "step": 8666 }, { "epoch": 84.0433734939759, "grad_norm": 5.367282867431641, "learning_rate": 7.927184466019417e-06, "loss": 0.3405, "step": 8667 }, { "epoch": 84.05301204819277, "grad_norm": 10.132085800170898, "learning_rate": 7.922330097087378e-06, "loss": 0.3158, "step": 8668 }, { "epoch": 84.06265060240963, "grad_norm": 2.294102430343628, "learning_rate": 7.91747572815534e-06, "loss": 0.1396, "step": 8669 }, { "epoch": 84.07228915662651, "grad_norm": 5.004843235015869, "learning_rate": 7.912621359223302e-06, "loss": 0.2871, "step": 8670 }, { "epoch": 84.08192771084337, "grad_norm": 14.435986518859863, "learning_rate": 7.907766990291263e-06, "loss": 0.1278, "step": 8671 }, { "epoch": 84.09156626506024, "grad_norm": 9.169046401977539, "learning_rate": 7.902912621359224e-06, "loss": 0.3755, "step": 8672 }, { "epoch": 84.10120481927711, "grad_norm": 1.546820878982544, "learning_rate": 7.898058252427185e-06, "loss": 0.235, "step": 8673 }, { "epoch": 84.11084337349398, "grad_norm": 2.4012486934661865, "learning_rate": 7.893203883495145e-06, "loss": 0.1239, "step": 8674 }, { "epoch": 84.12048192771084, "grad_norm": 9.236271858215332, "learning_rate": 7.888349514563108e-06, "loss": 0.2867, "step": 8675 }, { "epoch": 84.1301204819277, "grad_norm": 2.929255247116089, "learning_rate": 7.883495145631069e-06, "loss": 0.345, "step": 8676 }, { "epoch": 84.13975903614458, "grad_norm": 3.824936866760254, "learning_rate": 7.87864077669903e-06, "loss": 0.1793, "step": 8677 }, { "epoch": 84.14939759036145, "grad_norm": 6.855861186981201, "learning_rate": 7.873786407766991e-06, "loss": 0.2151, "step": 8678 }, { "epoch": 84.15903614457831, "grad_norm": 22.121530532836914, "learning_rate": 7.868932038834952e-06, "loss": 0.2413, "step": 8679 }, { "epoch": 84.16867469879519, "grad_norm": 4.537125587463379, "learning_rate": 7.864077669902912e-06, "loss": 0.1815, "step": 8680 }, { "epoch": 84.17831325301205, "grad_norm": 4.735527515411377, "learning_rate": 7.859223300970873e-06, "loss": 0.3916, "step": 8681 }, { "epoch": 84.18795180722891, "grad_norm": 6.4364542961120605, "learning_rate": 7.854368932038836e-06, "loss": 0.3382, "step": 8682 }, { "epoch": 84.19759036144578, "grad_norm": 4.748987197875977, "learning_rate": 7.849514563106797e-06, "loss": 0.3828, "step": 8683 }, { "epoch": 84.20722891566265, "grad_norm": 3.984933853149414, "learning_rate": 7.844660194174758e-06, "loss": 0.2975, "step": 8684 }, { "epoch": 84.21686746987952, "grad_norm": 2.0726137161254883, "learning_rate": 7.839805825242719e-06, "loss": 0.0904, "step": 8685 }, { "epoch": 84.22650602409638, "grad_norm": 8.741596221923828, "learning_rate": 7.83495145631068e-06, "loss": 0.2295, "step": 8686 }, { "epoch": 84.23614457831326, "grad_norm": 12.810903549194336, "learning_rate": 7.83009708737864e-06, "loss": 0.3039, "step": 8687 }, { "epoch": 84.24578313253012, "grad_norm": 9.695737838745117, "learning_rate": 7.825242718446603e-06, "loss": 0.1625, "step": 8688 }, { "epoch": 84.25542168674698, "grad_norm": 2.648864984512329, "learning_rate": 7.820388349514564e-06, "loss": 0.0766, "step": 8689 }, { "epoch": 84.26506024096386, "grad_norm": 3.5126912593841553, "learning_rate": 7.815533980582525e-06, "loss": 0.3704, "step": 8690 }, { "epoch": 84.27469879518073, "grad_norm": 7.258105754852295, "learning_rate": 7.810679611650486e-06, "loss": 0.2541, "step": 8691 }, { "epoch": 84.28433734939759, "grad_norm": 11.080565452575684, "learning_rate": 7.805825242718447e-06, "loss": 0.3313, "step": 8692 }, { "epoch": 84.29397590361445, "grad_norm": 44.97907257080078, "learning_rate": 7.800970873786408e-06, "loss": 0.2468, "step": 8693 }, { "epoch": 84.30361445783133, "grad_norm": 6.527723789215088, "learning_rate": 7.79611650485437e-06, "loss": 0.3544, "step": 8694 }, { "epoch": 84.3132530120482, "grad_norm": 6.568812847137451, "learning_rate": 7.79126213592233e-06, "loss": 0.2524, "step": 8695 }, { "epoch": 84.32289156626506, "grad_norm": 3.7099690437316895, "learning_rate": 7.786407766990292e-06, "loss": 0.3127, "step": 8696 }, { "epoch": 84.33253012048193, "grad_norm": 2.4661705493927, "learning_rate": 7.781553398058253e-06, "loss": 0.1761, "step": 8697 }, { "epoch": 84.3421686746988, "grad_norm": 5.474889278411865, "learning_rate": 7.776699029126214e-06, "loss": 0.2736, "step": 8698 }, { "epoch": 84.35180722891566, "grad_norm": 11.13907527923584, "learning_rate": 7.771844660194175e-06, "loss": 0.4065, "step": 8699 }, { "epoch": 84.36144578313252, "grad_norm": 4.518246650695801, "learning_rate": 7.766990291262136e-06, "loss": 0.2241, "step": 8700 }, { "epoch": 84.3710843373494, "grad_norm": 10.066510200500488, "learning_rate": 7.762135922330097e-06, "loss": 0.2353, "step": 8701 }, { "epoch": 84.38072289156626, "grad_norm": 9.547158241271973, "learning_rate": 7.757281553398059e-06, "loss": 0.1657, "step": 8702 }, { "epoch": 84.39036144578313, "grad_norm": 2.926076889038086, "learning_rate": 7.75242718446602e-06, "loss": 0.1005, "step": 8703 }, { "epoch": 84.4, "grad_norm": 3.672950267791748, "learning_rate": 7.747572815533981e-06, "loss": 0.5375, "step": 8704 }, { "epoch": 84.40963855421687, "grad_norm": 3.875946521759033, "learning_rate": 7.742718446601942e-06, "loss": 0.102, "step": 8705 }, { "epoch": 84.41927710843373, "grad_norm": 5.202737331390381, "learning_rate": 7.737864077669903e-06, "loss": 0.2114, "step": 8706 }, { "epoch": 84.4289156626506, "grad_norm": 28.511810302734375, "learning_rate": 7.733009708737864e-06, "loss": 0.2974, "step": 8707 }, { "epoch": 84.43855421686747, "grad_norm": 4.872511863708496, "learning_rate": 7.728155339805825e-06, "loss": 0.2054, "step": 8708 }, { "epoch": 84.44819277108434, "grad_norm": 11.94895076751709, "learning_rate": 7.723300970873787e-06, "loss": 0.276, "step": 8709 }, { "epoch": 84.4578313253012, "grad_norm": 5.509463310241699, "learning_rate": 7.718446601941748e-06, "loss": 0.4452, "step": 8710 }, { "epoch": 84.46746987951808, "grad_norm": 9.16263198852539, "learning_rate": 7.713592233009709e-06, "loss": 0.2377, "step": 8711 }, { "epoch": 84.47710843373494, "grad_norm": 2.4310271739959717, "learning_rate": 7.70873786407767e-06, "loss": 0.2581, "step": 8712 }, { "epoch": 84.4867469879518, "grad_norm": 9.40433120727539, "learning_rate": 7.703883495145631e-06, "loss": 0.23, "step": 8713 }, { "epoch": 84.49638554216868, "grad_norm": 2.476527690887451, "learning_rate": 7.699029126213592e-06, "loss": 0.0708, "step": 8714 }, { "epoch": 84.50602409638554, "grad_norm": 12.804997444152832, "learning_rate": 7.694174757281554e-06, "loss": 0.2148, "step": 8715 }, { "epoch": 84.51566265060241, "grad_norm": 4.105074882507324, "learning_rate": 7.689320388349515e-06, "loss": 0.1908, "step": 8716 }, { "epoch": 84.52530120481927, "grad_norm": 6.647518157958984, "learning_rate": 7.684466019417476e-06, "loss": 0.3521, "step": 8717 }, { "epoch": 84.53493975903615, "grad_norm": 5.336258888244629, "learning_rate": 7.679611650485437e-06, "loss": 0.1688, "step": 8718 }, { "epoch": 84.54457831325301, "grad_norm": 2.3378777503967285, "learning_rate": 7.674757281553398e-06, "loss": 0.3364, "step": 8719 }, { "epoch": 84.55421686746988, "grad_norm": 5.827540397644043, "learning_rate": 7.66990291262136e-06, "loss": 0.2547, "step": 8720 }, { "epoch": 84.56385542168675, "grad_norm": 11.620465278625488, "learning_rate": 7.66504854368932e-06, "loss": 0.2476, "step": 8721 }, { "epoch": 84.57349397590362, "grad_norm": 3.4501030445098877, "learning_rate": 7.660194174757282e-06, "loss": 0.2318, "step": 8722 }, { "epoch": 84.58313253012048, "grad_norm": 5.788028717041016, "learning_rate": 7.655339805825243e-06, "loss": 0.3172, "step": 8723 }, { "epoch": 84.59277108433734, "grad_norm": 7.544977188110352, "learning_rate": 7.650485436893204e-06, "loss": 0.1837, "step": 8724 }, { "epoch": 84.60240963855422, "grad_norm": 22.822538375854492, "learning_rate": 7.645631067961165e-06, "loss": 0.2668, "step": 8725 }, { "epoch": 84.61204819277108, "grad_norm": 17.95459747314453, "learning_rate": 7.640776699029128e-06, "loss": 0.357, "step": 8726 }, { "epoch": 84.62168674698795, "grad_norm": 12.063337326049805, "learning_rate": 7.635922330097087e-06, "loss": 0.4308, "step": 8727 }, { "epoch": 84.63132530120482, "grad_norm": 5.234798908233643, "learning_rate": 7.631067961165048e-06, "loss": 0.2746, "step": 8728 }, { "epoch": 84.64096385542169, "grad_norm": 2.8675215244293213, "learning_rate": 7.6262135922330096e-06, "loss": 0.1311, "step": 8729 }, { "epoch": 84.65060240963855, "grad_norm": 4.910152912139893, "learning_rate": 7.621359223300971e-06, "loss": 0.1185, "step": 8730 }, { "epoch": 84.66024096385541, "grad_norm": 19.7794246673584, "learning_rate": 7.616504854368932e-06, "loss": 0.2244, "step": 8731 }, { "epoch": 84.66987951807229, "grad_norm": 12.178522109985352, "learning_rate": 7.611650485436894e-06, "loss": 0.2093, "step": 8732 }, { "epoch": 84.67951807228916, "grad_norm": 5.7014479637146, "learning_rate": 7.606796116504855e-06, "loss": 0.2769, "step": 8733 }, { "epoch": 84.68915662650602, "grad_norm": 5.6093621253967285, "learning_rate": 7.601941747572816e-06, "loss": 0.0812, "step": 8734 }, { "epoch": 84.6987951807229, "grad_norm": 14.405464172363281, "learning_rate": 7.597087378640777e-06, "loss": 0.2314, "step": 8735 }, { "epoch": 84.70843373493976, "grad_norm": 6.463356971740723, "learning_rate": 7.5922330097087376e-06, "loss": 0.1056, "step": 8736 }, { "epoch": 84.71807228915662, "grad_norm": 1.7318450212478638, "learning_rate": 7.587378640776699e-06, "loss": 0.216, "step": 8737 }, { "epoch": 84.7277108433735, "grad_norm": 2.8241312503814697, "learning_rate": 7.5825242718446616e-06, "loss": 0.2858, "step": 8738 }, { "epoch": 84.73734939759036, "grad_norm": 6.627094268798828, "learning_rate": 7.577669902912622e-06, "loss": 0.347, "step": 8739 }, { "epoch": 84.74698795180723, "grad_norm": 11.016295433044434, "learning_rate": 7.572815533980583e-06, "loss": 0.1714, "step": 8740 }, { "epoch": 84.75662650602409, "grad_norm": 1.2336605787277222, "learning_rate": 7.567961165048544e-06, "loss": 0.0853, "step": 8741 }, { "epoch": 84.76626506024097, "grad_norm": 3.1903951168060303, "learning_rate": 7.563106796116505e-06, "loss": 0.2582, "step": 8742 }, { "epoch": 84.77590361445783, "grad_norm": 3.477459669113159, "learning_rate": 7.558252427184466e-06, "loss": 0.147, "step": 8743 }, { "epoch": 84.7855421686747, "grad_norm": 6.185791015625, "learning_rate": 7.553398058252427e-06, "loss": 0.1622, "step": 8744 }, { "epoch": 84.79518072289157, "grad_norm": 3.328003168106079, "learning_rate": 7.54854368932039e-06, "loss": 0.2116, "step": 8745 }, { "epoch": 84.80481927710844, "grad_norm": 4.178992748260498, "learning_rate": 7.54368932038835e-06, "loss": 0.2447, "step": 8746 }, { "epoch": 84.8144578313253, "grad_norm": 4.925743103027344, "learning_rate": 7.538834951456311e-06, "loss": 0.1481, "step": 8747 }, { "epoch": 84.82409638554216, "grad_norm": 14.394537925720215, "learning_rate": 7.533980582524272e-06, "loss": 0.2861, "step": 8748 }, { "epoch": 84.83373493975904, "grad_norm": 8.029168128967285, "learning_rate": 7.529126213592233e-06, "loss": 0.2126, "step": 8749 }, { "epoch": 84.8433734939759, "grad_norm": 3.404594659805298, "learning_rate": 7.524271844660194e-06, "loss": 0.1791, "step": 8750 }, { "epoch": 84.85301204819277, "grad_norm": 1.2904841899871826, "learning_rate": 7.5194174757281565e-06, "loss": 0.129, "step": 8751 }, { "epoch": 84.86265060240964, "grad_norm": 3.1163065433502197, "learning_rate": 7.514563106796118e-06, "loss": 0.434, "step": 8752 }, { "epoch": 84.87228915662651, "grad_norm": 7.759037971496582, "learning_rate": 7.509708737864078e-06, "loss": 0.1644, "step": 8753 }, { "epoch": 84.88192771084337, "grad_norm": 8.0667085647583, "learning_rate": 7.504854368932039e-06, "loss": 0.153, "step": 8754 }, { "epoch": 84.89156626506023, "grad_norm": 18.793254852294922, "learning_rate": 7.5e-06, "loss": 0.345, "step": 8755 }, { "epoch": 84.90120481927711, "grad_norm": 10.336963653564453, "learning_rate": 7.495145631067961e-06, "loss": 0.1466, "step": 8756 }, { "epoch": 84.91084337349398, "grad_norm": 6.811376094818115, "learning_rate": 7.490291262135923e-06, "loss": 0.289, "step": 8757 }, { "epoch": 84.92048192771084, "grad_norm": 24.929025650024414, "learning_rate": 7.4854368932038845e-06, "loss": 0.2242, "step": 8758 }, { "epoch": 84.93012048192772, "grad_norm": 11.321327209472656, "learning_rate": 7.480582524271846e-06, "loss": 0.3273, "step": 8759 }, { "epoch": 84.93975903614458, "grad_norm": 12.801066398620605, "learning_rate": 7.475728155339806e-06, "loss": 0.2494, "step": 8760 }, { "epoch": 84.94939759036144, "grad_norm": 12.334271430969238, "learning_rate": 7.470873786407767e-06, "loss": 0.1612, "step": 8761 }, { "epoch": 84.95903614457832, "grad_norm": 22.30399513244629, "learning_rate": 7.466019417475728e-06, "loss": 0.2224, "step": 8762 }, { "epoch": 84.96867469879518, "grad_norm": 4.753525257110596, "learning_rate": 7.461165048543689e-06, "loss": 0.173, "step": 8763 }, { "epoch": 84.97831325301205, "grad_norm": 3.7041091918945312, "learning_rate": 7.456310679611651e-06, "loss": 0.0883, "step": 8764 }, { "epoch": 84.98795180722891, "grad_norm": 50.70457077026367, "learning_rate": 7.4514563106796125e-06, "loss": 0.108, "step": 8765 }, { "epoch": 85.00361445783132, "grad_norm": 5.913308143615723, "learning_rate": 7.446601941747574e-06, "loss": 0.2213, "step": 8766 }, { "epoch": 85.0132530120482, "grad_norm": 4.682151794433594, "learning_rate": 7.441747572815534e-06, "loss": 0.1308, "step": 8767 }, { "epoch": 85.02289156626506, "grad_norm": 2.8838372230529785, "learning_rate": 7.436893203883495e-06, "loss": 0.1021, "step": 8768 }, { "epoch": 85.03253012048192, "grad_norm": 5.150898456573486, "learning_rate": 7.432038834951456e-06, "loss": 0.2369, "step": 8769 }, { "epoch": 85.0421686746988, "grad_norm": 3.9136226177215576, "learning_rate": 7.427184466019418e-06, "loss": 0.1067, "step": 8770 }, { "epoch": 85.05180722891566, "grad_norm": 5.034511089324951, "learning_rate": 7.422330097087379e-06, "loss": 0.4923, "step": 8771 }, { "epoch": 85.06144578313253, "grad_norm": 5.131843090057373, "learning_rate": 7.4174757281553405e-06, "loss": 0.399, "step": 8772 }, { "epoch": 85.0710843373494, "grad_norm": 4.1552510261535645, "learning_rate": 7.412621359223302e-06, "loss": 0.2829, "step": 8773 }, { "epoch": 85.08072289156627, "grad_norm": 6.1092848777771, "learning_rate": 7.407766990291262e-06, "loss": 0.1283, "step": 8774 }, { "epoch": 85.09036144578313, "grad_norm": 25.11745834350586, "learning_rate": 7.402912621359223e-06, "loss": 0.2184, "step": 8775 }, { "epoch": 85.1, "grad_norm": 4.3667826652526855, "learning_rate": 7.398058252427185e-06, "loss": 0.3232, "step": 8776 }, { "epoch": 85.10963855421687, "grad_norm": 2.7902133464813232, "learning_rate": 7.393203883495146e-06, "loss": 0.2124, "step": 8777 }, { "epoch": 85.11927710843374, "grad_norm": 4.853402137756348, "learning_rate": 7.388349514563107e-06, "loss": 0.2199, "step": 8778 }, { "epoch": 85.1289156626506, "grad_norm": 3.292342185974121, "learning_rate": 7.3834951456310685e-06, "loss": 0.4678, "step": 8779 }, { "epoch": 85.13855421686748, "grad_norm": 4.135878562927246, "learning_rate": 7.378640776699029e-06, "loss": 0.1683, "step": 8780 }, { "epoch": 85.14819277108434, "grad_norm": 3.4501566886901855, "learning_rate": 7.37378640776699e-06, "loss": 0.3185, "step": 8781 }, { "epoch": 85.1578313253012, "grad_norm": 2.831664800643921, "learning_rate": 7.368932038834951e-06, "loss": 0.1604, "step": 8782 }, { "epoch": 85.16746987951807, "grad_norm": 5.5144171714782715, "learning_rate": 7.364077669902913e-06, "loss": 0.3214, "step": 8783 }, { "epoch": 85.17710843373494, "grad_norm": 14.48825740814209, "learning_rate": 7.359223300970874e-06, "loss": 0.2494, "step": 8784 }, { "epoch": 85.1867469879518, "grad_norm": 6.75835657119751, "learning_rate": 7.354368932038835e-06, "loss": 0.1488, "step": 8785 }, { "epoch": 85.19638554216867, "grad_norm": 3.9628713130950928, "learning_rate": 7.3495145631067966e-06, "loss": 0.1348, "step": 8786 }, { "epoch": 85.20602409638555, "grad_norm": 7.791067600250244, "learning_rate": 7.344660194174757e-06, "loss": 0.118, "step": 8787 }, { "epoch": 85.21566265060241, "grad_norm": 16.55714988708496, "learning_rate": 7.339805825242718e-06, "loss": 0.1754, "step": 8788 }, { "epoch": 85.22530120481927, "grad_norm": 10.370168685913086, "learning_rate": 7.334951456310681e-06, "loss": 0.2217, "step": 8789 }, { "epoch": 85.23493975903614, "grad_norm": 3.4975969791412354, "learning_rate": 7.330097087378641e-06, "loss": 0.1782, "step": 8790 }, { "epoch": 85.24457831325302, "grad_norm": 10.872856140136719, "learning_rate": 7.325242718446602e-06, "loss": 0.3204, "step": 8791 }, { "epoch": 85.25421686746988, "grad_norm": 4.534432411193848, "learning_rate": 7.3203883495145634e-06, "loss": 0.1041, "step": 8792 }, { "epoch": 85.26385542168674, "grad_norm": 8.917067527770996, "learning_rate": 7.3155339805825246e-06, "loss": 0.1946, "step": 8793 }, { "epoch": 85.27349397590362, "grad_norm": 7.223762035369873, "learning_rate": 7.310679611650485e-06, "loss": 0.1038, "step": 8794 }, { "epoch": 85.28313253012048, "grad_norm": 10.845683097839355, "learning_rate": 7.305825242718448e-06, "loss": 0.3147, "step": 8795 }, { "epoch": 85.29277108433735, "grad_norm": 2.8224215507507324, "learning_rate": 7.300970873786409e-06, "loss": 0.2836, "step": 8796 }, { "epoch": 85.30240963855422, "grad_norm": 37.166927337646484, "learning_rate": 7.296116504854369e-06, "loss": 0.2575, "step": 8797 }, { "epoch": 85.31204819277109, "grad_norm": 7.355382919311523, "learning_rate": 7.29126213592233e-06, "loss": 0.3292, "step": 8798 }, { "epoch": 85.32168674698795, "grad_norm": 20.881481170654297, "learning_rate": 7.2864077669902914e-06, "loss": 0.3432, "step": 8799 }, { "epoch": 85.33132530120481, "grad_norm": 10.647732734680176, "learning_rate": 7.281553398058253e-06, "loss": 0.3753, "step": 8800 }, { "epoch": 85.34096385542169, "grad_norm": 2.02424955368042, "learning_rate": 7.276699029126213e-06, "loss": 0.1562, "step": 8801 }, { "epoch": 85.35060240963855, "grad_norm": 7.613134860992432, "learning_rate": 7.271844660194176e-06, "loss": 0.2165, "step": 8802 }, { "epoch": 85.36024096385542, "grad_norm": 7.004857063293457, "learning_rate": 7.266990291262137e-06, "loss": 0.1576, "step": 8803 }, { "epoch": 85.3698795180723, "grad_norm": 15.529205322265625, "learning_rate": 7.262135922330097e-06, "loss": 0.2808, "step": 8804 }, { "epoch": 85.37951807228916, "grad_norm": 3.7681148052215576, "learning_rate": 7.257281553398058e-06, "loss": 0.2212, "step": 8805 }, { "epoch": 85.38915662650602, "grad_norm": 10.593921661376953, "learning_rate": 7.2524271844660195e-06, "loss": 0.3242, "step": 8806 }, { "epoch": 85.39879518072289, "grad_norm": 4.700215816497803, "learning_rate": 7.247572815533981e-06, "loss": 0.2345, "step": 8807 }, { "epoch": 85.40843373493976, "grad_norm": 13.74832820892334, "learning_rate": 7.242718446601943e-06, "loss": 0.1421, "step": 8808 }, { "epoch": 85.41807228915663, "grad_norm": 3.9739067554473877, "learning_rate": 7.237864077669904e-06, "loss": 0.1903, "step": 8809 }, { "epoch": 85.42771084337349, "grad_norm": 15.429389953613281, "learning_rate": 7.233009708737865e-06, "loss": 0.394, "step": 8810 }, { "epoch": 85.43734939759037, "grad_norm": 7.2786407470703125, "learning_rate": 7.228155339805825e-06, "loss": 0.2423, "step": 8811 }, { "epoch": 85.44698795180723, "grad_norm": 1.2183736562728882, "learning_rate": 7.223300970873786e-06, "loss": 0.0671, "step": 8812 }, { "epoch": 85.4566265060241, "grad_norm": 5.5077338218688965, "learning_rate": 7.2184466019417475e-06, "loss": 0.2606, "step": 8813 }, { "epoch": 85.46626506024096, "grad_norm": 3.187403678894043, "learning_rate": 7.2135922330097095e-06, "loss": 0.2218, "step": 8814 }, { "epoch": 85.47590361445783, "grad_norm": 13.570640563964844, "learning_rate": 7.208737864077671e-06, "loss": 0.2707, "step": 8815 }, { "epoch": 85.4855421686747, "grad_norm": 4.779664516448975, "learning_rate": 7.203883495145632e-06, "loss": 0.1831, "step": 8816 }, { "epoch": 85.49518072289156, "grad_norm": 4.843388557434082, "learning_rate": 7.199029126213593e-06, "loss": 0.2049, "step": 8817 }, { "epoch": 85.50481927710844, "grad_norm": 3.7618420124053955, "learning_rate": 7.194174757281553e-06, "loss": 0.1669, "step": 8818 }, { "epoch": 85.5144578313253, "grad_norm": 11.304701805114746, "learning_rate": 7.189320388349514e-06, "loss": 0.2927, "step": 8819 }, { "epoch": 85.52409638554217, "grad_norm": 7.423953533172607, "learning_rate": 7.1844660194174755e-06, "loss": 0.3085, "step": 8820 }, { "epoch": 85.53373493975904, "grad_norm": 4.594228744506836, "learning_rate": 7.1796116504854375e-06, "loss": 0.1934, "step": 8821 }, { "epoch": 85.5433734939759, "grad_norm": 8.786551475524902, "learning_rate": 7.174757281553399e-06, "loss": 0.1985, "step": 8822 }, { "epoch": 85.55301204819277, "grad_norm": 4.907008647918701, "learning_rate": 7.16990291262136e-06, "loss": 0.3629, "step": 8823 }, { "epoch": 85.56265060240963, "grad_norm": 3.836548089981079, "learning_rate": 7.165048543689321e-06, "loss": 0.2109, "step": 8824 }, { "epoch": 85.57228915662651, "grad_norm": 7.069586753845215, "learning_rate": 7.160194174757281e-06, "loss": 0.2068, "step": 8825 }, { "epoch": 85.58192771084337, "grad_norm": 18.1781005859375, "learning_rate": 7.155339805825242e-06, "loss": 0.3992, "step": 8826 }, { "epoch": 85.59156626506024, "grad_norm": 7.627037525177002, "learning_rate": 7.150485436893204e-06, "loss": 0.1793, "step": 8827 }, { "epoch": 85.60120481927711, "grad_norm": 5.112251281738281, "learning_rate": 7.1456310679611655e-06, "loss": 0.1358, "step": 8828 }, { "epoch": 85.61084337349398, "grad_norm": 6.309733867645264, "learning_rate": 7.140776699029127e-06, "loss": 0.241, "step": 8829 }, { "epoch": 85.62048192771084, "grad_norm": 1.6593527793884277, "learning_rate": 7.135922330097088e-06, "loss": 0.1722, "step": 8830 }, { "epoch": 85.6301204819277, "grad_norm": 5.95855712890625, "learning_rate": 7.131067961165049e-06, "loss": 0.2401, "step": 8831 }, { "epoch": 85.63975903614458, "grad_norm": 2.8072192668914795, "learning_rate": 7.126213592233009e-06, "loss": 0.3099, "step": 8832 }, { "epoch": 85.64939759036145, "grad_norm": 5.430965423583984, "learning_rate": 7.121359223300972e-06, "loss": 0.2359, "step": 8833 }, { "epoch": 85.65903614457831, "grad_norm": 4.383037090301514, "learning_rate": 7.116504854368932e-06, "loss": 0.1639, "step": 8834 }, { "epoch": 85.66867469879519, "grad_norm": 3.8684866428375244, "learning_rate": 7.1116504854368935e-06, "loss": 0.2186, "step": 8835 }, { "epoch": 85.67831325301205, "grad_norm": 1.3868862390518188, "learning_rate": 7.106796116504855e-06, "loss": 0.2021, "step": 8836 }, { "epoch": 85.68795180722891, "grad_norm": 4.883203029632568, "learning_rate": 7.101941747572816e-06, "loss": 0.1038, "step": 8837 }, { "epoch": 85.69759036144578, "grad_norm": 6.856483459472656, "learning_rate": 7.097087378640776e-06, "loss": 0.1322, "step": 8838 }, { "epoch": 85.70722891566265, "grad_norm": 5.697210311889648, "learning_rate": 7.092233009708739e-06, "loss": 0.4136, "step": 8839 }, { "epoch": 85.71686746987952, "grad_norm": 3.4210281372070312, "learning_rate": 7.0873786407767e-06, "loss": 0.1377, "step": 8840 }, { "epoch": 85.72650602409638, "grad_norm": 3.9397473335266113, "learning_rate": 7.08252427184466e-06, "loss": 0.2321, "step": 8841 }, { "epoch": 85.73614457831326, "grad_norm": 4.010299205780029, "learning_rate": 7.0776699029126216e-06, "loss": 0.2118, "step": 8842 }, { "epoch": 85.74578313253012, "grad_norm": 5.098282814025879, "learning_rate": 7.072815533980583e-06, "loss": 0.3068, "step": 8843 }, { "epoch": 85.75542168674698, "grad_norm": 10.650067329406738, "learning_rate": 7.067961165048544e-06, "loss": 0.2891, "step": 8844 }, { "epoch": 85.76506024096386, "grad_norm": 3.6213135719299316, "learning_rate": 7.063106796116504e-06, "loss": 0.3505, "step": 8845 }, { "epoch": 85.77469879518073, "grad_norm": 6.966928005218506, "learning_rate": 7.058252427184467e-06, "loss": 0.2113, "step": 8846 }, { "epoch": 85.78433734939759, "grad_norm": 4.20459508895874, "learning_rate": 7.053398058252428e-06, "loss": 0.1699, "step": 8847 }, { "epoch": 85.79397590361445, "grad_norm": 4.389486789703369, "learning_rate": 7.0485436893203884e-06, "loss": 0.322, "step": 8848 }, { "epoch": 85.80361445783133, "grad_norm": 7.3050665855407715, "learning_rate": 7.04368932038835e-06, "loss": 0.3317, "step": 8849 }, { "epoch": 85.8132530120482, "grad_norm": 4.489408016204834, "learning_rate": 7.038834951456311e-06, "loss": 0.3221, "step": 8850 }, { "epoch": 85.82289156626506, "grad_norm": 4.127713680267334, "learning_rate": 7.033980582524272e-06, "loss": 0.1992, "step": 8851 }, { "epoch": 85.83253012048193, "grad_norm": 10.713728904724121, "learning_rate": 7.029126213592234e-06, "loss": 0.2663, "step": 8852 }, { "epoch": 85.8421686746988, "grad_norm": 3.4385502338409424, "learning_rate": 7.024271844660195e-06, "loss": 0.0824, "step": 8853 }, { "epoch": 85.85180722891566, "grad_norm": 7.369065761566162, "learning_rate": 7.019417475728156e-06, "loss": 0.2989, "step": 8854 }, { "epoch": 85.86144578313252, "grad_norm": 5.076020240783691, "learning_rate": 7.0145631067961165e-06, "loss": 0.1772, "step": 8855 }, { "epoch": 85.8710843373494, "grad_norm": 3.6105353832244873, "learning_rate": 7.009708737864078e-06, "loss": 0.2478, "step": 8856 }, { "epoch": 85.88072289156626, "grad_norm": 5.530332088470459, "learning_rate": 7.004854368932039e-06, "loss": 0.1483, "step": 8857 }, { "epoch": 85.89036144578313, "grad_norm": 3.9274139404296875, "learning_rate": 7.000000000000001e-06, "loss": 0.2736, "step": 8858 }, { "epoch": 85.9, "grad_norm": 16.477602005004883, "learning_rate": 6.995145631067962e-06, "loss": 0.1872, "step": 8859 }, { "epoch": 85.90963855421687, "grad_norm": 4.191192626953125, "learning_rate": 6.990291262135923e-06, "loss": 0.1925, "step": 8860 }, { "epoch": 85.91927710843373, "grad_norm": 15.359193801879883, "learning_rate": 6.985436893203884e-06, "loss": 0.0612, "step": 8861 }, { "epoch": 85.9289156626506, "grad_norm": 11.906237602233887, "learning_rate": 6.9805825242718445e-06, "loss": 0.4133, "step": 8862 }, { "epoch": 85.93855421686747, "grad_norm": 2.840131998062134, "learning_rate": 6.975728155339806e-06, "loss": 0.1517, "step": 8863 }, { "epoch": 85.94819277108434, "grad_norm": 4.37116003036499, "learning_rate": 6.970873786407767e-06, "loss": 0.3042, "step": 8864 }, { "epoch": 85.9578313253012, "grad_norm": 6.709815502166748, "learning_rate": 6.966019417475729e-06, "loss": 0.4152, "step": 8865 }, { "epoch": 85.96746987951808, "grad_norm": 3.5729928016662598, "learning_rate": 6.96116504854369e-06, "loss": 0.1698, "step": 8866 }, { "epoch": 85.97710843373494, "grad_norm": 6.093888282775879, "learning_rate": 6.956310679611651e-06, "loss": 0.2537, "step": 8867 }, { "epoch": 85.9867469879518, "grad_norm": 1.6415830850601196, "learning_rate": 6.951456310679612e-06, "loss": 0.1086, "step": 8868 }, { "epoch": 86.00240963855421, "grad_norm": 5.314176082611084, "learning_rate": 6.9466019417475725e-06, "loss": 0.1901, "step": 8869 }, { "epoch": 86.01204819277109, "grad_norm": 6.42833948135376, "learning_rate": 6.941747572815534e-06, "loss": 0.3794, "step": 8870 }, { "epoch": 86.02168674698795, "grad_norm": 6.702212333679199, "learning_rate": 6.9368932038834965e-06, "loss": 0.2094, "step": 8871 }, { "epoch": 86.03132530120482, "grad_norm": 7.220757961273193, "learning_rate": 6.932038834951457e-06, "loss": 0.2746, "step": 8872 }, { "epoch": 86.04096385542168, "grad_norm": 7.150310516357422, "learning_rate": 6.927184466019418e-06, "loss": 0.2259, "step": 8873 }, { "epoch": 86.05060240963856, "grad_norm": 2.1712491512298584, "learning_rate": 6.922330097087379e-06, "loss": 0.1294, "step": 8874 }, { "epoch": 86.06024096385542, "grad_norm": 5.355276107788086, "learning_rate": 6.91747572815534e-06, "loss": 0.247, "step": 8875 }, { "epoch": 86.06987951807228, "grad_norm": 5.143417835235596, "learning_rate": 6.9126213592233005e-06, "loss": 0.2523, "step": 8876 }, { "epoch": 86.07951807228916, "grad_norm": 8.679823875427246, "learning_rate": 6.907766990291263e-06, "loss": 0.2251, "step": 8877 }, { "epoch": 86.08915662650602, "grad_norm": 3.7015349864959717, "learning_rate": 6.902912621359224e-06, "loss": 0.1802, "step": 8878 }, { "epoch": 86.09879518072289, "grad_norm": 5.94582986831665, "learning_rate": 6.898058252427185e-06, "loss": 0.2825, "step": 8879 }, { "epoch": 86.10843373493977, "grad_norm": 3.8063759803771973, "learning_rate": 6.893203883495146e-06, "loss": 0.2992, "step": 8880 }, { "epoch": 86.11807228915663, "grad_norm": 5.60200309753418, "learning_rate": 6.888349514563107e-06, "loss": 0.376, "step": 8881 }, { "epoch": 86.12771084337349, "grad_norm": 10.956771850585938, "learning_rate": 6.883495145631068e-06, "loss": 0.5902, "step": 8882 }, { "epoch": 86.13734939759036, "grad_norm": 6.152043342590332, "learning_rate": 6.8786407766990285e-06, "loss": 0.1082, "step": 8883 }, { "epoch": 86.14698795180723, "grad_norm": 2.359463691711426, "learning_rate": 6.873786407766991e-06, "loss": 0.1732, "step": 8884 }, { "epoch": 86.1566265060241, "grad_norm": 3.9907822608947754, "learning_rate": 6.868932038834952e-06, "loss": 0.2223, "step": 8885 }, { "epoch": 86.16626506024096, "grad_norm": 2.122860908508301, "learning_rate": 6.864077669902913e-06, "loss": 0.0868, "step": 8886 }, { "epoch": 86.17590361445784, "grad_norm": 30.13500213623047, "learning_rate": 6.859223300970874e-06, "loss": 0.3634, "step": 8887 }, { "epoch": 86.1855421686747, "grad_norm": 3.3982932567596436, "learning_rate": 6.854368932038835e-06, "loss": 0.1691, "step": 8888 }, { "epoch": 86.19518072289156, "grad_norm": 4.780479907989502, "learning_rate": 6.849514563106796e-06, "loss": 0.2197, "step": 8889 }, { "epoch": 86.20481927710843, "grad_norm": 4.907381534576416, "learning_rate": 6.844660194174758e-06, "loss": 0.2278, "step": 8890 }, { "epoch": 86.2144578313253, "grad_norm": 2.504526376724243, "learning_rate": 6.839805825242719e-06, "loss": 0.1261, "step": 8891 }, { "epoch": 86.22409638554217, "grad_norm": 18.234067916870117, "learning_rate": 6.83495145631068e-06, "loss": 0.4175, "step": 8892 }, { "epoch": 86.23373493975903, "grad_norm": 6.072651386260986, "learning_rate": 6.830097087378641e-06, "loss": 0.2619, "step": 8893 }, { "epoch": 86.24337349397591, "grad_norm": 6.574750900268555, "learning_rate": 6.825242718446602e-06, "loss": 0.1763, "step": 8894 }, { "epoch": 86.25301204819277, "grad_norm": 3.133511781692505, "learning_rate": 6.820388349514563e-06, "loss": 0.1444, "step": 8895 }, { "epoch": 86.26265060240964, "grad_norm": 11.347007751464844, "learning_rate": 6.815533980582525e-06, "loss": 0.232, "step": 8896 }, { "epoch": 86.2722891566265, "grad_norm": 5.695080280303955, "learning_rate": 6.810679611650486e-06, "loss": 0.3206, "step": 8897 }, { "epoch": 86.28192771084338, "grad_norm": 8.315596580505371, "learning_rate": 6.805825242718447e-06, "loss": 0.31, "step": 8898 }, { "epoch": 86.29156626506024, "grad_norm": 5.469447612762451, "learning_rate": 6.800970873786408e-06, "loss": 0.1669, "step": 8899 }, { "epoch": 86.3012048192771, "grad_norm": 2.770524263381958, "learning_rate": 6.796116504854369e-06, "loss": 0.3262, "step": 8900 }, { "epoch": 86.31084337349398, "grad_norm": 5.183696269989014, "learning_rate": 6.79126213592233e-06, "loss": 0.3271, "step": 8901 }, { "epoch": 86.32048192771084, "grad_norm": 13.29344367980957, "learning_rate": 6.786407766990291e-06, "loss": 0.2656, "step": 8902 }, { "epoch": 86.33012048192771, "grad_norm": 3.936887264251709, "learning_rate": 6.781553398058253e-06, "loss": 0.1393, "step": 8903 }, { "epoch": 86.33975903614459, "grad_norm": 4.6297502517700195, "learning_rate": 6.776699029126214e-06, "loss": 0.3023, "step": 8904 }, { "epoch": 86.34939759036145, "grad_norm": 4.692882061004639, "learning_rate": 6.7718446601941754e-06, "loss": 0.131, "step": 8905 }, { "epoch": 86.35903614457831, "grad_norm": 5.3759002685546875, "learning_rate": 6.766990291262136e-06, "loss": 0.3193, "step": 8906 }, { "epoch": 86.36867469879518, "grad_norm": 7.099549770355225, "learning_rate": 6.762135922330097e-06, "loss": 0.1698, "step": 8907 }, { "epoch": 86.37831325301205, "grad_norm": 1.0912896394729614, "learning_rate": 6.757281553398058e-06, "loss": 0.0686, "step": 8908 }, { "epoch": 86.38795180722892, "grad_norm": 4.71210241317749, "learning_rate": 6.75242718446602e-06, "loss": 0.1746, "step": 8909 }, { "epoch": 86.39759036144578, "grad_norm": 3.8803935050964355, "learning_rate": 6.747572815533981e-06, "loss": 0.2064, "step": 8910 }, { "epoch": 86.40722891566266, "grad_norm": 5.8067402839660645, "learning_rate": 6.742718446601942e-06, "loss": 0.1953, "step": 8911 }, { "epoch": 86.41686746987952, "grad_norm": 18.02000617980957, "learning_rate": 6.7378640776699035e-06, "loss": 0.3959, "step": 8912 }, { "epoch": 86.42650602409638, "grad_norm": 18.879539489746094, "learning_rate": 6.733009708737864e-06, "loss": 0.2978, "step": 8913 }, { "epoch": 86.43614457831325, "grad_norm": 16.380420684814453, "learning_rate": 6.728155339805825e-06, "loss": 0.2841, "step": 8914 }, { "epoch": 86.44578313253012, "grad_norm": 32.65647888183594, "learning_rate": 6.723300970873788e-06, "loss": 0.4679, "step": 8915 }, { "epoch": 86.45542168674699, "grad_norm": 3.9362452030181885, "learning_rate": 6.718446601941748e-06, "loss": 0.146, "step": 8916 }, { "epoch": 86.46506024096385, "grad_norm": 4.510190486907959, "learning_rate": 6.713592233009709e-06, "loss": 0.331, "step": 8917 }, { "epoch": 86.47469879518073, "grad_norm": 5.477273941040039, "learning_rate": 6.70873786407767e-06, "loss": 0.182, "step": 8918 }, { "epoch": 86.48433734939759, "grad_norm": 5.27304744720459, "learning_rate": 6.7038834951456315e-06, "loss": 0.2024, "step": 8919 }, { "epoch": 86.49397590361446, "grad_norm": 9.05268383026123, "learning_rate": 6.699029126213592e-06, "loss": 0.2915, "step": 8920 }, { "epoch": 86.50361445783132, "grad_norm": 5.9119181632995605, "learning_rate": 6.694174757281555e-06, "loss": 0.3069, "step": 8921 }, { "epoch": 86.5132530120482, "grad_norm": 5.363487720489502, "learning_rate": 6.689320388349516e-06, "loss": 0.2105, "step": 8922 }, { "epoch": 86.52289156626506, "grad_norm": 7.183166980743408, "learning_rate": 6.684466019417476e-06, "loss": 0.1676, "step": 8923 }, { "epoch": 86.53253012048192, "grad_norm": 25.862897872924805, "learning_rate": 6.679611650485437e-06, "loss": 0.3523, "step": 8924 }, { "epoch": 86.5421686746988, "grad_norm": 7.738072395324707, "learning_rate": 6.674757281553398e-06, "loss": 0.3327, "step": 8925 }, { "epoch": 86.55180722891566, "grad_norm": 27.00457000732422, "learning_rate": 6.6699029126213595e-06, "loss": 0.224, "step": 8926 }, { "epoch": 86.56144578313253, "grad_norm": 3.6927075386047363, "learning_rate": 6.66504854368932e-06, "loss": 0.1648, "step": 8927 }, { "epoch": 86.5710843373494, "grad_norm": 2.351292133331299, "learning_rate": 6.660194174757283e-06, "loss": 0.1731, "step": 8928 }, { "epoch": 86.58072289156627, "grad_norm": 12.941028594970703, "learning_rate": 6.655339805825243e-06, "loss": 0.1592, "step": 8929 }, { "epoch": 86.59036144578313, "grad_norm": 3.6305992603302, "learning_rate": 6.650485436893204e-06, "loss": 0.2463, "step": 8930 }, { "epoch": 86.6, "grad_norm": 5.262389183044434, "learning_rate": 6.645631067961165e-06, "loss": 0.1696, "step": 8931 }, { "epoch": 86.60963855421687, "grad_norm": 7.612245082855225, "learning_rate": 6.640776699029126e-06, "loss": 0.2993, "step": 8932 }, { "epoch": 86.61927710843374, "grad_norm": 3.028320074081421, "learning_rate": 6.6359223300970875e-06, "loss": 0.2337, "step": 8933 }, { "epoch": 86.6289156626506, "grad_norm": 2.067368745803833, "learning_rate": 6.6310679611650495e-06, "loss": 0.0835, "step": 8934 }, { "epoch": 86.63855421686748, "grad_norm": 7.572112083435059, "learning_rate": 6.626213592233011e-06, "loss": 0.3388, "step": 8935 }, { "epoch": 86.64819277108434, "grad_norm": 3.5790865421295166, "learning_rate": 6.621359223300971e-06, "loss": 0.1434, "step": 8936 }, { "epoch": 86.6578313253012, "grad_norm": 3.7808542251586914, "learning_rate": 6.616504854368932e-06, "loss": 0.1554, "step": 8937 }, { "epoch": 86.66746987951807, "grad_norm": 9.293631553649902, "learning_rate": 6.611650485436893e-06, "loss": 0.3504, "step": 8938 }, { "epoch": 86.67710843373494, "grad_norm": 3.950855016708374, "learning_rate": 6.606796116504854e-06, "loss": 0.153, "step": 8939 }, { "epoch": 86.6867469879518, "grad_norm": 3.5646843910217285, "learning_rate": 6.601941747572816e-06, "loss": 0.2164, "step": 8940 }, { "epoch": 86.69638554216867, "grad_norm": 6.462567329406738, "learning_rate": 6.5970873786407775e-06, "loss": 0.3373, "step": 8941 }, { "epoch": 86.70602409638555, "grad_norm": 2.8064687252044678, "learning_rate": 6.592233009708739e-06, "loss": 0.1338, "step": 8942 }, { "epoch": 86.71566265060241, "grad_norm": 5.532369613647461, "learning_rate": 6.587378640776699e-06, "loss": 0.0985, "step": 8943 }, { "epoch": 86.72530120481927, "grad_norm": 3.7051761150360107, "learning_rate": 6.58252427184466e-06, "loss": 0.327, "step": 8944 }, { "epoch": 86.73493975903614, "grad_norm": 4.8838887214660645, "learning_rate": 6.577669902912621e-06, "loss": 0.4264, "step": 8945 }, { "epoch": 86.74457831325302, "grad_norm": 9.80720329284668, "learning_rate": 6.572815533980582e-06, "loss": 0.3549, "step": 8946 }, { "epoch": 86.75421686746988, "grad_norm": 15.164005279541016, "learning_rate": 6.567961165048544e-06, "loss": 0.1505, "step": 8947 }, { "epoch": 86.76385542168674, "grad_norm": 24.156801223754883, "learning_rate": 6.5631067961165056e-06, "loss": 0.2874, "step": 8948 }, { "epoch": 86.77349397590362, "grad_norm": 2.4646553993225098, "learning_rate": 6.558252427184467e-06, "loss": 0.083, "step": 8949 }, { "epoch": 86.78313253012048, "grad_norm": 2.0680692195892334, "learning_rate": 6.553398058252427e-06, "loss": 0.1556, "step": 8950 }, { "epoch": 86.79277108433735, "grad_norm": 10.10271167755127, "learning_rate": 6.548543689320388e-06, "loss": 0.2992, "step": 8951 }, { "epoch": 86.80240963855422, "grad_norm": 4.839662551879883, "learning_rate": 6.543689320388349e-06, "loss": 0.1439, "step": 8952 }, { "epoch": 86.81204819277109, "grad_norm": 3.013401508331299, "learning_rate": 6.538834951456311e-06, "loss": 0.1696, "step": 8953 }, { "epoch": 86.82168674698795, "grad_norm": 3.8791537284851074, "learning_rate": 6.5339805825242724e-06, "loss": 0.2884, "step": 8954 }, { "epoch": 86.83132530120481, "grad_norm": 2.525352954864502, "learning_rate": 6.5291262135922336e-06, "loss": 0.1458, "step": 8955 }, { "epoch": 86.84096385542169, "grad_norm": 5.784839153289795, "learning_rate": 6.524271844660195e-06, "loss": 0.2531, "step": 8956 }, { "epoch": 86.85060240963855, "grad_norm": 14.42796516418457, "learning_rate": 6.519417475728155e-06, "loss": 0.2539, "step": 8957 }, { "epoch": 86.86024096385542, "grad_norm": 6.860935688018799, "learning_rate": 6.514563106796116e-06, "loss": 0.2514, "step": 8958 }, { "epoch": 86.8698795180723, "grad_norm": 4.940143585205078, "learning_rate": 6.509708737864079e-06, "loss": 0.1968, "step": 8959 }, { "epoch": 86.87951807228916, "grad_norm": 38.24976348876953, "learning_rate": 6.504854368932039e-06, "loss": 0.2204, "step": 8960 }, { "epoch": 86.88915662650602, "grad_norm": 4.018143653869629, "learning_rate": 6.5000000000000004e-06, "loss": 0.2509, "step": 8961 }, { "epoch": 86.89879518072289, "grad_norm": 2.5388336181640625, "learning_rate": 6.495145631067962e-06, "loss": 0.1659, "step": 8962 }, { "epoch": 86.90843373493976, "grad_norm": 2.0487210750579834, "learning_rate": 6.490291262135923e-06, "loss": 0.1624, "step": 8963 }, { "epoch": 86.91807228915663, "grad_norm": 6.092740535736084, "learning_rate": 6.485436893203883e-06, "loss": 0.3638, "step": 8964 }, { "epoch": 86.92771084337349, "grad_norm": 8.223438262939453, "learning_rate": 6.480582524271844e-06, "loss": 0.3445, "step": 8965 }, { "epoch": 86.93734939759037, "grad_norm": 6.709930419921875, "learning_rate": 6.475728155339807e-06, "loss": 0.2294, "step": 8966 }, { "epoch": 86.94698795180723, "grad_norm": 3.9794793128967285, "learning_rate": 6.470873786407767e-06, "loss": 0.209, "step": 8967 }, { "epoch": 86.9566265060241, "grad_norm": 4.993165969848633, "learning_rate": 6.4660194174757285e-06, "loss": 0.2062, "step": 8968 }, { "epoch": 86.96626506024096, "grad_norm": 4.3853678703308105, "learning_rate": 6.46116504854369e-06, "loss": 0.1533, "step": 8969 }, { "epoch": 86.97590361445783, "grad_norm": 8.149925231933594, "learning_rate": 6.456310679611651e-06, "loss": 0.3204, "step": 8970 }, { "epoch": 86.9855421686747, "grad_norm": 5.171445846557617, "learning_rate": 6.451456310679611e-06, "loss": 0.2523, "step": 8971 }, { "epoch": 87.0012048192771, "grad_norm": 5.785940647125244, "learning_rate": 6.446601941747574e-06, "loss": 0.2738, "step": 8972 }, { "epoch": 87.01084337349397, "grad_norm": 4.690576553344727, "learning_rate": 6.441747572815535e-06, "loss": 0.1629, "step": 8973 }, { "epoch": 87.02048192771085, "grad_norm": 9.391647338867188, "learning_rate": 6.436893203883495e-06, "loss": 0.4181, "step": 8974 }, { "epoch": 87.03012048192771, "grad_norm": 4.562902927398682, "learning_rate": 6.4320388349514565e-06, "loss": 0.2106, "step": 8975 }, { "epoch": 87.03975903614457, "grad_norm": 7.509241580963135, "learning_rate": 6.427184466019418e-06, "loss": 0.2895, "step": 8976 }, { "epoch": 87.04939759036145, "grad_norm": 6.358453273773193, "learning_rate": 6.422330097087379e-06, "loss": 0.4024, "step": 8977 }, { "epoch": 87.05903614457831, "grad_norm": 2.561396837234497, "learning_rate": 6.417475728155341e-06, "loss": 0.0902, "step": 8978 }, { "epoch": 87.06867469879518, "grad_norm": 6.533747673034668, "learning_rate": 6.412621359223302e-06, "loss": 0.0906, "step": 8979 }, { "epoch": 87.07831325301204, "grad_norm": 5.207728862762451, "learning_rate": 6.407766990291263e-06, "loss": 0.2687, "step": 8980 }, { "epoch": 87.08795180722892, "grad_norm": 8.636122703552246, "learning_rate": 6.402912621359223e-06, "loss": 0.3338, "step": 8981 }, { "epoch": 87.09759036144578, "grad_norm": 4.482191562652588, "learning_rate": 6.3980582524271845e-06, "loss": 0.185, "step": 8982 }, { "epoch": 87.10722891566265, "grad_norm": 9.203112602233887, "learning_rate": 6.393203883495146e-06, "loss": 0.4139, "step": 8983 }, { "epoch": 87.11686746987952, "grad_norm": 4.7183332443237305, "learning_rate": 6.388349514563107e-06, "loss": 0.2261, "step": 8984 }, { "epoch": 87.12650602409639, "grad_norm": 4.247933864593506, "learning_rate": 6.383495145631069e-06, "loss": 0.2896, "step": 8985 }, { "epoch": 87.13614457831325, "grad_norm": 3.0488052368164062, "learning_rate": 6.37864077669903e-06, "loss": 0.0809, "step": 8986 }, { "epoch": 87.14578313253013, "grad_norm": 14.243282318115234, "learning_rate": 6.37378640776699e-06, "loss": 0.26, "step": 8987 }, { "epoch": 87.15542168674699, "grad_norm": 2.9737231731414795, "learning_rate": 6.368932038834951e-06, "loss": 0.0967, "step": 8988 }, { "epoch": 87.16506024096385, "grad_norm": 2.6860713958740234, "learning_rate": 6.3640776699029125e-06, "loss": 0.2405, "step": 8989 }, { "epoch": 87.17469879518072, "grad_norm": 9.216546058654785, "learning_rate": 6.359223300970874e-06, "loss": 0.2395, "step": 8990 }, { "epoch": 87.1843373493976, "grad_norm": 6.471457481384277, "learning_rate": 6.354368932038836e-06, "loss": 0.2738, "step": 8991 }, { "epoch": 87.19397590361446, "grad_norm": 2.2775282859802246, "learning_rate": 6.349514563106797e-06, "loss": 0.1591, "step": 8992 }, { "epoch": 87.20361445783132, "grad_norm": 1.9471466541290283, "learning_rate": 6.344660194174758e-06, "loss": 0.1493, "step": 8993 }, { "epoch": 87.2132530120482, "grad_norm": 10.213122367858887, "learning_rate": 6.339805825242718e-06, "loss": 0.2227, "step": 8994 }, { "epoch": 87.22289156626506, "grad_norm": 2.4427969455718994, "learning_rate": 6.334951456310679e-06, "loss": 0.1713, "step": 8995 }, { "epoch": 87.23253012048193, "grad_norm": 3.516077756881714, "learning_rate": 6.3300970873786405e-06, "loss": 0.1765, "step": 8996 }, { "epoch": 87.24216867469879, "grad_norm": 6.845458507537842, "learning_rate": 6.3252427184466025e-06, "loss": 0.3658, "step": 8997 }, { "epoch": 87.25180722891567, "grad_norm": 2.3005354404449463, "learning_rate": 6.320388349514564e-06, "loss": 0.159, "step": 8998 }, { "epoch": 87.26144578313253, "grad_norm": 10.77999210357666, "learning_rate": 6.315533980582525e-06, "loss": 0.3709, "step": 8999 }, { "epoch": 87.2710843373494, "grad_norm": 9.434168815612793, "learning_rate": 6.310679611650486e-06, "loss": 0.2052, "step": 9000 }, { "epoch": 87.28072289156627, "grad_norm": 4.619119167327881, "learning_rate": 6.305825242718446e-06, "loss": 0.2221, "step": 9001 }, { "epoch": 87.29036144578313, "grad_norm": 9.615081787109375, "learning_rate": 6.300970873786407e-06, "loss": 0.3754, "step": 9002 }, { "epoch": 87.3, "grad_norm": 8.674799919128418, "learning_rate": 6.2961165048543686e-06, "loss": 0.2965, "step": 9003 }, { "epoch": 87.30963855421686, "grad_norm": 6.399058818817139, "learning_rate": 6.2912621359223306e-06, "loss": 0.2223, "step": 9004 }, { "epoch": 87.31927710843374, "grad_norm": 2.3215456008911133, "learning_rate": 6.286407766990292e-06, "loss": 0.1429, "step": 9005 }, { "epoch": 87.3289156626506, "grad_norm": 7.3313212394714355, "learning_rate": 6.281553398058253e-06, "loss": 0.2933, "step": 9006 }, { "epoch": 87.33855421686746, "grad_norm": 6.631053447723389, "learning_rate": 6.276699029126214e-06, "loss": 0.3494, "step": 9007 }, { "epoch": 87.34819277108434, "grad_norm": 4.30879020690918, "learning_rate": 6.271844660194174e-06, "loss": 0.2675, "step": 9008 }, { "epoch": 87.3578313253012, "grad_norm": 20.42593765258789, "learning_rate": 6.2669902912621354e-06, "loss": 0.4722, "step": 9009 }, { "epoch": 87.36746987951807, "grad_norm": 13.665727615356445, "learning_rate": 6.262135922330098e-06, "loss": 0.1593, "step": 9010 }, { "epoch": 87.37710843373495, "grad_norm": 3.8755311965942383, "learning_rate": 6.257281553398059e-06, "loss": 0.2328, "step": 9011 }, { "epoch": 87.38674698795181, "grad_norm": 6.981295108795166, "learning_rate": 6.25242718446602e-06, "loss": 0.3661, "step": 9012 }, { "epoch": 87.39638554216867, "grad_norm": 3.7328250408172607, "learning_rate": 6.247572815533981e-06, "loss": 0.1482, "step": 9013 }, { "epoch": 87.40602409638554, "grad_norm": 5.913026332855225, "learning_rate": 6.242718446601942e-06, "loss": 0.4159, "step": 9014 }, { "epoch": 87.41566265060241, "grad_norm": 3.5817508697509766, "learning_rate": 6.237864077669903e-06, "loss": 0.1926, "step": 9015 }, { "epoch": 87.42530120481928, "grad_norm": 3.778346300125122, "learning_rate": 6.233009708737864e-06, "loss": 0.0628, "step": 9016 }, { "epoch": 87.43493975903614, "grad_norm": 8.251323699951172, "learning_rate": 6.2281553398058255e-06, "loss": 0.1954, "step": 9017 }, { "epoch": 87.44457831325302, "grad_norm": 9.398868560791016, "learning_rate": 6.223300970873787e-06, "loss": 0.2742, "step": 9018 }, { "epoch": 87.45421686746988, "grad_norm": 5.8313212394714355, "learning_rate": 6.218446601941748e-06, "loss": 0.3597, "step": 9019 }, { "epoch": 87.46385542168674, "grad_norm": 1.4158661365509033, "learning_rate": 6.213592233009709e-06, "loss": 0.0684, "step": 9020 }, { "epoch": 87.47349397590361, "grad_norm": 5.8169121742248535, "learning_rate": 6.20873786407767e-06, "loss": 0.2751, "step": 9021 }, { "epoch": 87.48313253012049, "grad_norm": 12.401540756225586, "learning_rate": 6.203883495145631e-06, "loss": 0.3266, "step": 9022 }, { "epoch": 87.49277108433735, "grad_norm": 7.964782238006592, "learning_rate": 6.199029126213592e-06, "loss": 0.3908, "step": 9023 }, { "epoch": 87.50240963855421, "grad_norm": 2.190201759338379, "learning_rate": 6.194174757281554e-06, "loss": 0.1663, "step": 9024 }, { "epoch": 87.51204819277109, "grad_norm": 4.943521499633789, "learning_rate": 6.189320388349515e-06, "loss": 0.1763, "step": 9025 }, { "epoch": 87.52168674698795, "grad_norm": 3.295660972595215, "learning_rate": 6.184466019417476e-06, "loss": 0.1613, "step": 9026 }, { "epoch": 87.53132530120482, "grad_norm": 1.9818321466445923, "learning_rate": 6.179611650485438e-06, "loss": 0.179, "step": 9027 }, { "epoch": 87.54096385542168, "grad_norm": 3.2409849166870117, "learning_rate": 6.174757281553398e-06, "loss": 0.1898, "step": 9028 }, { "epoch": 87.55060240963856, "grad_norm": 7.844687461853027, "learning_rate": 6.169902912621359e-06, "loss": 0.2604, "step": 9029 }, { "epoch": 87.56024096385542, "grad_norm": 5.6587700843811035, "learning_rate": 6.165048543689321e-06, "loss": 0.1901, "step": 9030 }, { "epoch": 87.56987951807228, "grad_norm": 5.476395130157471, "learning_rate": 6.160194174757282e-06, "loss": 0.1757, "step": 9031 }, { "epoch": 87.57951807228916, "grad_norm": 4.759237289428711, "learning_rate": 6.155339805825243e-06, "loss": 0.265, "step": 9032 }, { "epoch": 87.58915662650602, "grad_norm": 8.178153038024902, "learning_rate": 6.150485436893204e-06, "loss": 0.1976, "step": 9033 }, { "epoch": 87.59879518072289, "grad_norm": 3.535121440887451, "learning_rate": 6.145631067961166e-06, "loss": 0.2588, "step": 9034 }, { "epoch": 87.60843373493977, "grad_norm": 2.6181631088256836, "learning_rate": 6.140776699029126e-06, "loss": 0.1516, "step": 9035 }, { "epoch": 87.61807228915663, "grad_norm": 5.099794864654541, "learning_rate": 6.135922330097087e-06, "loss": 0.3303, "step": 9036 }, { "epoch": 87.62771084337349, "grad_norm": 2.30679988861084, "learning_rate": 6.131067961165049e-06, "loss": 0.0837, "step": 9037 }, { "epoch": 87.63734939759036, "grad_norm": 1.873969316482544, "learning_rate": 6.12621359223301e-06, "loss": 0.048, "step": 9038 }, { "epoch": 87.64698795180723, "grad_norm": 2.7509469985961914, "learning_rate": 6.121359223300971e-06, "loss": 0.0979, "step": 9039 }, { "epoch": 87.6566265060241, "grad_norm": 12.593734741210938, "learning_rate": 6.116504854368933e-06, "loss": 0.3182, "step": 9040 }, { "epoch": 87.66626506024096, "grad_norm": 6.584641933441162, "learning_rate": 6.111650485436894e-06, "loss": 0.3279, "step": 9041 }, { "epoch": 87.67590361445784, "grad_norm": 4.140229225158691, "learning_rate": 6.106796116504854e-06, "loss": 0.2228, "step": 9042 }, { "epoch": 87.6855421686747, "grad_norm": 23.643131256103516, "learning_rate": 6.101941747572816e-06, "loss": 0.2506, "step": 9043 }, { "epoch": 87.69518072289156, "grad_norm": 3.449925184249878, "learning_rate": 6.097087378640777e-06, "loss": 0.2321, "step": 9044 }, { "epoch": 87.70481927710843, "grad_norm": 12.032380104064941, "learning_rate": 6.0922330097087375e-06, "loss": 0.3068, "step": 9045 }, { "epoch": 87.7144578313253, "grad_norm": 6.319117069244385, "learning_rate": 6.0873786407766995e-06, "loss": 0.0973, "step": 9046 }, { "epoch": 87.72409638554217, "grad_norm": 8.64385986328125, "learning_rate": 6.082524271844661e-06, "loss": 0.3858, "step": 9047 }, { "epoch": 87.73373493975903, "grad_norm": 6.5997114181518555, "learning_rate": 6.077669902912622e-06, "loss": 0.2816, "step": 9048 }, { "epoch": 87.74337349397591, "grad_norm": 4.121993064880371, "learning_rate": 6.072815533980583e-06, "loss": 0.2534, "step": 9049 }, { "epoch": 87.75301204819277, "grad_norm": 4.484282970428467, "learning_rate": 6.067961165048544e-06, "loss": 0.2166, "step": 9050 }, { "epoch": 87.76265060240964, "grad_norm": 7.037291526794434, "learning_rate": 6.063106796116505e-06, "loss": 0.3995, "step": 9051 }, { "epoch": 87.7722891566265, "grad_norm": 5.988597393035889, "learning_rate": 6.058252427184466e-06, "loss": 0.2913, "step": 9052 }, { "epoch": 87.78192771084338, "grad_norm": 18.052425384521484, "learning_rate": 6.0533980582524275e-06, "loss": 0.2787, "step": 9053 }, { "epoch": 87.79156626506024, "grad_norm": 19.296913146972656, "learning_rate": 6.048543689320389e-06, "loss": 0.2453, "step": 9054 }, { "epoch": 87.8012048192771, "grad_norm": 3.997375249862671, "learning_rate": 6.04368932038835e-06, "loss": 0.2034, "step": 9055 }, { "epoch": 87.81084337349398, "grad_norm": 3.5719330310821533, "learning_rate": 6.038834951456311e-06, "loss": 0.2327, "step": 9056 }, { "epoch": 87.82048192771084, "grad_norm": 6.842221260070801, "learning_rate": 6.033980582524272e-06, "loss": 0.3335, "step": 9057 }, { "epoch": 87.83012048192771, "grad_norm": 5.100204944610596, "learning_rate": 6.029126213592233e-06, "loss": 0.2641, "step": 9058 }, { "epoch": 87.83975903614459, "grad_norm": 2.0833160877227783, "learning_rate": 6.024271844660194e-06, "loss": 0.1985, "step": 9059 }, { "epoch": 87.84939759036145, "grad_norm": 3.187932252883911, "learning_rate": 6.0194174757281556e-06, "loss": 0.1305, "step": 9060 }, { "epoch": 87.85903614457831, "grad_norm": 6.146856307983398, "learning_rate": 6.014563106796117e-06, "loss": 0.2658, "step": 9061 }, { "epoch": 87.86867469879518, "grad_norm": 6.11286735534668, "learning_rate": 6.009708737864078e-06, "loss": 0.1544, "step": 9062 }, { "epoch": 87.87831325301205, "grad_norm": 17.051403045654297, "learning_rate": 6.004854368932039e-06, "loss": 0.2172, "step": 9063 }, { "epoch": 87.88795180722892, "grad_norm": 3.4392683506011963, "learning_rate": 6e-06, "loss": 0.2644, "step": 9064 }, { "epoch": 87.89759036144578, "grad_norm": 7.287097454071045, "learning_rate": 5.995145631067961e-06, "loss": 0.3106, "step": 9065 }, { "epoch": 87.90722891566266, "grad_norm": 3.5874907970428467, "learning_rate": 5.9902912621359224e-06, "loss": 0.3763, "step": 9066 }, { "epoch": 87.91686746987952, "grad_norm": 16.95748519897461, "learning_rate": 5.985436893203884e-06, "loss": 0.1832, "step": 9067 }, { "epoch": 87.92650602409638, "grad_norm": 2.738504409790039, "learning_rate": 5.980582524271846e-06, "loss": 0.3231, "step": 9068 }, { "epoch": 87.93614457831325, "grad_norm": 5.306879043579102, "learning_rate": 5.975728155339806e-06, "loss": 0.2309, "step": 9069 }, { "epoch": 87.94578313253012, "grad_norm": 4.421239852905273, "learning_rate": 5.970873786407767e-06, "loss": 0.2601, "step": 9070 }, { "epoch": 87.95542168674699, "grad_norm": 12.111988067626953, "learning_rate": 5.966019417475729e-06, "loss": 0.1538, "step": 9071 }, { "epoch": 87.96506024096385, "grad_norm": 2.9292287826538086, "learning_rate": 5.961165048543689e-06, "loss": 0.1661, "step": 9072 }, { "epoch": 87.97469879518073, "grad_norm": 12.379206657409668, "learning_rate": 5.9563106796116505e-06, "loss": 0.1338, "step": 9073 }, { "epoch": 87.98433734939759, "grad_norm": 2.4304020404815674, "learning_rate": 5.951456310679612e-06, "loss": 0.1048, "step": 9074 }, { "epoch": 87.99397590361446, "grad_norm": 5.398331165313721, "learning_rate": 5.946601941747574e-06, "loss": 0.1947, "step": 9075 }, { "epoch": 88.00963855421686, "grad_norm": 5.54235315322876, "learning_rate": 5.941747572815534e-06, "loss": 0.1549, "step": 9076 }, { "epoch": 88.01927710843374, "grad_norm": 5.334202289581299, "learning_rate": 5.936893203883495e-06, "loss": 0.3034, "step": 9077 }, { "epoch": 88.0289156626506, "grad_norm": 3.8757901191711426, "learning_rate": 5.932038834951457e-06, "loss": 0.151, "step": 9078 }, { "epoch": 88.03855421686747, "grad_norm": 7.17609977722168, "learning_rate": 5.927184466019417e-06, "loss": 0.3414, "step": 9079 }, { "epoch": 88.04819277108433, "grad_norm": 4.132307052612305, "learning_rate": 5.9223300970873785e-06, "loss": 0.3255, "step": 9080 }, { "epoch": 88.05783132530121, "grad_norm": 3.0744688510894775, "learning_rate": 5.9174757281553405e-06, "loss": 0.2823, "step": 9081 }, { "epoch": 88.06746987951807, "grad_norm": 3.677422285079956, "learning_rate": 5.912621359223302e-06, "loss": 0.4302, "step": 9082 }, { "epoch": 88.07710843373494, "grad_norm": 3.658198833465576, "learning_rate": 5.907766990291262e-06, "loss": 0.294, "step": 9083 }, { "epoch": 88.08674698795181, "grad_norm": 3.530280351638794, "learning_rate": 5.902912621359224e-06, "loss": 0.2029, "step": 9084 }, { "epoch": 88.09638554216868, "grad_norm": 6.355876445770264, "learning_rate": 5.898058252427185e-06, "loss": 0.3546, "step": 9085 }, { "epoch": 88.10602409638554, "grad_norm": 3.7112767696380615, "learning_rate": 5.893203883495145e-06, "loss": 0.323, "step": 9086 }, { "epoch": 88.1156626506024, "grad_norm": 10.556384086608887, "learning_rate": 5.888349514563107e-06, "loss": 0.4889, "step": 9087 }, { "epoch": 88.12530120481928, "grad_norm": 6.760700702667236, "learning_rate": 5.8834951456310685e-06, "loss": 0.3854, "step": 9088 }, { "epoch": 88.13493975903614, "grad_norm": 3.2760605812072754, "learning_rate": 5.87864077669903e-06, "loss": 0.2147, "step": 9089 }, { "epoch": 88.144578313253, "grad_norm": 4.609577655792236, "learning_rate": 5.873786407766991e-06, "loss": 0.2098, "step": 9090 }, { "epoch": 88.15421686746988, "grad_norm": 5.463103294372559, "learning_rate": 5.868932038834952e-06, "loss": 0.223, "step": 9091 }, { "epoch": 88.16385542168675, "grad_norm": 5.834300994873047, "learning_rate": 5.864077669902913e-06, "loss": 0.3838, "step": 9092 }, { "epoch": 88.17349397590361, "grad_norm": 5.3406805992126465, "learning_rate": 5.859223300970874e-06, "loss": 0.1344, "step": 9093 }, { "epoch": 88.18313253012049, "grad_norm": 2.8382184505462646, "learning_rate": 5.854368932038835e-06, "loss": 0.141, "step": 9094 }, { "epoch": 88.19277108433735, "grad_norm": 2.1841745376586914, "learning_rate": 5.8495145631067965e-06, "loss": 0.1377, "step": 9095 }, { "epoch": 88.20240963855422, "grad_norm": 8.44522762298584, "learning_rate": 5.844660194174758e-06, "loss": 0.3515, "step": 9096 }, { "epoch": 88.21204819277108, "grad_norm": 3.8557181358337402, "learning_rate": 5.839805825242719e-06, "loss": 0.1073, "step": 9097 }, { "epoch": 88.22168674698796, "grad_norm": 11.606746673583984, "learning_rate": 5.83495145631068e-06, "loss": 0.3121, "step": 9098 }, { "epoch": 88.23132530120482, "grad_norm": 7.048441410064697, "learning_rate": 5.830097087378641e-06, "loss": 0.2074, "step": 9099 }, { "epoch": 88.24096385542168, "grad_norm": 7.6677565574646, "learning_rate": 5.825242718446602e-06, "loss": 0.4004, "step": 9100 }, { "epoch": 88.25060240963856, "grad_norm": 5.216861724853516, "learning_rate": 5.820388349514563e-06, "loss": 0.3089, "step": 9101 }, { "epoch": 88.26024096385542, "grad_norm": 5.0423455238342285, "learning_rate": 5.8155339805825245e-06, "loss": 0.1254, "step": 9102 }, { "epoch": 88.26987951807229, "grad_norm": 7.5503644943237305, "learning_rate": 5.810679611650486e-06, "loss": 0.3805, "step": 9103 }, { "epoch": 88.27951807228915, "grad_norm": 3.198451280593872, "learning_rate": 5.805825242718447e-06, "loss": 0.2723, "step": 9104 }, { "epoch": 88.28915662650603, "grad_norm": 5.247959136962891, "learning_rate": 5.800970873786408e-06, "loss": 0.2104, "step": 9105 }, { "epoch": 88.29879518072289, "grad_norm": 5.393301963806152, "learning_rate": 5.796116504854369e-06, "loss": 0.248, "step": 9106 }, { "epoch": 88.30843373493975, "grad_norm": 6.089303016662598, "learning_rate": 5.79126213592233e-06, "loss": 0.2783, "step": 9107 }, { "epoch": 88.31807228915663, "grad_norm": 5.194924831390381, "learning_rate": 5.786407766990291e-06, "loss": 0.1891, "step": 9108 }, { "epoch": 88.3277108433735, "grad_norm": 6.1032938957214355, "learning_rate": 5.781553398058253e-06, "loss": 0.3499, "step": 9109 }, { "epoch": 88.33734939759036, "grad_norm": 4.723722457885742, "learning_rate": 5.776699029126214e-06, "loss": 0.205, "step": 9110 }, { "epoch": 88.34698795180722, "grad_norm": 4.7219624519348145, "learning_rate": 5.771844660194175e-06, "loss": 0.3475, "step": 9111 }, { "epoch": 88.3566265060241, "grad_norm": 4.986632347106934, "learning_rate": 5.766990291262137e-06, "loss": 0.2368, "step": 9112 }, { "epoch": 88.36626506024096, "grad_norm": 5.021252632141113, "learning_rate": 5.762135922330097e-06, "loss": 0.3717, "step": 9113 }, { "epoch": 88.37590361445783, "grad_norm": 4.492123126983643, "learning_rate": 5.757281553398058e-06, "loss": 0.216, "step": 9114 }, { "epoch": 88.3855421686747, "grad_norm": 3.544266700744629, "learning_rate": 5.7524271844660194e-06, "loss": 0.1538, "step": 9115 }, { "epoch": 88.39518072289157, "grad_norm": 2.0965514183044434, "learning_rate": 5.747572815533981e-06, "loss": 0.0774, "step": 9116 }, { "epoch": 88.40481927710843, "grad_norm": 6.741760730743408, "learning_rate": 5.742718446601942e-06, "loss": 0.2901, "step": 9117 }, { "epoch": 88.41445783132531, "grad_norm": 3.428051233291626, "learning_rate": 5.737864077669903e-06, "loss": 0.2617, "step": 9118 }, { "epoch": 88.42409638554217, "grad_norm": 9.948054313659668, "learning_rate": 5.733009708737865e-06, "loss": 0.4077, "step": 9119 }, { "epoch": 88.43373493975903, "grad_norm": 2.740189552307129, "learning_rate": 5.728155339805825e-06, "loss": 0.0777, "step": 9120 }, { "epoch": 88.4433734939759, "grad_norm": 6.332986831665039, "learning_rate": 5.723300970873786e-06, "loss": 0.3788, "step": 9121 }, { "epoch": 88.45301204819278, "grad_norm": 5.521244049072266, "learning_rate": 5.718446601941748e-06, "loss": 0.2854, "step": 9122 }, { "epoch": 88.46265060240964, "grad_norm": 9.75178050994873, "learning_rate": 5.713592233009709e-06, "loss": 0.2158, "step": 9123 }, { "epoch": 88.4722891566265, "grad_norm": 2.4658894538879395, "learning_rate": 5.70873786407767e-06, "loss": 0.139, "step": 9124 }, { "epoch": 88.48192771084338, "grad_norm": 6.006382942199707, "learning_rate": 5.703883495145632e-06, "loss": 0.2595, "step": 9125 }, { "epoch": 88.49156626506024, "grad_norm": 11.29118537902832, "learning_rate": 5.699029126213593e-06, "loss": 0.3058, "step": 9126 }, { "epoch": 88.5012048192771, "grad_norm": 8.546773910522461, "learning_rate": 5.694174757281553e-06, "loss": 0.2982, "step": 9127 }, { "epoch": 88.51084337349397, "grad_norm": 6.365299701690674, "learning_rate": 5.689320388349515e-06, "loss": 0.3206, "step": 9128 }, { "epoch": 88.52048192771085, "grad_norm": 2.4710745811462402, "learning_rate": 5.684466019417476e-06, "loss": 0.1226, "step": 9129 }, { "epoch": 88.53012048192771, "grad_norm": 5.906626224517822, "learning_rate": 5.679611650485437e-06, "loss": 0.1628, "step": 9130 }, { "epoch": 88.53975903614457, "grad_norm": 3.324169874191284, "learning_rate": 5.674757281553399e-06, "loss": 0.1836, "step": 9131 }, { "epoch": 88.54939759036145, "grad_norm": 5.500977993011475, "learning_rate": 5.66990291262136e-06, "loss": 0.1516, "step": 9132 }, { "epoch": 88.55903614457831, "grad_norm": 4.32704496383667, "learning_rate": 5.665048543689321e-06, "loss": 0.1209, "step": 9133 }, { "epoch": 88.56867469879518, "grad_norm": 5.157227516174316, "learning_rate": 5.660194174757282e-06, "loss": 0.1698, "step": 9134 }, { "epoch": 88.57831325301204, "grad_norm": 2.18861722946167, "learning_rate": 5.655339805825243e-06, "loss": 0.0785, "step": 9135 }, { "epoch": 88.58795180722892, "grad_norm": 9.501405715942383, "learning_rate": 5.650485436893204e-06, "loss": 0.3439, "step": 9136 }, { "epoch": 88.59759036144578, "grad_norm": 5.1530914306640625, "learning_rate": 5.645631067961165e-06, "loss": 0.2475, "step": 9137 }, { "epoch": 88.60722891566265, "grad_norm": 4.190701484680176, "learning_rate": 5.640776699029127e-06, "loss": 0.2098, "step": 9138 }, { "epoch": 88.61686746987952, "grad_norm": 5.059057712554932, "learning_rate": 5.635922330097088e-06, "loss": 0.1302, "step": 9139 }, { "epoch": 88.62650602409639, "grad_norm": 6.365688800811768, "learning_rate": 5.631067961165049e-06, "loss": 0.3874, "step": 9140 }, { "epoch": 88.63614457831325, "grad_norm": 3.4552934169769287, "learning_rate": 5.62621359223301e-06, "loss": 0.1647, "step": 9141 }, { "epoch": 88.64578313253013, "grad_norm": 4.05595064163208, "learning_rate": 5.621359223300971e-06, "loss": 0.1933, "step": 9142 }, { "epoch": 88.65542168674699, "grad_norm": 2.8980352878570557, "learning_rate": 5.616504854368932e-06, "loss": 0.2391, "step": 9143 }, { "epoch": 88.66506024096385, "grad_norm": 5.411526203155518, "learning_rate": 5.6116504854368935e-06, "loss": 0.2052, "step": 9144 }, { "epoch": 88.67469879518072, "grad_norm": 5.162094593048096, "learning_rate": 5.606796116504855e-06, "loss": 0.2401, "step": 9145 }, { "epoch": 88.6843373493976, "grad_norm": 9.403631210327148, "learning_rate": 5.601941747572816e-06, "loss": 0.2066, "step": 9146 }, { "epoch": 88.69397590361446, "grad_norm": 5.5153326988220215, "learning_rate": 5.597087378640777e-06, "loss": 0.2145, "step": 9147 }, { "epoch": 88.70361445783132, "grad_norm": 5.879195690155029, "learning_rate": 5.592233009708738e-06, "loss": 0.2575, "step": 9148 }, { "epoch": 88.7132530120482, "grad_norm": 6.967136383056641, "learning_rate": 5.587378640776699e-06, "loss": 0.2359, "step": 9149 }, { "epoch": 88.72289156626506, "grad_norm": 9.078034400939941, "learning_rate": 5.58252427184466e-06, "loss": 0.313, "step": 9150 }, { "epoch": 88.73253012048193, "grad_norm": 4.7850542068481445, "learning_rate": 5.5776699029126215e-06, "loss": 0.2285, "step": 9151 }, { "epoch": 88.74216867469879, "grad_norm": 6.549684047698975, "learning_rate": 5.572815533980583e-06, "loss": 0.2499, "step": 9152 }, { "epoch": 88.75180722891567, "grad_norm": 3.1963396072387695, "learning_rate": 5.567961165048545e-06, "loss": 0.1032, "step": 9153 }, { "epoch": 88.76144578313253, "grad_norm": 5.0193023681640625, "learning_rate": 5.563106796116505e-06, "loss": 0.2178, "step": 9154 }, { "epoch": 88.7710843373494, "grad_norm": 3.774132490158081, "learning_rate": 5.558252427184466e-06, "loss": 0.2244, "step": 9155 }, { "epoch": 88.78072289156627, "grad_norm": 5.805391788482666, "learning_rate": 5.553398058252427e-06, "loss": 0.3708, "step": 9156 }, { "epoch": 88.79036144578313, "grad_norm": 2.814786672592163, "learning_rate": 5.548543689320388e-06, "loss": 0.2042, "step": 9157 }, { "epoch": 88.8, "grad_norm": 3.407651901245117, "learning_rate": 5.5436893203883495e-06, "loss": 0.142, "step": 9158 }, { "epoch": 88.80963855421686, "grad_norm": 5.4529643058776855, "learning_rate": 5.538834951456311e-06, "loss": 0.1938, "step": 9159 }, { "epoch": 88.81927710843374, "grad_norm": 3.446589469909668, "learning_rate": 5.533980582524273e-06, "loss": 0.1569, "step": 9160 }, { "epoch": 88.8289156626506, "grad_norm": 6.065042495727539, "learning_rate": 5.529126213592233e-06, "loss": 0.2073, "step": 9161 }, { "epoch": 88.83855421686746, "grad_norm": 3.517559051513672, "learning_rate": 5.524271844660194e-06, "loss": 0.1864, "step": 9162 }, { "epoch": 88.84819277108434, "grad_norm": 4.280221939086914, "learning_rate": 5.519417475728156e-06, "loss": 0.2156, "step": 9163 }, { "epoch": 88.8578313253012, "grad_norm": 15.359978675842285, "learning_rate": 5.514563106796116e-06, "loss": 0.3042, "step": 9164 }, { "epoch": 88.86746987951807, "grad_norm": 5.277548313140869, "learning_rate": 5.5097087378640776e-06, "loss": 0.2426, "step": 9165 }, { "epoch": 88.87710843373495, "grad_norm": 8.335959434509277, "learning_rate": 5.5048543689320396e-06, "loss": 0.2112, "step": 9166 }, { "epoch": 88.88674698795181, "grad_norm": 3.883964776992798, "learning_rate": 5.500000000000001e-06, "loss": 0.2231, "step": 9167 }, { "epoch": 88.89638554216867, "grad_norm": 6.010040283203125, "learning_rate": 5.495145631067961e-06, "loss": 0.2831, "step": 9168 }, { "epoch": 88.90602409638554, "grad_norm": 5.708950519561768, "learning_rate": 5.490291262135923e-06, "loss": 0.2934, "step": 9169 }, { "epoch": 88.91566265060241, "grad_norm": 2.03997540473938, "learning_rate": 5.485436893203884e-06, "loss": 0.0924, "step": 9170 }, { "epoch": 88.92530120481928, "grad_norm": 10.323740005493164, "learning_rate": 5.4805825242718444e-06, "loss": 0.3632, "step": 9171 }, { "epoch": 88.93493975903614, "grad_norm": 7.333975791931152, "learning_rate": 5.4757281553398064e-06, "loss": 0.2621, "step": 9172 }, { "epoch": 88.94457831325302, "grad_norm": 6.99609899520874, "learning_rate": 5.470873786407768e-06, "loss": 0.2322, "step": 9173 }, { "epoch": 88.95421686746988, "grad_norm": 6.015202522277832, "learning_rate": 5.466019417475728e-06, "loss": 0.2091, "step": 9174 }, { "epoch": 88.96385542168674, "grad_norm": 6.148178577423096, "learning_rate": 5.461165048543689e-06, "loss": 0.2785, "step": 9175 }, { "epoch": 88.97349397590361, "grad_norm": 6.069455146789551, "learning_rate": 5.456310679611651e-06, "loss": 0.3778, "step": 9176 }, { "epoch": 88.98313253012049, "grad_norm": 2.2528269290924072, "learning_rate": 5.451456310679612e-06, "loss": 0.0937, "step": 9177 }, { "epoch": 88.99277108433735, "grad_norm": 6.0630950927734375, "learning_rate": 5.4466019417475725e-06, "loss": 0.4419, "step": 9178 }, { "epoch": 89.00843373493976, "grad_norm": 3.0959088802337646, "learning_rate": 5.4417475728155345e-06, "loss": 0.1185, "step": 9179 }, { "epoch": 89.01807228915662, "grad_norm": 2.0305371284484863, "learning_rate": 5.436893203883496e-06, "loss": 0.1283, "step": 9180 }, { "epoch": 89.0277108433735, "grad_norm": 1.8592982292175293, "learning_rate": 5.432038834951456e-06, "loss": 0.1758, "step": 9181 }, { "epoch": 89.03734939759036, "grad_norm": 13.027884483337402, "learning_rate": 5.427184466019418e-06, "loss": 0.2151, "step": 9182 }, { "epoch": 89.04698795180722, "grad_norm": 20.489151000976562, "learning_rate": 5.422330097087379e-06, "loss": 0.1121, "step": 9183 }, { "epoch": 89.0566265060241, "grad_norm": 12.835289001464844, "learning_rate": 5.41747572815534e-06, "loss": 0.1984, "step": 9184 }, { "epoch": 89.06626506024097, "grad_norm": 6.506276607513428, "learning_rate": 5.412621359223301e-06, "loss": 0.2445, "step": 9185 }, { "epoch": 89.07590361445783, "grad_norm": 1.6956897974014282, "learning_rate": 5.4077669902912625e-06, "loss": 0.1746, "step": 9186 }, { "epoch": 89.08554216867469, "grad_norm": 14.20126724243164, "learning_rate": 5.402912621359224e-06, "loss": 0.329, "step": 9187 }, { "epoch": 89.09518072289157, "grad_norm": 19.501052856445312, "learning_rate": 5.398058252427185e-06, "loss": 0.2248, "step": 9188 }, { "epoch": 89.10481927710843, "grad_norm": 3.1862387657165527, "learning_rate": 5.393203883495146e-06, "loss": 0.3751, "step": 9189 }, { "epoch": 89.1144578313253, "grad_norm": 6.487477779388428, "learning_rate": 5.388349514563107e-06, "loss": 0.3147, "step": 9190 }, { "epoch": 89.12409638554217, "grad_norm": 5.636814594268799, "learning_rate": 5.383495145631068e-06, "loss": 0.4042, "step": 9191 }, { "epoch": 89.13373493975904, "grad_norm": 7.975698947906494, "learning_rate": 5.378640776699029e-06, "loss": 0.2347, "step": 9192 }, { "epoch": 89.1433734939759, "grad_norm": 19.309049606323242, "learning_rate": 5.3737864077669905e-06, "loss": 0.4004, "step": 9193 }, { "epoch": 89.15301204819278, "grad_norm": 8.637360572814941, "learning_rate": 5.368932038834952e-06, "loss": 0.3534, "step": 9194 }, { "epoch": 89.16265060240964, "grad_norm": 31.281217575073242, "learning_rate": 5.364077669902913e-06, "loss": 0.3129, "step": 9195 }, { "epoch": 89.1722891566265, "grad_norm": 6.412160873413086, "learning_rate": 5.359223300970874e-06, "loss": 0.2553, "step": 9196 }, { "epoch": 89.18192771084337, "grad_norm": 20.67685317993164, "learning_rate": 5.354368932038835e-06, "loss": 0.3539, "step": 9197 }, { "epoch": 89.19156626506025, "grad_norm": 2.49350643157959, "learning_rate": 5.349514563106796e-06, "loss": 0.2234, "step": 9198 }, { "epoch": 89.20120481927711, "grad_norm": 12.051800727844238, "learning_rate": 5.344660194174757e-06, "loss": 0.2703, "step": 9199 }, { "epoch": 89.21084337349397, "grad_norm": 2.8981926441192627, "learning_rate": 5.3398058252427185e-06, "loss": 0.2772, "step": 9200 }, { "epoch": 89.22048192771085, "grad_norm": 32.104557037353516, "learning_rate": 5.33495145631068e-06, "loss": 0.3021, "step": 9201 }, { "epoch": 89.23012048192771, "grad_norm": 7.317893981933594, "learning_rate": 5.330097087378641e-06, "loss": 0.3185, "step": 9202 }, { "epoch": 89.23975903614458, "grad_norm": 7.867760181427002, "learning_rate": 5.325242718446602e-06, "loss": 0.2916, "step": 9203 }, { "epoch": 89.24939759036144, "grad_norm": 5.230817794799805, "learning_rate": 5.320388349514564e-06, "loss": 0.3518, "step": 9204 }, { "epoch": 89.25903614457832, "grad_norm": 5.365356922149658, "learning_rate": 5.315533980582524e-06, "loss": 0.0972, "step": 9205 }, { "epoch": 89.26867469879518, "grad_norm": 7.347655773162842, "learning_rate": 5.310679611650485e-06, "loss": 0.1954, "step": 9206 }, { "epoch": 89.27831325301204, "grad_norm": 11.082908630371094, "learning_rate": 5.305825242718447e-06, "loss": 0.2479, "step": 9207 }, { "epoch": 89.28795180722892, "grad_norm": 12.178773880004883, "learning_rate": 5.300970873786408e-06, "loss": 0.191, "step": 9208 }, { "epoch": 89.29759036144578, "grad_norm": 8.580105781555176, "learning_rate": 5.296116504854369e-06, "loss": 0.397, "step": 9209 }, { "epoch": 89.30722891566265, "grad_norm": 8.983187675476074, "learning_rate": 5.291262135922331e-06, "loss": 0.2036, "step": 9210 }, { "epoch": 89.31686746987951, "grad_norm": 2.5003280639648438, "learning_rate": 5.286407766990292e-06, "loss": 0.138, "step": 9211 }, { "epoch": 89.32650602409639, "grad_norm": 7.693542957305908, "learning_rate": 5.281553398058252e-06, "loss": 0.2449, "step": 9212 }, { "epoch": 89.33614457831325, "grad_norm": 22.853580474853516, "learning_rate": 5.276699029126214e-06, "loss": 0.2659, "step": 9213 }, { "epoch": 89.34578313253012, "grad_norm": 7.363069534301758, "learning_rate": 5.271844660194175e-06, "loss": 0.2883, "step": 9214 }, { "epoch": 89.355421686747, "grad_norm": 0.6121680736541748, "learning_rate": 5.266990291262136e-06, "loss": 0.1237, "step": 9215 }, { "epoch": 89.36506024096386, "grad_norm": 6.674782752990723, "learning_rate": 5.262135922330097e-06, "loss": 0.3553, "step": 9216 }, { "epoch": 89.37469879518072, "grad_norm": 14.467244148254395, "learning_rate": 5.257281553398059e-06, "loss": 0.3158, "step": 9217 }, { "epoch": 89.38433734939758, "grad_norm": 19.601160049438477, "learning_rate": 5.25242718446602e-06, "loss": 0.2461, "step": 9218 }, { "epoch": 89.39397590361446, "grad_norm": 5.381179332733154, "learning_rate": 5.24757281553398e-06, "loss": 0.1309, "step": 9219 }, { "epoch": 89.40361445783132, "grad_norm": 10.793109893798828, "learning_rate": 5.242718446601942e-06, "loss": 0.2884, "step": 9220 }, { "epoch": 89.41325301204819, "grad_norm": 28.142501831054688, "learning_rate": 5.237864077669903e-06, "loss": 0.2974, "step": 9221 }, { "epoch": 89.42289156626506, "grad_norm": 1.9220938682556152, "learning_rate": 5.233009708737864e-06, "loss": 0.3261, "step": 9222 }, { "epoch": 89.43253012048193, "grad_norm": 3.036344051361084, "learning_rate": 5.228155339805826e-06, "loss": 0.1537, "step": 9223 }, { "epoch": 89.44216867469879, "grad_norm": 14.890181541442871, "learning_rate": 5.223300970873787e-06, "loss": 0.3379, "step": 9224 }, { "epoch": 89.45180722891567, "grad_norm": 2.8081626892089844, "learning_rate": 5.218446601941748e-06, "loss": 0.2952, "step": 9225 }, { "epoch": 89.46144578313253, "grad_norm": 13.730218887329102, "learning_rate": 5.213592233009709e-06, "loss": 0.2003, "step": 9226 }, { "epoch": 89.4710843373494, "grad_norm": 15.248924255371094, "learning_rate": 5.20873786407767e-06, "loss": 0.1925, "step": 9227 }, { "epoch": 89.48072289156626, "grad_norm": 3.6688899993896484, "learning_rate": 5.2038834951456314e-06, "loss": 0.1644, "step": 9228 }, { "epoch": 89.49036144578314, "grad_norm": 14.18038272857666, "learning_rate": 5.199029126213593e-06, "loss": 0.2884, "step": 9229 }, { "epoch": 89.5, "grad_norm": 4.720516681671143, "learning_rate": 5.194174757281554e-06, "loss": 0.3243, "step": 9230 }, { "epoch": 89.50963855421686, "grad_norm": 10.003183364868164, "learning_rate": 5.189320388349515e-06, "loss": 0.0979, "step": 9231 }, { "epoch": 89.51927710843374, "grad_norm": 8.773980140686035, "learning_rate": 5.184466019417476e-06, "loss": 0.1987, "step": 9232 }, { "epoch": 89.5289156626506, "grad_norm": 3.020672082901001, "learning_rate": 5.179611650485437e-06, "loss": 0.2591, "step": 9233 }, { "epoch": 89.53855421686747, "grad_norm": 10.762358665466309, "learning_rate": 5.174757281553398e-06, "loss": 0.3022, "step": 9234 }, { "epoch": 89.54819277108433, "grad_norm": 4.44283390045166, "learning_rate": 5.1699029126213595e-06, "loss": 0.2224, "step": 9235 }, { "epoch": 89.55783132530121, "grad_norm": 18.372922897338867, "learning_rate": 5.165048543689321e-06, "loss": 0.2287, "step": 9236 }, { "epoch": 89.56746987951807, "grad_norm": 6.360177993774414, "learning_rate": 5.160194174757282e-06, "loss": 0.2154, "step": 9237 }, { "epoch": 89.57710843373494, "grad_norm": 12.237034797668457, "learning_rate": 5.155339805825243e-06, "loss": 0.1807, "step": 9238 }, { "epoch": 89.58674698795181, "grad_norm": 6.752268314361572, "learning_rate": 5.150485436893204e-06, "loss": 0.2418, "step": 9239 }, { "epoch": 89.59638554216868, "grad_norm": 6.942624568939209, "learning_rate": 5.145631067961165e-06, "loss": 0.2063, "step": 9240 }, { "epoch": 89.60602409638554, "grad_norm": 15.035760879516602, "learning_rate": 5.140776699029126e-06, "loss": 0.1866, "step": 9241 }, { "epoch": 89.61566265060242, "grad_norm": 5.836483955383301, "learning_rate": 5.1359223300970875e-06, "loss": 0.2043, "step": 9242 }, { "epoch": 89.62530120481928, "grad_norm": 15.449403762817383, "learning_rate": 5.131067961165049e-06, "loss": 0.283, "step": 9243 }, { "epoch": 89.63493975903614, "grad_norm": 2.266897678375244, "learning_rate": 5.12621359223301e-06, "loss": 0.286, "step": 9244 }, { "epoch": 89.644578313253, "grad_norm": 2.1839869022369385, "learning_rate": 5.121359223300972e-06, "loss": 0.0617, "step": 9245 }, { "epoch": 89.65421686746988, "grad_norm": 0.8843085765838623, "learning_rate": 5.116504854368932e-06, "loss": 0.1549, "step": 9246 }, { "epoch": 89.66385542168675, "grad_norm": 2.1094350814819336, "learning_rate": 5.111650485436893e-06, "loss": 0.1785, "step": 9247 }, { "epoch": 89.67349397590361, "grad_norm": 18.555858612060547, "learning_rate": 5.106796116504855e-06, "loss": 0.4224, "step": 9248 }, { "epoch": 89.68313253012049, "grad_norm": 2.501347303390503, "learning_rate": 5.1019417475728155e-06, "loss": 0.1701, "step": 9249 }, { "epoch": 89.69277108433735, "grad_norm": 8.52653694152832, "learning_rate": 5.097087378640777e-06, "loss": 0.186, "step": 9250 }, { "epoch": 89.70240963855422, "grad_norm": 17.790077209472656, "learning_rate": 5.092233009708739e-06, "loss": 0.295, "step": 9251 }, { "epoch": 89.71204819277108, "grad_norm": 11.282319068908691, "learning_rate": 5.087378640776699e-06, "loss": 0.4206, "step": 9252 }, { "epoch": 89.72168674698796, "grad_norm": 10.148119926452637, "learning_rate": 5.08252427184466e-06, "loss": 0.2256, "step": 9253 }, { "epoch": 89.73132530120482, "grad_norm": 3.9287149906158447, "learning_rate": 5.077669902912622e-06, "loss": 0.3127, "step": 9254 }, { "epoch": 89.74096385542168, "grad_norm": 14.379992485046387, "learning_rate": 5.072815533980583e-06, "loss": 0.3721, "step": 9255 }, { "epoch": 89.75060240963856, "grad_norm": 14.145182609558105, "learning_rate": 5.0679611650485435e-06, "loss": 0.1741, "step": 9256 }, { "epoch": 89.76024096385542, "grad_norm": 2.6233208179473877, "learning_rate": 5.063106796116505e-06, "loss": 0.3863, "step": 9257 }, { "epoch": 89.76987951807229, "grad_norm": 16.397323608398438, "learning_rate": 5.058252427184467e-06, "loss": 0.2664, "step": 9258 }, { "epoch": 89.77951807228915, "grad_norm": 8.40206241607666, "learning_rate": 5.053398058252427e-06, "loss": 0.3394, "step": 9259 }, { "epoch": 89.78915662650603, "grad_norm": 14.60588264465332, "learning_rate": 5.048543689320388e-06, "loss": 0.4135, "step": 9260 }, { "epoch": 89.79879518072289, "grad_norm": 4.6604814529418945, "learning_rate": 5.04368932038835e-06, "loss": 0.229, "step": 9261 }, { "epoch": 89.80843373493975, "grad_norm": 27.275222778320312, "learning_rate": 5.038834951456311e-06, "loss": 0.308, "step": 9262 }, { "epoch": 89.81807228915663, "grad_norm": 21.813343048095703, "learning_rate": 5.0339805825242715e-06, "loss": 0.3085, "step": 9263 }, { "epoch": 89.8277108433735, "grad_norm": 2.3077683448791504, "learning_rate": 5.0291262135922335e-06, "loss": 0.1228, "step": 9264 }, { "epoch": 89.83734939759036, "grad_norm": 3.863917827606201, "learning_rate": 5.024271844660195e-06, "loss": 0.3541, "step": 9265 }, { "epoch": 89.84698795180722, "grad_norm": 27.03558349609375, "learning_rate": 5.019417475728155e-06, "loss": 0.3727, "step": 9266 }, { "epoch": 89.8566265060241, "grad_norm": 3.6612231731414795, "learning_rate": 5.014563106796117e-06, "loss": 0.0707, "step": 9267 }, { "epoch": 89.86626506024096, "grad_norm": 14.582014083862305, "learning_rate": 5.009708737864078e-06, "loss": 0.3002, "step": 9268 }, { "epoch": 89.87590361445783, "grad_norm": 9.24314022064209, "learning_rate": 5.004854368932039e-06, "loss": 0.2332, "step": 9269 }, { "epoch": 89.8855421686747, "grad_norm": 1.2928812503814697, "learning_rate": 5e-06, "loss": 0.2113, "step": 9270 }, { "epoch": 89.89518072289157, "grad_norm": 9.587335586547852, "learning_rate": 4.9951456310679616e-06, "loss": 0.2435, "step": 9271 }, { "epoch": 89.90481927710843, "grad_norm": 25.900226593017578, "learning_rate": 4.990291262135923e-06, "loss": 0.1347, "step": 9272 }, { "epoch": 89.91445783132531, "grad_norm": 4.851014614105225, "learning_rate": 4.985436893203884e-06, "loss": 0.3021, "step": 9273 }, { "epoch": 89.92409638554217, "grad_norm": 11.533283233642578, "learning_rate": 4.980582524271845e-06, "loss": 0.2545, "step": 9274 }, { "epoch": 89.93373493975903, "grad_norm": 11.412980079650879, "learning_rate": 4.975728155339806e-06, "loss": 0.3062, "step": 9275 }, { "epoch": 89.9433734939759, "grad_norm": 27.485471725463867, "learning_rate": 4.970873786407767e-06, "loss": 0.2711, "step": 9276 }, { "epoch": 89.95301204819278, "grad_norm": 6.989186763763428, "learning_rate": 4.9660194174757284e-06, "loss": 0.1924, "step": 9277 }, { "epoch": 89.96265060240964, "grad_norm": 15.513516426086426, "learning_rate": 4.9611650485436896e-06, "loss": 0.1772, "step": 9278 }, { "epoch": 89.9722891566265, "grad_norm": 13.247013092041016, "learning_rate": 4.956310679611651e-06, "loss": 0.1077, "step": 9279 }, { "epoch": 89.98192771084338, "grad_norm": 13.343589782714844, "learning_rate": 4.951456310679612e-06, "loss": 0.5431, "step": 9280 }, { "epoch": 89.99156626506024, "grad_norm": 4.002095699310303, "learning_rate": 4.946601941747573e-06, "loss": 0.1697, "step": 9281 }, { "epoch": 90.00722891566265, "grad_norm": 10.582891464233398, "learning_rate": 4.941747572815534e-06, "loss": 0.219, "step": 9282 }, { "epoch": 90.01686746987951, "grad_norm": 10.940326690673828, "learning_rate": 4.936893203883495e-06, "loss": 0.2061, "step": 9283 }, { "epoch": 90.02650602409639, "grad_norm": 8.134584426879883, "learning_rate": 4.9320388349514564e-06, "loss": 0.4464, "step": 9284 }, { "epoch": 90.03614457831326, "grad_norm": 6.690576553344727, "learning_rate": 4.927184466019418e-06, "loss": 0.1653, "step": 9285 }, { "epoch": 90.04578313253012, "grad_norm": 17.63323211669922, "learning_rate": 4.922330097087379e-06, "loss": 0.2707, "step": 9286 }, { "epoch": 90.05542168674698, "grad_norm": 9.274825096130371, "learning_rate": 4.91747572815534e-06, "loss": 0.3341, "step": 9287 }, { "epoch": 90.06506024096386, "grad_norm": 10.361331939697266, "learning_rate": 4.912621359223301e-06, "loss": 0.4091, "step": 9288 }, { "epoch": 90.07469879518072, "grad_norm": 2.5664873123168945, "learning_rate": 4.907766990291263e-06, "loss": 0.224, "step": 9289 }, { "epoch": 90.08433734939759, "grad_norm": 21.386674880981445, "learning_rate": 4.902912621359223e-06, "loss": 0.2653, "step": 9290 }, { "epoch": 90.09397590361446, "grad_norm": 5.1516804695129395, "learning_rate": 4.8980582524271845e-06, "loss": 0.202, "step": 9291 }, { "epoch": 90.10361445783133, "grad_norm": 9.433101654052734, "learning_rate": 4.8932038834951465e-06, "loss": 0.3846, "step": 9292 }, { "epoch": 90.11325301204819, "grad_norm": 1.6192055940628052, "learning_rate": 4.888349514563107e-06, "loss": 0.1785, "step": 9293 }, { "epoch": 90.12289156626505, "grad_norm": 2.4634315967559814, "learning_rate": 4.883495145631068e-06, "loss": 0.3104, "step": 9294 }, { "epoch": 90.13253012048193, "grad_norm": 4.015556335449219, "learning_rate": 4.87864077669903e-06, "loss": 0.2664, "step": 9295 }, { "epoch": 90.1421686746988, "grad_norm": 4.894017219543457, "learning_rate": 4.873786407766991e-06, "loss": 0.2046, "step": 9296 }, { "epoch": 90.15180722891566, "grad_norm": 7.932631015777588, "learning_rate": 4.868932038834951e-06, "loss": 0.1223, "step": 9297 }, { "epoch": 90.16144578313254, "grad_norm": 1.9413363933563232, "learning_rate": 4.8640776699029125e-06, "loss": 0.1714, "step": 9298 }, { "epoch": 90.1710843373494, "grad_norm": 3.966090679168701, "learning_rate": 4.8592233009708745e-06, "loss": 0.2804, "step": 9299 }, { "epoch": 90.18072289156626, "grad_norm": 6.44911003112793, "learning_rate": 4.854368932038835e-06, "loss": 0.1339, "step": 9300 }, { "epoch": 90.19036144578314, "grad_norm": 7.503582000732422, "learning_rate": 4.849514563106796e-06, "loss": 0.3253, "step": 9301 }, { "epoch": 90.2, "grad_norm": 7.804775714874268, "learning_rate": 4.844660194174758e-06, "loss": 0.1274, "step": 9302 }, { "epoch": 90.20963855421687, "grad_norm": 23.305219650268555, "learning_rate": 4.839805825242718e-06, "loss": 0.3571, "step": 9303 }, { "epoch": 90.21927710843373, "grad_norm": 5.62533712387085, "learning_rate": 4.834951456310679e-06, "loss": 0.152, "step": 9304 }, { "epoch": 90.2289156626506, "grad_norm": 5.317061424255371, "learning_rate": 4.830097087378641e-06, "loss": 0.1386, "step": 9305 }, { "epoch": 90.23855421686747, "grad_norm": 13.161718368530273, "learning_rate": 4.8252427184466025e-06, "loss": 0.4295, "step": 9306 }, { "epoch": 90.24819277108433, "grad_norm": 18.926918029785156, "learning_rate": 4.820388349514563e-06, "loss": 0.1226, "step": 9307 }, { "epoch": 90.25783132530121, "grad_norm": 11.29629898071289, "learning_rate": 4.815533980582525e-06, "loss": 0.4261, "step": 9308 }, { "epoch": 90.26746987951807, "grad_norm": 3.3684885501861572, "learning_rate": 4.810679611650486e-06, "loss": 0.2573, "step": 9309 }, { "epoch": 90.27710843373494, "grad_norm": 2.1337993144989014, "learning_rate": 4.805825242718446e-06, "loss": 0.133, "step": 9310 }, { "epoch": 90.2867469879518, "grad_norm": 3.9967639446258545, "learning_rate": 4.800970873786408e-06, "loss": 0.1093, "step": 9311 }, { "epoch": 90.29638554216868, "grad_norm": 9.034079551696777, "learning_rate": 4.796116504854369e-06, "loss": 0.256, "step": 9312 }, { "epoch": 90.30602409638554, "grad_norm": 17.913673400878906, "learning_rate": 4.7912621359223305e-06, "loss": 0.2552, "step": 9313 }, { "epoch": 90.3156626506024, "grad_norm": 1.7707329988479614, "learning_rate": 4.786407766990292e-06, "loss": 0.0816, "step": 9314 }, { "epoch": 90.32530120481928, "grad_norm": 3.4835944175720215, "learning_rate": 4.781553398058253e-06, "loss": 0.1427, "step": 9315 }, { "epoch": 90.33493975903615, "grad_norm": 8.940157890319824, "learning_rate": 4.776699029126214e-06, "loss": 0.1723, "step": 9316 }, { "epoch": 90.34457831325301, "grad_norm": 2.6551289558410645, "learning_rate": 4.771844660194174e-06, "loss": 0.3138, "step": 9317 }, { "epoch": 90.35421686746987, "grad_norm": 23.692598342895508, "learning_rate": 4.766990291262136e-06, "loss": 0.2155, "step": 9318 }, { "epoch": 90.36385542168675, "grad_norm": 2.744321823120117, "learning_rate": 4.762135922330097e-06, "loss": 0.2131, "step": 9319 }, { "epoch": 90.37349397590361, "grad_norm": 5.8858466148376465, "learning_rate": 4.7572815533980585e-06, "loss": 0.3332, "step": 9320 }, { "epoch": 90.38313253012048, "grad_norm": 1.9537436962127686, "learning_rate": 4.75242718446602e-06, "loss": 0.2479, "step": 9321 }, { "epoch": 90.39277108433735, "grad_norm": 12.245424270629883, "learning_rate": 4.747572815533981e-06, "loss": 0.4633, "step": 9322 }, { "epoch": 90.40240963855422, "grad_norm": 5.3963141441345215, "learning_rate": 4.742718446601942e-06, "loss": 0.1942, "step": 9323 }, { "epoch": 90.41204819277108, "grad_norm": 6.503742694854736, "learning_rate": 4.737864077669903e-06, "loss": 0.3334, "step": 9324 }, { "epoch": 90.42168674698796, "grad_norm": 6.993358612060547, "learning_rate": 4.733009708737864e-06, "loss": 0.2643, "step": 9325 }, { "epoch": 90.43132530120482, "grad_norm": 5.517426013946533, "learning_rate": 4.728155339805825e-06, "loss": 0.0889, "step": 9326 }, { "epoch": 90.44096385542169, "grad_norm": 5.7262983322143555, "learning_rate": 4.7233009708737866e-06, "loss": 0.277, "step": 9327 }, { "epoch": 90.45060240963855, "grad_norm": 18.726987838745117, "learning_rate": 4.718446601941748e-06, "loss": 0.2375, "step": 9328 }, { "epoch": 90.46024096385543, "grad_norm": 11.223834037780762, "learning_rate": 4.713592233009709e-06, "loss": 0.4292, "step": 9329 }, { "epoch": 90.46987951807229, "grad_norm": 12.555519104003906, "learning_rate": 4.70873786407767e-06, "loss": 0.1646, "step": 9330 }, { "epoch": 90.47951807228915, "grad_norm": 6.3980584144592285, "learning_rate": 4.703883495145631e-06, "loss": 0.2751, "step": 9331 }, { "epoch": 90.48915662650603, "grad_norm": 8.821231842041016, "learning_rate": 4.699029126213592e-06, "loss": 0.188, "step": 9332 }, { "epoch": 90.4987951807229, "grad_norm": 26.147680282592773, "learning_rate": 4.694174757281554e-06, "loss": 0.1704, "step": 9333 }, { "epoch": 90.50843373493976, "grad_norm": 7.411286354064941, "learning_rate": 4.689320388349515e-06, "loss": 0.488, "step": 9334 }, { "epoch": 90.51807228915662, "grad_norm": 10.295848846435547, "learning_rate": 4.684466019417476e-06, "loss": 0.2633, "step": 9335 }, { "epoch": 90.5277108433735, "grad_norm": 5.298324108123779, "learning_rate": 4.679611650485438e-06, "loss": 0.3589, "step": 9336 }, { "epoch": 90.53734939759036, "grad_norm": 7.005333423614502, "learning_rate": 4.674757281553398e-06, "loss": 0.228, "step": 9337 }, { "epoch": 90.54698795180722, "grad_norm": 12.674779891967773, "learning_rate": 4.669902912621359e-06, "loss": 0.1766, "step": 9338 }, { "epoch": 90.5566265060241, "grad_norm": 3.090790271759033, "learning_rate": 4.66504854368932e-06, "loss": 0.4402, "step": 9339 }, { "epoch": 90.56626506024097, "grad_norm": 10.602559089660645, "learning_rate": 4.660194174757282e-06, "loss": 0.1859, "step": 9340 }, { "epoch": 90.57590361445783, "grad_norm": 20.759292602539062, "learning_rate": 4.655339805825243e-06, "loss": 0.1817, "step": 9341 }, { "epoch": 90.58554216867469, "grad_norm": 16.71925926208496, "learning_rate": 4.650485436893204e-06, "loss": 0.3086, "step": 9342 }, { "epoch": 90.59518072289157, "grad_norm": 10.44277572631836, "learning_rate": 4.645631067961166e-06, "loss": 0.2937, "step": 9343 }, { "epoch": 90.60481927710843, "grad_norm": 14.577775001525879, "learning_rate": 4.640776699029126e-06, "loss": 0.1806, "step": 9344 }, { "epoch": 90.6144578313253, "grad_norm": 27.671674728393555, "learning_rate": 4.635922330097087e-06, "loss": 0.1905, "step": 9345 }, { "epoch": 90.62409638554217, "grad_norm": 3.570523738861084, "learning_rate": 4.631067961165049e-06, "loss": 0.2173, "step": 9346 }, { "epoch": 90.63373493975904, "grad_norm": 13.06423568725586, "learning_rate": 4.62621359223301e-06, "loss": 0.3317, "step": 9347 }, { "epoch": 90.6433734939759, "grad_norm": 11.89234733581543, "learning_rate": 4.621359223300971e-06, "loss": 0.1574, "step": 9348 }, { "epoch": 90.65301204819278, "grad_norm": 18.92970848083496, "learning_rate": 4.616504854368933e-06, "loss": 0.4433, "step": 9349 }, { "epoch": 90.66265060240964, "grad_norm": 6.922398567199707, "learning_rate": 4.611650485436894e-06, "loss": 0.2884, "step": 9350 }, { "epoch": 90.6722891566265, "grad_norm": 2.6919522285461426, "learning_rate": 4.606796116504854e-06, "loss": 0.0816, "step": 9351 }, { "epoch": 90.68192771084337, "grad_norm": 4.189831256866455, "learning_rate": 4.601941747572816e-06, "loss": 0.3832, "step": 9352 }, { "epoch": 90.69156626506025, "grad_norm": 2.538168430328369, "learning_rate": 4.597087378640777e-06, "loss": 0.0872, "step": 9353 }, { "epoch": 90.70120481927711, "grad_norm": 5.19270658493042, "learning_rate": 4.592233009708738e-06, "loss": 0.2363, "step": 9354 }, { "epoch": 90.71084337349397, "grad_norm": 3.2071855068206787, "learning_rate": 4.5873786407766995e-06, "loss": 0.2336, "step": 9355 }, { "epoch": 90.72048192771085, "grad_norm": 6.774354457855225, "learning_rate": 4.582524271844661e-06, "loss": 0.1137, "step": 9356 }, { "epoch": 90.73012048192771, "grad_norm": 14.11506462097168, "learning_rate": 4.577669902912622e-06, "loss": 0.3259, "step": 9357 }, { "epoch": 90.73975903614458, "grad_norm": 12.3447904586792, "learning_rate": 4.572815533980582e-06, "loss": 0.3226, "step": 9358 }, { "epoch": 90.74939759036144, "grad_norm": 4.582664966583252, "learning_rate": 4.567961165048544e-06, "loss": 0.3295, "step": 9359 }, { "epoch": 90.75903614457832, "grad_norm": 3.3157944679260254, "learning_rate": 4.563106796116505e-06, "loss": 0.1805, "step": 9360 }, { "epoch": 90.76867469879518, "grad_norm": 16.44765281677246, "learning_rate": 4.5582524271844655e-06, "loss": 0.1461, "step": 9361 }, { "epoch": 90.77831325301204, "grad_norm": 8.580891609191895, "learning_rate": 4.5533980582524275e-06, "loss": 0.2887, "step": 9362 }, { "epoch": 90.78795180722892, "grad_norm": 146.99952697753906, "learning_rate": 4.548543689320389e-06, "loss": 0.3381, "step": 9363 }, { "epoch": 90.79759036144578, "grad_norm": 3.027134656906128, "learning_rate": 4.54368932038835e-06, "loss": 0.2092, "step": 9364 }, { "epoch": 90.80722891566265, "grad_norm": 13.914352416992188, "learning_rate": 4.538834951456311e-06, "loss": 0.2652, "step": 9365 }, { "epoch": 90.81686746987951, "grad_norm": 2.346851110458374, "learning_rate": 4.533980582524272e-06, "loss": 0.2128, "step": 9366 }, { "epoch": 90.82650602409639, "grad_norm": 18.997493743896484, "learning_rate": 4.529126213592233e-06, "loss": 0.2114, "step": 9367 }, { "epoch": 90.83614457831325, "grad_norm": 12.35663890838623, "learning_rate": 4.524271844660194e-06, "loss": 0.2427, "step": 9368 }, { "epoch": 90.84578313253012, "grad_norm": 9.574833869934082, "learning_rate": 4.5194174757281555e-06, "loss": 0.1307, "step": 9369 }, { "epoch": 90.855421686747, "grad_norm": 3.514063835144043, "learning_rate": 4.514563106796117e-06, "loss": 0.2102, "step": 9370 }, { "epoch": 90.86506024096386, "grad_norm": 13.515578269958496, "learning_rate": 4.509708737864078e-06, "loss": 0.3139, "step": 9371 }, { "epoch": 90.87469879518072, "grad_norm": 3.455960988998413, "learning_rate": 4.504854368932039e-06, "loss": 0.1627, "step": 9372 }, { "epoch": 90.88433734939758, "grad_norm": 16.649152755737305, "learning_rate": 4.5e-06, "loss": 0.2424, "step": 9373 }, { "epoch": 90.89397590361446, "grad_norm": 5.428730487823486, "learning_rate": 4.495145631067962e-06, "loss": 0.1022, "step": 9374 }, { "epoch": 90.90361445783132, "grad_norm": 16.816370010375977, "learning_rate": 4.490291262135922e-06, "loss": 0.2985, "step": 9375 }, { "epoch": 90.91325301204819, "grad_norm": 34.049312591552734, "learning_rate": 4.4854368932038836e-06, "loss": 0.3567, "step": 9376 }, { "epoch": 90.92289156626506, "grad_norm": 2.819611072540283, "learning_rate": 4.4805825242718455e-06, "loss": 0.1519, "step": 9377 }, { "epoch": 90.93253012048193, "grad_norm": 19.930721282958984, "learning_rate": 4.475728155339806e-06, "loss": 0.3283, "step": 9378 }, { "epoch": 90.94216867469879, "grad_norm": 6.590233325958252, "learning_rate": 4.470873786407767e-06, "loss": 0.1938, "step": 9379 }, { "epoch": 90.95180722891567, "grad_norm": 9.082258224487305, "learning_rate": 4.466019417475728e-06, "loss": 0.2124, "step": 9380 }, { "epoch": 90.96144578313253, "grad_norm": 3.9765543937683105, "learning_rate": 4.461165048543689e-06, "loss": 0.1306, "step": 9381 }, { "epoch": 90.9710843373494, "grad_norm": 2.5646939277648926, "learning_rate": 4.4563106796116504e-06, "loss": 0.2424, "step": 9382 }, { "epoch": 90.98072289156626, "grad_norm": 1.7587907314300537, "learning_rate": 4.4514563106796116e-06, "loss": 0.2123, "step": 9383 }, { "epoch": 90.99036144578314, "grad_norm": 14.15611743927002, "learning_rate": 4.4466019417475736e-06, "loss": 0.2723, "step": 9384 }, { "epoch": 91.00602409638554, "grad_norm": 7.267831802368164, "learning_rate": 4.441747572815534e-06, "loss": 0.2066, "step": 9385 }, { "epoch": 91.01566265060241, "grad_norm": 5.865718364715576, "learning_rate": 4.436893203883495e-06, "loss": 0.3296, "step": 9386 }, { "epoch": 91.02530120481927, "grad_norm": 4.68194580078125, "learning_rate": 4.432038834951457e-06, "loss": 0.2871, "step": 9387 }, { "epoch": 91.03493975903615, "grad_norm": 11.416540145874023, "learning_rate": 4.427184466019417e-06, "loss": 0.3528, "step": 9388 }, { "epoch": 91.04457831325301, "grad_norm": 7.324691295623779, "learning_rate": 4.4223300970873784e-06, "loss": 0.3369, "step": 9389 }, { "epoch": 91.05421686746988, "grad_norm": 3.2730093002319336, "learning_rate": 4.4174757281553404e-06, "loss": 0.1857, "step": 9390 }, { "epoch": 91.06385542168675, "grad_norm": 12.692235946655273, "learning_rate": 4.412621359223302e-06, "loss": 0.202, "step": 9391 }, { "epoch": 91.07349397590362, "grad_norm": 1.4963624477386475, "learning_rate": 4.407766990291262e-06, "loss": 0.1213, "step": 9392 }, { "epoch": 91.08313253012048, "grad_norm": 18.129196166992188, "learning_rate": 4.402912621359224e-06, "loss": 0.2063, "step": 9393 }, { "epoch": 91.09277108433734, "grad_norm": 12.6255521774292, "learning_rate": 4.398058252427185e-06, "loss": 0.1061, "step": 9394 }, { "epoch": 91.10240963855422, "grad_norm": 3.4121663570404053, "learning_rate": 4.393203883495145e-06, "loss": 0.25, "step": 9395 }, { "epoch": 91.11204819277108, "grad_norm": 4.060685634613037, "learning_rate": 4.388349514563107e-06, "loss": 0.1278, "step": 9396 }, { "epoch": 91.12168674698795, "grad_norm": 5.696686267852783, "learning_rate": 4.3834951456310685e-06, "loss": 0.1968, "step": 9397 }, { "epoch": 91.13132530120482, "grad_norm": 3.140069007873535, "learning_rate": 4.37864077669903e-06, "loss": 0.4057, "step": 9398 }, { "epoch": 91.14096385542169, "grad_norm": 2.406991958618164, "learning_rate": 4.37378640776699e-06, "loss": 0.2598, "step": 9399 }, { "epoch": 91.15060240963855, "grad_norm": 6.100291728973389, "learning_rate": 4.368932038834952e-06, "loss": 0.3213, "step": 9400 }, { "epoch": 91.16024096385541, "grad_norm": 3.229585647583008, "learning_rate": 4.364077669902913e-06, "loss": 0.3201, "step": 9401 }, { "epoch": 91.16987951807229, "grad_norm": 4.6496052742004395, "learning_rate": 4.359223300970873e-06, "loss": 0.2683, "step": 9402 }, { "epoch": 91.17951807228916, "grad_norm": 5.0880231857299805, "learning_rate": 4.354368932038835e-06, "loss": 0.4585, "step": 9403 }, { "epoch": 91.18915662650602, "grad_norm": 3.735734462738037, "learning_rate": 4.3495145631067965e-06, "loss": 0.1839, "step": 9404 }, { "epoch": 91.1987951807229, "grad_norm": 11.6842041015625, "learning_rate": 4.344660194174758e-06, "loss": 0.3592, "step": 9405 }, { "epoch": 91.20843373493976, "grad_norm": 2.3120017051696777, "learning_rate": 4.339805825242719e-06, "loss": 0.0686, "step": 9406 }, { "epoch": 91.21807228915662, "grad_norm": 9.6475830078125, "learning_rate": 4.33495145631068e-06, "loss": 0.115, "step": 9407 }, { "epoch": 91.2277108433735, "grad_norm": 29.21491050720215, "learning_rate": 4.330097087378641e-06, "loss": 0.3358, "step": 9408 }, { "epoch": 91.23734939759036, "grad_norm": 9.142071723937988, "learning_rate": 4.325242718446602e-06, "loss": 0.2899, "step": 9409 }, { "epoch": 91.24698795180723, "grad_norm": 3.4334325790405273, "learning_rate": 4.320388349514563e-06, "loss": 0.1373, "step": 9410 }, { "epoch": 91.25662650602409, "grad_norm": 15.662156105041504, "learning_rate": 4.3155339805825245e-06, "loss": 0.1377, "step": 9411 }, { "epoch": 91.26626506024097, "grad_norm": 3.738389253616333, "learning_rate": 4.310679611650486e-06, "loss": 0.2016, "step": 9412 }, { "epoch": 91.27590361445783, "grad_norm": 1.1230714321136475, "learning_rate": 4.305825242718447e-06, "loss": 0.1248, "step": 9413 }, { "epoch": 91.2855421686747, "grad_norm": 6.1648454666137695, "learning_rate": 4.300970873786408e-06, "loss": 0.2296, "step": 9414 }, { "epoch": 91.29518072289157, "grad_norm": 11.532547950744629, "learning_rate": 4.296116504854369e-06, "loss": 0.3278, "step": 9415 }, { "epoch": 91.30481927710844, "grad_norm": 4.033961772918701, "learning_rate": 4.29126213592233e-06, "loss": 0.2223, "step": 9416 }, { "epoch": 91.3144578313253, "grad_norm": 5.425708293914795, "learning_rate": 4.286407766990291e-06, "loss": 0.1129, "step": 9417 }, { "epoch": 91.32409638554216, "grad_norm": 3.6251556873321533, "learning_rate": 4.281553398058253e-06, "loss": 0.1327, "step": 9418 }, { "epoch": 91.33373493975904, "grad_norm": 3.060635566711426, "learning_rate": 4.276699029126214e-06, "loss": 0.4146, "step": 9419 }, { "epoch": 91.3433734939759, "grad_norm": 14.845298767089844, "learning_rate": 4.271844660194175e-06, "loss": 0.3368, "step": 9420 }, { "epoch": 91.35301204819277, "grad_norm": 3.833719491958618, "learning_rate": 4.266990291262136e-06, "loss": 0.2177, "step": 9421 }, { "epoch": 91.36265060240964, "grad_norm": 2.332475423812866, "learning_rate": 4.262135922330097e-06, "loss": 0.1272, "step": 9422 }, { "epoch": 91.37228915662651, "grad_norm": 4.820545196533203, "learning_rate": 4.257281553398058e-06, "loss": 0.141, "step": 9423 }, { "epoch": 91.38192771084337, "grad_norm": 2.6321256160736084, "learning_rate": 4.252427184466019e-06, "loss": 0.1908, "step": 9424 }, { "epoch": 91.39156626506023, "grad_norm": 25.439016342163086, "learning_rate": 4.247572815533981e-06, "loss": 0.2694, "step": 9425 }, { "epoch": 91.40120481927711, "grad_norm": 12.151895523071289, "learning_rate": 4.242718446601942e-06, "loss": 0.4348, "step": 9426 }, { "epoch": 91.41084337349398, "grad_norm": 8.021658897399902, "learning_rate": 4.237864077669903e-06, "loss": 0.2212, "step": 9427 }, { "epoch": 91.42048192771084, "grad_norm": 25.40498161315918, "learning_rate": 4.233009708737865e-06, "loss": 0.2232, "step": 9428 }, { "epoch": 91.43012048192772, "grad_norm": 4.000380516052246, "learning_rate": 4.228155339805825e-06, "loss": 0.4458, "step": 9429 }, { "epoch": 91.43975903614458, "grad_norm": 10.616969108581543, "learning_rate": 4.223300970873786e-06, "loss": 0.1045, "step": 9430 }, { "epoch": 91.44939759036144, "grad_norm": 8.567564010620117, "learning_rate": 4.218446601941748e-06, "loss": 0.2899, "step": 9431 }, { "epoch": 91.45903614457832, "grad_norm": 6.649982929229736, "learning_rate": 4.213592233009709e-06, "loss": 0.2939, "step": 9432 }, { "epoch": 91.46867469879518, "grad_norm": 4.455132484436035, "learning_rate": 4.20873786407767e-06, "loss": 0.2469, "step": 9433 }, { "epoch": 91.47831325301205, "grad_norm": 20.941831588745117, "learning_rate": 4.203883495145632e-06, "loss": 0.2318, "step": 9434 }, { "epoch": 91.48795180722891, "grad_norm": 2.8499536514282227, "learning_rate": 4.199029126213593e-06, "loss": 0.1444, "step": 9435 }, { "epoch": 91.49759036144579, "grad_norm": 2.0525286197662354, "learning_rate": 4.194174757281553e-06, "loss": 0.2187, "step": 9436 }, { "epoch": 91.50722891566265, "grad_norm": 3.53378963470459, "learning_rate": 4.189320388349515e-06, "loss": 0.1122, "step": 9437 }, { "epoch": 91.51686746987951, "grad_norm": 2.9371819496154785, "learning_rate": 4.184466019417476e-06, "loss": 0.283, "step": 9438 }, { "epoch": 91.52650602409639, "grad_norm": 18.046405792236328, "learning_rate": 4.179611650485437e-06, "loss": 0.2184, "step": 9439 }, { "epoch": 91.53614457831326, "grad_norm": 3.2228245735168457, "learning_rate": 4.174757281553398e-06, "loss": 0.1607, "step": 9440 }, { "epoch": 91.54578313253012, "grad_norm": 4.733063697814941, "learning_rate": 4.16990291262136e-06, "loss": 0.188, "step": 9441 }, { "epoch": 91.55542168674698, "grad_norm": 14.555365562438965, "learning_rate": 4.165048543689321e-06, "loss": 0.1618, "step": 9442 }, { "epoch": 91.56506024096386, "grad_norm": 11.077686309814453, "learning_rate": 4.160194174757281e-06, "loss": 0.414, "step": 9443 }, { "epoch": 91.57469879518072, "grad_norm": 2.4203004837036133, "learning_rate": 4.155339805825243e-06, "loss": 0.3042, "step": 9444 }, { "epoch": 91.58433734939759, "grad_norm": 13.852395057678223, "learning_rate": 4.150485436893204e-06, "loss": 0.2899, "step": 9445 }, { "epoch": 91.59397590361446, "grad_norm": 4.041886806488037, "learning_rate": 4.145631067961165e-06, "loss": 0.2721, "step": 9446 }, { "epoch": 91.60361445783133, "grad_norm": 23.33333969116211, "learning_rate": 4.140776699029127e-06, "loss": 0.2525, "step": 9447 }, { "epoch": 91.61325301204819, "grad_norm": 2.8168532848358154, "learning_rate": 4.135922330097088e-06, "loss": 0.1222, "step": 9448 }, { "epoch": 91.62289156626505, "grad_norm": 1.6213185787200928, "learning_rate": 4.131067961165049e-06, "loss": 0.1119, "step": 9449 }, { "epoch": 91.63253012048193, "grad_norm": 10.646984100341797, "learning_rate": 4.12621359223301e-06, "loss": 0.4075, "step": 9450 }, { "epoch": 91.6421686746988, "grad_norm": 2.7718265056610107, "learning_rate": 4.121359223300971e-06, "loss": 0.2323, "step": 9451 }, { "epoch": 91.65180722891566, "grad_norm": 2.1606106758117676, "learning_rate": 4.116504854368932e-06, "loss": 0.1807, "step": 9452 }, { "epoch": 91.66144578313254, "grad_norm": 22.298337936401367, "learning_rate": 4.1116504854368935e-06, "loss": 0.4811, "step": 9453 }, { "epoch": 91.6710843373494, "grad_norm": 2.8556745052337646, "learning_rate": 4.106796116504855e-06, "loss": 0.1122, "step": 9454 }, { "epoch": 91.68072289156626, "grad_norm": 3.9396255016326904, "learning_rate": 4.101941747572816e-06, "loss": 0.044, "step": 9455 }, { "epoch": 91.69036144578314, "grad_norm": 127.48729705810547, "learning_rate": 4.097087378640777e-06, "loss": 0.4398, "step": 9456 }, { "epoch": 91.7, "grad_norm": 3.0919442176818848, "learning_rate": 4.092233009708738e-06, "loss": 0.1855, "step": 9457 }, { "epoch": 91.70963855421687, "grad_norm": 14.011598587036133, "learning_rate": 4.087378640776699e-06, "loss": 0.2963, "step": 9458 }, { "epoch": 91.71927710843373, "grad_norm": 9.903712272644043, "learning_rate": 4.08252427184466e-06, "loss": 0.205, "step": 9459 }, { "epoch": 91.7289156626506, "grad_norm": 10.969992637634277, "learning_rate": 4.0776699029126215e-06, "loss": 0.2124, "step": 9460 }, { "epoch": 91.73855421686747, "grad_norm": 4.413294792175293, "learning_rate": 4.072815533980583e-06, "loss": 0.2794, "step": 9461 }, { "epoch": 91.74819277108433, "grad_norm": 9.066916465759277, "learning_rate": 4.067961165048544e-06, "loss": 0.1868, "step": 9462 }, { "epoch": 91.75783132530121, "grad_norm": 12.835489273071289, "learning_rate": 4.063106796116505e-06, "loss": 0.2312, "step": 9463 }, { "epoch": 91.76746987951807, "grad_norm": 4.255037784576416, "learning_rate": 4.058252427184466e-06, "loss": 0.2784, "step": 9464 }, { "epoch": 91.77710843373494, "grad_norm": 7.056379795074463, "learning_rate": 4.053398058252427e-06, "loss": 0.4686, "step": 9465 }, { "epoch": 91.7867469879518, "grad_norm": 5.995213031768799, "learning_rate": 4.048543689320388e-06, "loss": 0.3168, "step": 9466 }, { "epoch": 91.79638554216868, "grad_norm": 15.494457244873047, "learning_rate": 4.0436893203883495e-06, "loss": 0.269, "step": 9467 }, { "epoch": 91.80602409638554, "grad_norm": 7.209287166595459, "learning_rate": 4.038834951456311e-06, "loss": 0.3308, "step": 9468 }, { "epoch": 91.8156626506024, "grad_norm": 12.75628662109375, "learning_rate": 4.033980582524273e-06, "loss": 0.1097, "step": 9469 }, { "epoch": 91.82530120481928, "grad_norm": 8.220709800720215, "learning_rate": 4.029126213592233e-06, "loss": 0.2477, "step": 9470 }, { "epoch": 91.83493975903615, "grad_norm": 6.14871883392334, "learning_rate": 4.024271844660194e-06, "loss": 0.2575, "step": 9471 }, { "epoch": 91.84457831325301, "grad_norm": 7.784073352813721, "learning_rate": 4.019417475728156e-06, "loss": 0.3789, "step": 9472 }, { "epoch": 91.85421686746987, "grad_norm": 5.772677421569824, "learning_rate": 4.014563106796116e-06, "loss": 0.3982, "step": 9473 }, { "epoch": 91.86385542168675, "grad_norm": 17.99980926513672, "learning_rate": 4.0097087378640775e-06, "loss": 0.3664, "step": 9474 }, { "epoch": 91.87349397590361, "grad_norm": 3.3138957023620605, "learning_rate": 4.0048543689320395e-06, "loss": 0.1782, "step": 9475 }, { "epoch": 91.88313253012048, "grad_norm": 4.0470781326293945, "learning_rate": 4.000000000000001e-06, "loss": 0.4593, "step": 9476 }, { "epoch": 91.89277108433735, "grad_norm": 15.468552589416504, "learning_rate": 3.995145631067961e-06, "loss": 0.1885, "step": 9477 }, { "epoch": 91.90240963855422, "grad_norm": 21.920886993408203, "learning_rate": 3.990291262135923e-06, "loss": 0.1918, "step": 9478 }, { "epoch": 91.91204819277108, "grad_norm": 4.342057704925537, "learning_rate": 3.985436893203884e-06, "loss": 0.1597, "step": 9479 }, { "epoch": 91.92168674698796, "grad_norm": 11.900551795959473, "learning_rate": 3.980582524271844e-06, "loss": 0.4217, "step": 9480 }, { "epoch": 91.93132530120482, "grad_norm": 26.49217987060547, "learning_rate": 3.9757281553398055e-06, "loss": 0.577, "step": 9481 }, { "epoch": 91.94096385542169, "grad_norm": 20.008804321289062, "learning_rate": 3.9708737864077675e-06, "loss": 0.3718, "step": 9482 }, { "epoch": 91.95060240963855, "grad_norm": 3.258504629135132, "learning_rate": 3.966019417475729e-06, "loss": 0.0873, "step": 9483 }, { "epoch": 91.96024096385543, "grad_norm": 4.570681571960449, "learning_rate": 3.961165048543689e-06, "loss": 0.2528, "step": 9484 }, { "epoch": 91.96987951807229, "grad_norm": 5.970645427703857, "learning_rate": 3.956310679611651e-06, "loss": 0.3254, "step": 9485 }, { "epoch": 91.97951807228915, "grad_norm": 4.974000453948975, "learning_rate": 3.951456310679612e-06, "loss": 0.412, "step": 9486 }, { "epoch": 91.98915662650603, "grad_norm": 1.9391368627548218, "learning_rate": 3.946601941747572e-06, "loss": 0.1073, "step": 9487 }, { "epoch": 92.00481927710844, "grad_norm": 13.195626258850098, "learning_rate": 3.941747572815534e-06, "loss": 0.2644, "step": 9488 }, { "epoch": 92.0144578313253, "grad_norm": 9.113852500915527, "learning_rate": 3.9368932038834956e-06, "loss": 0.0859, "step": 9489 }, { "epoch": 92.02409638554217, "grad_norm": 1.4930764436721802, "learning_rate": 3.932038834951456e-06, "loss": 0.3496, "step": 9490 }, { "epoch": 92.03373493975904, "grad_norm": 3.597151279449463, "learning_rate": 3.927184466019418e-06, "loss": 0.0971, "step": 9491 }, { "epoch": 92.0433734939759, "grad_norm": 6.032965660095215, "learning_rate": 3.922330097087379e-06, "loss": 0.1161, "step": 9492 }, { "epoch": 92.05301204819277, "grad_norm": 3.265152931213379, "learning_rate": 3.91747572815534e-06, "loss": 0.2124, "step": 9493 }, { "epoch": 92.06265060240963, "grad_norm": 1.665130376815796, "learning_rate": 3.912621359223301e-06, "loss": 0.0553, "step": 9494 }, { "epoch": 92.07228915662651, "grad_norm": 4.35081672668457, "learning_rate": 3.9077669902912624e-06, "loss": 0.2235, "step": 9495 }, { "epoch": 92.08192771084337, "grad_norm": 3.090104341506958, "learning_rate": 3.902912621359224e-06, "loss": 0.1347, "step": 9496 }, { "epoch": 92.09156626506024, "grad_norm": 5.591159343719482, "learning_rate": 3.898058252427185e-06, "loss": 0.2478, "step": 9497 }, { "epoch": 92.10120481927711, "grad_norm": 53.38371658325195, "learning_rate": 3.893203883495146e-06, "loss": 0.1618, "step": 9498 }, { "epoch": 92.11084337349398, "grad_norm": 7.463142395019531, "learning_rate": 3.888349514563107e-06, "loss": 0.3431, "step": 9499 }, { "epoch": 92.12048192771084, "grad_norm": 4.344353675842285, "learning_rate": 3.883495145631068e-06, "loss": 0.1587, "step": 9500 }, { "epoch": 92.1301204819277, "grad_norm": 5.960272789001465, "learning_rate": 3.878640776699029e-06, "loss": 0.1238, "step": 9501 }, { "epoch": 92.13975903614458, "grad_norm": 4.327634334564209, "learning_rate": 3.8737864077669905e-06, "loss": 0.205, "step": 9502 }, { "epoch": 92.14939759036145, "grad_norm": 5.9926371574401855, "learning_rate": 3.868932038834952e-06, "loss": 0.1908, "step": 9503 }, { "epoch": 92.15903614457831, "grad_norm": 7.717174530029297, "learning_rate": 3.864077669902913e-06, "loss": 0.1876, "step": 9504 }, { "epoch": 92.16867469879519, "grad_norm": 10.933144569396973, "learning_rate": 3.859223300970874e-06, "loss": 0.2866, "step": 9505 }, { "epoch": 92.17831325301205, "grad_norm": 8.566479682922363, "learning_rate": 3.854368932038835e-06, "loss": 0.3093, "step": 9506 }, { "epoch": 92.18795180722891, "grad_norm": 11.676371574401855, "learning_rate": 3.849514563106796e-06, "loss": 0.1956, "step": 9507 }, { "epoch": 92.19759036144578, "grad_norm": 7.967612266540527, "learning_rate": 3.844660194174757e-06, "loss": 0.1828, "step": 9508 }, { "epoch": 92.20722891566265, "grad_norm": 16.92743682861328, "learning_rate": 3.8398058252427185e-06, "loss": 0.2991, "step": 9509 }, { "epoch": 92.21686746987952, "grad_norm": 6.3389081954956055, "learning_rate": 3.83495145631068e-06, "loss": 0.2664, "step": 9510 }, { "epoch": 92.22650602409638, "grad_norm": 2.9398200511932373, "learning_rate": 3.830097087378641e-06, "loss": 0.2174, "step": 9511 }, { "epoch": 92.23614457831326, "grad_norm": 5.379055976867676, "learning_rate": 3.825242718446602e-06, "loss": 0.3796, "step": 9512 }, { "epoch": 92.24578313253012, "grad_norm": 1.9182502031326294, "learning_rate": 3.820388349514564e-06, "loss": 0.088, "step": 9513 }, { "epoch": 92.25542168674698, "grad_norm": 2.637453079223633, "learning_rate": 3.815533980582524e-06, "loss": 0.2, "step": 9514 }, { "epoch": 92.26506024096386, "grad_norm": 1.7656681537628174, "learning_rate": 3.8106796116504853e-06, "loss": 0.0996, "step": 9515 }, { "epoch": 92.27469879518073, "grad_norm": 15.822871208190918, "learning_rate": 3.805825242718447e-06, "loss": 0.2785, "step": 9516 }, { "epoch": 92.28433734939759, "grad_norm": 16.228404998779297, "learning_rate": 3.800970873786408e-06, "loss": 0.3366, "step": 9517 }, { "epoch": 92.29397590361445, "grad_norm": 2.0068044662475586, "learning_rate": 3.7961165048543688e-06, "loss": 0.114, "step": 9518 }, { "epoch": 92.30361445783133, "grad_norm": 9.913640022277832, "learning_rate": 3.7912621359223308e-06, "loss": 0.4003, "step": 9519 }, { "epoch": 92.3132530120482, "grad_norm": 4.342304229736328, "learning_rate": 3.7864077669902915e-06, "loss": 0.2988, "step": 9520 }, { "epoch": 92.32289156626506, "grad_norm": 5.319194793701172, "learning_rate": 3.7815533980582526e-06, "loss": 0.4107, "step": 9521 }, { "epoch": 92.33253012048193, "grad_norm": 2.8963098526000977, "learning_rate": 3.7766990291262134e-06, "loss": 0.2938, "step": 9522 }, { "epoch": 92.3421686746988, "grad_norm": 2.797140598297119, "learning_rate": 3.771844660194175e-06, "loss": 0.1491, "step": 9523 }, { "epoch": 92.35180722891566, "grad_norm": 5.724021911621094, "learning_rate": 3.766990291262136e-06, "loss": 0.3932, "step": 9524 }, { "epoch": 92.36144578313252, "grad_norm": 8.185203552246094, "learning_rate": 3.762135922330097e-06, "loss": 0.2319, "step": 9525 }, { "epoch": 92.3710843373494, "grad_norm": 2.8352322578430176, "learning_rate": 3.757281553398059e-06, "loss": 0.1623, "step": 9526 }, { "epoch": 92.38072289156626, "grad_norm": 2.375530958175659, "learning_rate": 3.7524271844660195e-06, "loss": 0.0577, "step": 9527 }, { "epoch": 92.39036144578313, "grad_norm": 3.0954229831695557, "learning_rate": 3.7475728155339807e-06, "loss": 0.2727, "step": 9528 }, { "epoch": 92.4, "grad_norm": 3.459451675415039, "learning_rate": 3.7427184466019422e-06, "loss": 0.2301, "step": 9529 }, { "epoch": 92.40963855421687, "grad_norm": 7.391206741333008, "learning_rate": 3.737864077669903e-06, "loss": 0.2567, "step": 9530 }, { "epoch": 92.41927710843373, "grad_norm": 3.032086133956909, "learning_rate": 3.733009708737864e-06, "loss": 0.174, "step": 9531 }, { "epoch": 92.4289156626506, "grad_norm": 27.254850387573242, "learning_rate": 3.7281553398058257e-06, "loss": 0.215, "step": 9532 }, { "epoch": 92.43855421686747, "grad_norm": 8.166321754455566, "learning_rate": 3.723300970873787e-06, "loss": 0.3775, "step": 9533 }, { "epoch": 92.44819277108434, "grad_norm": 5.320773124694824, "learning_rate": 3.7184466019417475e-06, "loss": 0.1672, "step": 9534 }, { "epoch": 92.4578313253012, "grad_norm": 8.572697639465332, "learning_rate": 3.713592233009709e-06, "loss": 0.2375, "step": 9535 }, { "epoch": 92.46746987951808, "grad_norm": 1.9719866514205933, "learning_rate": 3.7087378640776703e-06, "loss": 0.2513, "step": 9536 }, { "epoch": 92.47710843373494, "grad_norm": 10.064311027526855, "learning_rate": 3.703883495145631e-06, "loss": 0.1887, "step": 9537 }, { "epoch": 92.4867469879518, "grad_norm": 28.65696144104004, "learning_rate": 3.6990291262135925e-06, "loss": 0.2619, "step": 9538 }, { "epoch": 92.49638554216868, "grad_norm": 9.47705364227295, "learning_rate": 3.6941747572815537e-06, "loss": 0.1402, "step": 9539 }, { "epoch": 92.50602409638554, "grad_norm": 4.564528465270996, "learning_rate": 3.6893203883495144e-06, "loss": 0.2922, "step": 9540 }, { "epoch": 92.51566265060241, "grad_norm": 4.764261245727539, "learning_rate": 3.6844660194174756e-06, "loss": 0.186, "step": 9541 }, { "epoch": 92.52530120481927, "grad_norm": 2.3925063610076904, "learning_rate": 3.679611650485437e-06, "loss": 0.1912, "step": 9542 }, { "epoch": 92.53493975903615, "grad_norm": 2.411863088607788, "learning_rate": 3.6747572815533983e-06, "loss": 0.1961, "step": 9543 }, { "epoch": 92.54457831325301, "grad_norm": 3.4338274002075195, "learning_rate": 3.669902912621359e-06, "loss": 0.2337, "step": 9544 }, { "epoch": 92.55421686746988, "grad_norm": 5.697939395904541, "learning_rate": 3.6650485436893206e-06, "loss": 0.3449, "step": 9545 }, { "epoch": 92.56385542168675, "grad_norm": 8.303568840026855, "learning_rate": 3.6601941747572817e-06, "loss": 0.2693, "step": 9546 }, { "epoch": 92.57349397590362, "grad_norm": 12.321171760559082, "learning_rate": 3.6553398058252424e-06, "loss": 0.1991, "step": 9547 }, { "epoch": 92.58313253012048, "grad_norm": 7.383852481842041, "learning_rate": 3.6504854368932044e-06, "loss": 0.3722, "step": 9548 }, { "epoch": 92.59277108433734, "grad_norm": 2.2595221996307373, "learning_rate": 3.645631067961165e-06, "loss": 0.2098, "step": 9549 }, { "epoch": 92.60240963855422, "grad_norm": 11.285406112670898, "learning_rate": 3.6407766990291263e-06, "loss": 0.3465, "step": 9550 }, { "epoch": 92.61204819277108, "grad_norm": 19.140392303466797, "learning_rate": 3.635922330097088e-06, "loss": 0.2384, "step": 9551 }, { "epoch": 92.62168674698795, "grad_norm": 10.872054100036621, "learning_rate": 3.6310679611650486e-06, "loss": 0.1531, "step": 9552 }, { "epoch": 92.63132530120482, "grad_norm": 3.2764406204223633, "learning_rate": 3.6262135922330097e-06, "loss": 0.2454, "step": 9553 }, { "epoch": 92.64096385542169, "grad_norm": 9.814127922058105, "learning_rate": 3.6213592233009713e-06, "loss": 0.261, "step": 9554 }, { "epoch": 92.65060240963855, "grad_norm": 15.600826263427734, "learning_rate": 3.6165048543689324e-06, "loss": 0.2389, "step": 9555 }, { "epoch": 92.66024096385541, "grad_norm": 3.2628567218780518, "learning_rate": 3.611650485436893e-06, "loss": 0.0983, "step": 9556 }, { "epoch": 92.66987951807229, "grad_norm": 8.17911434173584, "learning_rate": 3.6067961165048547e-06, "loss": 0.3039, "step": 9557 }, { "epoch": 92.67951807228916, "grad_norm": 6.458689212799072, "learning_rate": 3.601941747572816e-06, "loss": 0.2036, "step": 9558 }, { "epoch": 92.68915662650602, "grad_norm": 2.5224111080169678, "learning_rate": 3.5970873786407766e-06, "loss": 0.2026, "step": 9559 }, { "epoch": 92.6987951807229, "grad_norm": 7.217814922332764, "learning_rate": 3.5922330097087378e-06, "loss": 0.3281, "step": 9560 }, { "epoch": 92.70843373493976, "grad_norm": 2.0583736896514893, "learning_rate": 3.5873786407766993e-06, "loss": 0.1139, "step": 9561 }, { "epoch": 92.71807228915662, "grad_norm": 5.5401434898376465, "learning_rate": 3.5825242718446605e-06, "loss": 0.1287, "step": 9562 }, { "epoch": 92.7277108433735, "grad_norm": 3.334221601486206, "learning_rate": 3.577669902912621e-06, "loss": 0.2662, "step": 9563 }, { "epoch": 92.73734939759036, "grad_norm": 17.81817054748535, "learning_rate": 3.5728155339805828e-06, "loss": 0.2555, "step": 9564 }, { "epoch": 92.74698795180723, "grad_norm": 5.186746120452881, "learning_rate": 3.567961165048544e-06, "loss": 0.2368, "step": 9565 }, { "epoch": 92.75662650602409, "grad_norm": 1.2472907304763794, "learning_rate": 3.5631067961165046e-06, "loss": 0.0499, "step": 9566 }, { "epoch": 92.76626506024097, "grad_norm": 13.76103687286377, "learning_rate": 3.558252427184466e-06, "loss": 0.2618, "step": 9567 }, { "epoch": 92.77590361445783, "grad_norm": 6.754930019378662, "learning_rate": 3.5533980582524273e-06, "loss": 0.1957, "step": 9568 }, { "epoch": 92.7855421686747, "grad_norm": 6.247767448425293, "learning_rate": 3.548543689320388e-06, "loss": 0.2884, "step": 9569 }, { "epoch": 92.79518072289157, "grad_norm": 7.126150131225586, "learning_rate": 3.54368932038835e-06, "loss": 0.2348, "step": 9570 }, { "epoch": 92.80481927710844, "grad_norm": 12.598001480102539, "learning_rate": 3.5388349514563108e-06, "loss": 0.2477, "step": 9571 }, { "epoch": 92.8144578313253, "grad_norm": 1.745580792427063, "learning_rate": 3.533980582524272e-06, "loss": 0.1512, "step": 9572 }, { "epoch": 92.82409638554216, "grad_norm": 10.895581245422363, "learning_rate": 3.5291262135922335e-06, "loss": 0.217, "step": 9573 }, { "epoch": 92.83373493975904, "grad_norm": 13.21489429473877, "learning_rate": 3.5242718446601942e-06, "loss": 0.2286, "step": 9574 }, { "epoch": 92.8433734939759, "grad_norm": 5.708493709564209, "learning_rate": 3.5194174757281554e-06, "loss": 0.1229, "step": 9575 }, { "epoch": 92.85301204819277, "grad_norm": 43.10310745239258, "learning_rate": 3.514563106796117e-06, "loss": 0.339, "step": 9576 }, { "epoch": 92.86265060240964, "grad_norm": 6.948597431182861, "learning_rate": 3.509708737864078e-06, "loss": 0.2998, "step": 9577 }, { "epoch": 92.87228915662651, "grad_norm": 4.132127285003662, "learning_rate": 3.504854368932039e-06, "loss": 0.2557, "step": 9578 }, { "epoch": 92.88192771084337, "grad_norm": 1.4440631866455078, "learning_rate": 3.5000000000000004e-06, "loss": 0.2675, "step": 9579 }, { "epoch": 92.89156626506023, "grad_norm": 26.203720092773438, "learning_rate": 3.4951456310679615e-06, "loss": 0.2225, "step": 9580 }, { "epoch": 92.90120481927711, "grad_norm": 2.828648328781128, "learning_rate": 3.4902912621359222e-06, "loss": 0.208, "step": 9581 }, { "epoch": 92.91084337349398, "grad_norm": 1.2769149541854858, "learning_rate": 3.4854368932038834e-06, "loss": 0.0636, "step": 9582 }, { "epoch": 92.92048192771084, "grad_norm": 3.8565924167633057, "learning_rate": 3.480582524271845e-06, "loss": 0.3094, "step": 9583 }, { "epoch": 92.93012048192772, "grad_norm": 3.488551139831543, "learning_rate": 3.475728155339806e-06, "loss": 0.2118, "step": 9584 }, { "epoch": 92.93975903614458, "grad_norm": 1.2637360095977783, "learning_rate": 3.470873786407767e-06, "loss": 0.1004, "step": 9585 }, { "epoch": 92.94939759036144, "grad_norm": 8.566314697265625, "learning_rate": 3.4660194174757284e-06, "loss": 0.1537, "step": 9586 }, { "epoch": 92.95903614457832, "grad_norm": 3.53705096244812, "learning_rate": 3.4611650485436895e-06, "loss": 0.2822, "step": 9587 }, { "epoch": 92.96867469879518, "grad_norm": 3.606557607650757, "learning_rate": 3.4563106796116503e-06, "loss": 0.3024, "step": 9588 }, { "epoch": 92.97831325301205, "grad_norm": 5.776426315307617, "learning_rate": 3.451456310679612e-06, "loss": 0.3072, "step": 9589 }, { "epoch": 92.98795180722891, "grad_norm": 3.040987968444824, "learning_rate": 3.446601941747573e-06, "loss": 0.223, "step": 9590 }, { "epoch": 93.00361445783132, "grad_norm": 2.343904733657837, "learning_rate": 3.441747572815534e-06, "loss": 0.116, "step": 9591 }, { "epoch": 93.0132530120482, "grad_norm": 5.905913352966309, "learning_rate": 3.4368932038834957e-06, "loss": 0.2927, "step": 9592 }, { "epoch": 93.02289156626506, "grad_norm": 4.383145809173584, "learning_rate": 3.4320388349514564e-06, "loss": 0.2419, "step": 9593 }, { "epoch": 93.03253012048192, "grad_norm": 2.546717643737793, "learning_rate": 3.4271844660194176e-06, "loss": 0.3363, "step": 9594 }, { "epoch": 93.0421686746988, "grad_norm": 6.550520420074463, "learning_rate": 3.422330097087379e-06, "loss": 0.3199, "step": 9595 }, { "epoch": 93.05180722891566, "grad_norm": 2.5111794471740723, "learning_rate": 3.41747572815534e-06, "loss": 0.074, "step": 9596 }, { "epoch": 93.06144578313253, "grad_norm": 3.735358476638794, "learning_rate": 3.412621359223301e-06, "loss": 0.149, "step": 9597 }, { "epoch": 93.0710843373494, "grad_norm": 7.412866115570068, "learning_rate": 3.4077669902912626e-06, "loss": 0.3575, "step": 9598 }, { "epoch": 93.08072289156627, "grad_norm": 25.61603546142578, "learning_rate": 3.4029126213592237e-06, "loss": 0.3232, "step": 9599 }, { "epoch": 93.09036144578313, "grad_norm": 4.023496150970459, "learning_rate": 3.3980582524271844e-06, "loss": 0.3027, "step": 9600 }, { "epoch": 93.1, "grad_norm": 3.2802817821502686, "learning_rate": 3.3932038834951456e-06, "loss": 0.154, "step": 9601 }, { "epoch": 93.10963855421687, "grad_norm": 11.139115333557129, "learning_rate": 3.388349514563107e-06, "loss": 0.2949, "step": 9602 }, { "epoch": 93.11927710843374, "grad_norm": 4.332959175109863, "learning_rate": 3.383495145631068e-06, "loss": 0.2921, "step": 9603 }, { "epoch": 93.1289156626506, "grad_norm": 3.9677295684814453, "learning_rate": 3.378640776699029e-06, "loss": 0.205, "step": 9604 }, { "epoch": 93.13855421686748, "grad_norm": 6.21303653717041, "learning_rate": 3.3737864077669906e-06, "loss": 0.2671, "step": 9605 }, { "epoch": 93.14819277108434, "grad_norm": 5.660436630249023, "learning_rate": 3.3689320388349517e-06, "loss": 0.2262, "step": 9606 }, { "epoch": 93.1578313253012, "grad_norm": 8.954781532287598, "learning_rate": 3.3640776699029125e-06, "loss": 0.3094, "step": 9607 }, { "epoch": 93.16746987951807, "grad_norm": 14.653135299682617, "learning_rate": 3.359223300970874e-06, "loss": 0.4052, "step": 9608 }, { "epoch": 93.17710843373494, "grad_norm": 6.846851825714111, "learning_rate": 3.354368932038835e-06, "loss": 0.1944, "step": 9609 }, { "epoch": 93.1867469879518, "grad_norm": 6.77238130569458, "learning_rate": 3.349514563106796e-06, "loss": 0.474, "step": 9610 }, { "epoch": 93.19638554216867, "grad_norm": 1.3521519899368286, "learning_rate": 3.344660194174758e-06, "loss": 0.0629, "step": 9611 }, { "epoch": 93.20602409638555, "grad_norm": 2.7097368240356445, "learning_rate": 3.3398058252427186e-06, "loss": 0.16, "step": 9612 }, { "epoch": 93.21566265060241, "grad_norm": 20.892662048339844, "learning_rate": 3.3349514563106797e-06, "loss": 0.3569, "step": 9613 }, { "epoch": 93.22530120481927, "grad_norm": 2.7781925201416016, "learning_rate": 3.3300970873786413e-06, "loss": 0.2399, "step": 9614 }, { "epoch": 93.23493975903614, "grad_norm": 1.22736394405365, "learning_rate": 3.325242718446602e-06, "loss": 0.0414, "step": 9615 }, { "epoch": 93.24457831325302, "grad_norm": 5.257336616516113, "learning_rate": 3.320388349514563e-06, "loss": 0.2693, "step": 9616 }, { "epoch": 93.25421686746988, "grad_norm": 2.474883794784546, "learning_rate": 3.3155339805825248e-06, "loss": 0.0886, "step": 9617 }, { "epoch": 93.26385542168674, "grad_norm": 6.352017879486084, "learning_rate": 3.3106796116504855e-06, "loss": 0.2595, "step": 9618 }, { "epoch": 93.27349397590362, "grad_norm": 3.333505392074585, "learning_rate": 3.3058252427184466e-06, "loss": 0.1219, "step": 9619 }, { "epoch": 93.28313253012048, "grad_norm": 13.374166488647461, "learning_rate": 3.300970873786408e-06, "loss": 0.4668, "step": 9620 }, { "epoch": 93.29277108433735, "grad_norm": 7.514772415161133, "learning_rate": 3.2961165048543693e-06, "loss": 0.4489, "step": 9621 }, { "epoch": 93.30240963855422, "grad_norm": 4.308225154876709, "learning_rate": 3.29126213592233e-06, "loss": 0.2314, "step": 9622 }, { "epoch": 93.31204819277109, "grad_norm": 9.51126480102539, "learning_rate": 3.286407766990291e-06, "loss": 0.1151, "step": 9623 }, { "epoch": 93.32168674698795, "grad_norm": 6.121075630187988, "learning_rate": 3.2815533980582528e-06, "loss": 0.2843, "step": 9624 }, { "epoch": 93.33132530120481, "grad_norm": 5.886709690093994, "learning_rate": 3.2766990291262135e-06, "loss": 0.3814, "step": 9625 }, { "epoch": 93.34096385542169, "grad_norm": 3.6734416484832764, "learning_rate": 3.2718446601941746e-06, "loss": 0.1948, "step": 9626 }, { "epoch": 93.35060240963855, "grad_norm": 15.782285690307617, "learning_rate": 3.2669902912621362e-06, "loss": 0.4198, "step": 9627 }, { "epoch": 93.36024096385542, "grad_norm": 8.826159477233887, "learning_rate": 3.2621359223300974e-06, "loss": 0.2033, "step": 9628 }, { "epoch": 93.3698795180723, "grad_norm": 5.538915634155273, "learning_rate": 3.257281553398058e-06, "loss": 0.1848, "step": 9629 }, { "epoch": 93.37951807228916, "grad_norm": 4.470310688018799, "learning_rate": 3.2524271844660197e-06, "loss": 0.4454, "step": 9630 }, { "epoch": 93.38915662650602, "grad_norm": 6.0657854080200195, "learning_rate": 3.247572815533981e-06, "loss": 0.261, "step": 9631 }, { "epoch": 93.39879518072289, "grad_norm": 5.775463104248047, "learning_rate": 3.2427184466019415e-06, "loss": 0.3945, "step": 9632 }, { "epoch": 93.40843373493976, "grad_norm": 6.8182597160339355, "learning_rate": 3.2378640776699035e-06, "loss": 0.2569, "step": 9633 }, { "epoch": 93.41807228915663, "grad_norm": 4.398425102233887, "learning_rate": 3.2330097087378642e-06, "loss": 0.1589, "step": 9634 }, { "epoch": 93.42771084337349, "grad_norm": 3.565394401550293, "learning_rate": 3.2281553398058254e-06, "loss": 0.2049, "step": 9635 }, { "epoch": 93.43734939759037, "grad_norm": 1.335785984992981, "learning_rate": 3.223300970873787e-06, "loss": 0.079, "step": 9636 }, { "epoch": 93.44698795180723, "grad_norm": 3.514638662338257, "learning_rate": 3.2184466019417477e-06, "loss": 0.1024, "step": 9637 }, { "epoch": 93.4566265060241, "grad_norm": 21.01215934753418, "learning_rate": 3.213592233009709e-06, "loss": 0.3231, "step": 9638 }, { "epoch": 93.46626506024096, "grad_norm": 2.8168106079101562, "learning_rate": 3.2087378640776704e-06, "loss": 0.2516, "step": 9639 }, { "epoch": 93.47590361445783, "grad_norm": 2.0926125049591064, "learning_rate": 3.2038834951456315e-06, "loss": 0.1604, "step": 9640 }, { "epoch": 93.4855421686747, "grad_norm": 2.5993764400482178, "learning_rate": 3.1990291262135923e-06, "loss": 0.1364, "step": 9641 }, { "epoch": 93.49518072289156, "grad_norm": 2.2622077465057373, "learning_rate": 3.1941747572815534e-06, "loss": 0.1378, "step": 9642 }, { "epoch": 93.50481927710844, "grad_norm": 12.855101585388184, "learning_rate": 3.189320388349515e-06, "loss": 0.3393, "step": 9643 }, { "epoch": 93.5144578313253, "grad_norm": 5.173026084899902, "learning_rate": 3.1844660194174757e-06, "loss": 0.1217, "step": 9644 }, { "epoch": 93.52409638554217, "grad_norm": 6.200153827667236, "learning_rate": 3.179611650485437e-06, "loss": 0.1589, "step": 9645 }, { "epoch": 93.53373493975904, "grad_norm": 12.204501152038574, "learning_rate": 3.1747572815533984e-06, "loss": 0.3627, "step": 9646 }, { "epoch": 93.5433734939759, "grad_norm": 3.7186522483825684, "learning_rate": 3.169902912621359e-06, "loss": 0.2356, "step": 9647 }, { "epoch": 93.55301204819277, "grad_norm": 11.559362411499023, "learning_rate": 3.1650485436893203e-06, "loss": 0.3123, "step": 9648 }, { "epoch": 93.56265060240963, "grad_norm": 1.8948472738265991, "learning_rate": 3.160194174757282e-06, "loss": 0.1296, "step": 9649 }, { "epoch": 93.57228915662651, "grad_norm": 5.659159183502197, "learning_rate": 3.155339805825243e-06, "loss": 0.2473, "step": 9650 }, { "epoch": 93.58192771084337, "grad_norm": 7.693260192871094, "learning_rate": 3.1504854368932037e-06, "loss": 0.1881, "step": 9651 }, { "epoch": 93.59156626506024, "grad_norm": 19.602617263793945, "learning_rate": 3.1456310679611653e-06, "loss": 0.3499, "step": 9652 }, { "epoch": 93.60120481927711, "grad_norm": 15.714473724365234, "learning_rate": 3.1407766990291264e-06, "loss": 0.1863, "step": 9653 }, { "epoch": 93.61084337349398, "grad_norm": 14.298713684082031, "learning_rate": 3.135922330097087e-06, "loss": 0.2821, "step": 9654 }, { "epoch": 93.62048192771084, "grad_norm": 10.02454948425293, "learning_rate": 3.131067961165049e-06, "loss": 0.4101, "step": 9655 }, { "epoch": 93.6301204819277, "grad_norm": 5.151079177856445, "learning_rate": 3.12621359223301e-06, "loss": 0.1858, "step": 9656 }, { "epoch": 93.63975903614458, "grad_norm": 5.066648483276367, "learning_rate": 3.121359223300971e-06, "loss": 0.1912, "step": 9657 }, { "epoch": 93.64939759036145, "grad_norm": 4.159425735473633, "learning_rate": 3.116504854368932e-06, "loss": 0.1743, "step": 9658 }, { "epoch": 93.65903614457831, "grad_norm": 6.571491241455078, "learning_rate": 3.1116504854368933e-06, "loss": 0.1139, "step": 9659 }, { "epoch": 93.66867469879519, "grad_norm": 35.49956130981445, "learning_rate": 3.1067961165048544e-06, "loss": 0.3965, "step": 9660 }, { "epoch": 93.67831325301205, "grad_norm": 7.937840461730957, "learning_rate": 3.1019417475728156e-06, "loss": 0.2509, "step": 9661 }, { "epoch": 93.68795180722891, "grad_norm": 4.05144739151001, "learning_rate": 3.097087378640777e-06, "loss": 0.185, "step": 9662 }, { "epoch": 93.69759036144578, "grad_norm": 2.666879892349243, "learning_rate": 3.092233009708738e-06, "loss": 0.0926, "step": 9663 }, { "epoch": 93.70722891566265, "grad_norm": 5.097727298736572, "learning_rate": 3.087378640776699e-06, "loss": 0.1548, "step": 9664 }, { "epoch": 93.71686746987952, "grad_norm": 11.6482515335083, "learning_rate": 3.0825242718446606e-06, "loss": 0.2268, "step": 9665 }, { "epoch": 93.72650602409638, "grad_norm": 25.776227951049805, "learning_rate": 3.0776699029126213e-06, "loss": 0.264, "step": 9666 }, { "epoch": 93.73614457831326, "grad_norm": 8.311452865600586, "learning_rate": 3.072815533980583e-06, "loss": 0.2143, "step": 9667 }, { "epoch": 93.74578313253012, "grad_norm": 31.526512145996094, "learning_rate": 3.0679611650485436e-06, "loss": 0.2451, "step": 9668 }, { "epoch": 93.75542168674698, "grad_norm": 16.17124366760254, "learning_rate": 3.063106796116505e-06, "loss": 0.2777, "step": 9669 }, { "epoch": 93.76506024096386, "grad_norm": 8.022104263305664, "learning_rate": 3.0582524271844663e-06, "loss": 0.281, "step": 9670 }, { "epoch": 93.77469879518073, "grad_norm": 4.484757423400879, "learning_rate": 3.053398058252427e-06, "loss": 0.272, "step": 9671 }, { "epoch": 93.78433734939759, "grad_norm": 2.1921842098236084, "learning_rate": 3.0485436893203886e-06, "loss": 0.0759, "step": 9672 }, { "epoch": 93.79397590361445, "grad_norm": 3.4577813148498535, "learning_rate": 3.0436893203883498e-06, "loss": 0.1185, "step": 9673 }, { "epoch": 93.80361445783133, "grad_norm": 1.6141064167022705, "learning_rate": 3.038834951456311e-06, "loss": 0.1151, "step": 9674 }, { "epoch": 93.8132530120482, "grad_norm": 6.403615951538086, "learning_rate": 3.033980582524272e-06, "loss": 0.1577, "step": 9675 }, { "epoch": 93.82289156626506, "grad_norm": 3.750795841217041, "learning_rate": 3.029126213592233e-06, "loss": 0.1894, "step": 9676 }, { "epoch": 93.83253012048193, "grad_norm": 4.653385639190674, "learning_rate": 3.0242718446601943e-06, "loss": 0.2592, "step": 9677 }, { "epoch": 93.8421686746988, "grad_norm": 2.504581928253174, "learning_rate": 3.0194174757281555e-06, "loss": 0.3164, "step": 9678 }, { "epoch": 93.85180722891566, "grad_norm": 12.848737716674805, "learning_rate": 3.0145631067961166e-06, "loss": 0.4195, "step": 9679 }, { "epoch": 93.86144578313252, "grad_norm": 2.9465489387512207, "learning_rate": 3.0097087378640778e-06, "loss": 0.0872, "step": 9680 }, { "epoch": 93.8710843373494, "grad_norm": 5.374655246734619, "learning_rate": 3.004854368932039e-06, "loss": 0.3125, "step": 9681 }, { "epoch": 93.88072289156626, "grad_norm": 7.091792106628418, "learning_rate": 3e-06, "loss": 0.355, "step": 9682 }, { "epoch": 93.89036144578313, "grad_norm": 2.6698408126831055, "learning_rate": 2.9951456310679612e-06, "loss": 0.1164, "step": 9683 }, { "epoch": 93.9, "grad_norm": 8.676945686340332, "learning_rate": 2.990291262135923e-06, "loss": 0.1216, "step": 9684 }, { "epoch": 93.90963855421687, "grad_norm": 5.27105188369751, "learning_rate": 2.9854368932038835e-06, "loss": 0.1699, "step": 9685 }, { "epoch": 93.91927710843373, "grad_norm": 6.715002536773682, "learning_rate": 2.9805825242718447e-06, "loss": 0.2628, "step": 9686 }, { "epoch": 93.9289156626506, "grad_norm": 17.616857528686523, "learning_rate": 2.975728155339806e-06, "loss": 0.1979, "step": 9687 }, { "epoch": 93.93855421686747, "grad_norm": 3.7758865356445312, "learning_rate": 2.970873786407767e-06, "loss": 0.1068, "step": 9688 }, { "epoch": 93.94819277108434, "grad_norm": 8.245464324951172, "learning_rate": 2.9660194174757285e-06, "loss": 0.1686, "step": 9689 }, { "epoch": 93.9578313253012, "grad_norm": 11.148221015930176, "learning_rate": 2.9611650485436892e-06, "loss": 0.2003, "step": 9690 }, { "epoch": 93.96746987951808, "grad_norm": 2.6236562728881836, "learning_rate": 2.956310679611651e-06, "loss": 0.1076, "step": 9691 }, { "epoch": 93.97710843373494, "grad_norm": 2.9112839698791504, "learning_rate": 2.951456310679612e-06, "loss": 0.0873, "step": 9692 }, { "epoch": 93.9867469879518, "grad_norm": 3.34385085105896, "learning_rate": 2.9466019417475727e-06, "loss": 0.3596, "step": 9693 }, { "epoch": 94.00240963855421, "grad_norm": 19.38253402709961, "learning_rate": 2.9417475728155342e-06, "loss": 0.3802, "step": 9694 }, { "epoch": 94.01204819277109, "grad_norm": 6.344365119934082, "learning_rate": 2.9368932038834954e-06, "loss": 0.1705, "step": 9695 }, { "epoch": 94.02168674698795, "grad_norm": 5.925331115722656, "learning_rate": 2.9320388349514565e-06, "loss": 0.3389, "step": 9696 }, { "epoch": 94.03132530120482, "grad_norm": 5.075525760650635, "learning_rate": 2.9271844660194177e-06, "loss": 0.2008, "step": 9697 }, { "epoch": 94.04096385542168, "grad_norm": 24.68500328063965, "learning_rate": 2.922330097087379e-06, "loss": 0.4095, "step": 9698 }, { "epoch": 94.05060240963856, "grad_norm": 4.008010387420654, "learning_rate": 2.91747572815534e-06, "loss": 0.131, "step": 9699 }, { "epoch": 94.06024096385542, "grad_norm": 3.6876461505889893, "learning_rate": 2.912621359223301e-06, "loss": 0.2084, "step": 9700 }, { "epoch": 94.06987951807228, "grad_norm": 37.252105712890625, "learning_rate": 2.9077669902912623e-06, "loss": 0.2105, "step": 9701 }, { "epoch": 94.07951807228916, "grad_norm": 10.32996940612793, "learning_rate": 2.9029126213592234e-06, "loss": 0.3639, "step": 9702 }, { "epoch": 94.08915662650602, "grad_norm": 4.340452194213867, "learning_rate": 2.8980582524271846e-06, "loss": 0.1162, "step": 9703 }, { "epoch": 94.09879518072289, "grad_norm": 3.1462600231170654, "learning_rate": 2.8932038834951457e-06, "loss": 0.1764, "step": 9704 }, { "epoch": 94.10843373493977, "grad_norm": 9.536924362182617, "learning_rate": 2.888349514563107e-06, "loss": 0.3109, "step": 9705 }, { "epoch": 94.11807228915663, "grad_norm": 6.175723075866699, "learning_rate": 2.8834951456310684e-06, "loss": 0.2514, "step": 9706 }, { "epoch": 94.12771084337349, "grad_norm": 2.396070957183838, "learning_rate": 2.878640776699029e-06, "loss": 0.2189, "step": 9707 }, { "epoch": 94.13734939759036, "grad_norm": 4.359650611877441, "learning_rate": 2.8737864077669903e-06, "loss": 0.1452, "step": 9708 }, { "epoch": 94.14698795180723, "grad_norm": 3.8563249111175537, "learning_rate": 2.8689320388349514e-06, "loss": 0.2099, "step": 9709 }, { "epoch": 94.1566265060241, "grad_norm": 5.248540878295898, "learning_rate": 2.8640776699029126e-06, "loss": 0.2375, "step": 9710 }, { "epoch": 94.16626506024096, "grad_norm": 2.3745222091674805, "learning_rate": 2.859223300970874e-06, "loss": 0.1863, "step": 9711 }, { "epoch": 94.17590361445784, "grad_norm": 9.74498176574707, "learning_rate": 2.854368932038835e-06, "loss": 0.231, "step": 9712 }, { "epoch": 94.1855421686747, "grad_norm": 4.258443355560303, "learning_rate": 2.8495145631067964e-06, "loss": 0.2125, "step": 9713 }, { "epoch": 94.19518072289156, "grad_norm": 11.23723030090332, "learning_rate": 2.8446601941747576e-06, "loss": 0.385, "step": 9714 }, { "epoch": 94.20481927710843, "grad_norm": 40.43442153930664, "learning_rate": 2.8398058252427183e-06, "loss": 0.2372, "step": 9715 }, { "epoch": 94.2144578313253, "grad_norm": 5.167787551879883, "learning_rate": 2.83495145631068e-06, "loss": 0.215, "step": 9716 }, { "epoch": 94.22409638554217, "grad_norm": 17.02345848083496, "learning_rate": 2.830097087378641e-06, "loss": 0.5011, "step": 9717 }, { "epoch": 94.23373493975903, "grad_norm": 10.557262420654297, "learning_rate": 2.825242718446602e-06, "loss": 0.4727, "step": 9718 }, { "epoch": 94.24337349397591, "grad_norm": 3.6065673828125, "learning_rate": 2.8203883495145633e-06, "loss": 0.2159, "step": 9719 }, { "epoch": 94.25301204819277, "grad_norm": 6.075960159301758, "learning_rate": 2.8155339805825245e-06, "loss": 0.2686, "step": 9720 }, { "epoch": 94.26265060240964, "grad_norm": 4.563961029052734, "learning_rate": 2.8106796116504856e-06, "loss": 0.2343, "step": 9721 }, { "epoch": 94.2722891566265, "grad_norm": 4.3400492668151855, "learning_rate": 2.8058252427184468e-06, "loss": 0.3211, "step": 9722 }, { "epoch": 94.28192771084338, "grad_norm": 5.0306596755981445, "learning_rate": 2.800970873786408e-06, "loss": 0.1577, "step": 9723 }, { "epoch": 94.29156626506024, "grad_norm": 15.452739715576172, "learning_rate": 2.796116504854369e-06, "loss": 0.3397, "step": 9724 }, { "epoch": 94.3012048192771, "grad_norm": 5.143658638000488, "learning_rate": 2.79126213592233e-06, "loss": 0.4388, "step": 9725 }, { "epoch": 94.31084337349398, "grad_norm": 2.3799798488616943, "learning_rate": 2.7864077669902913e-06, "loss": 0.2733, "step": 9726 }, { "epoch": 94.32048192771084, "grad_norm": 5.189241886138916, "learning_rate": 2.7815533980582525e-06, "loss": 0.4224, "step": 9727 }, { "epoch": 94.33012048192771, "grad_norm": 8.588075637817383, "learning_rate": 2.7766990291262136e-06, "loss": 0.2387, "step": 9728 }, { "epoch": 94.33975903614459, "grad_norm": 7.070442199707031, "learning_rate": 2.7718446601941748e-06, "loss": 0.2035, "step": 9729 }, { "epoch": 94.34939759036145, "grad_norm": 9.37143325805664, "learning_rate": 2.7669902912621363e-06, "loss": 0.0905, "step": 9730 }, { "epoch": 94.35903614457831, "grad_norm": 4.796552658081055, "learning_rate": 2.762135922330097e-06, "loss": 0.1754, "step": 9731 }, { "epoch": 94.36867469879518, "grad_norm": 3.1949620246887207, "learning_rate": 2.757281553398058e-06, "loss": 0.1589, "step": 9732 }, { "epoch": 94.37831325301205, "grad_norm": 18.105022430419922, "learning_rate": 2.7524271844660198e-06, "loss": 0.5987, "step": 9733 }, { "epoch": 94.38795180722892, "grad_norm": 30.389232635498047, "learning_rate": 2.7475728155339805e-06, "loss": 0.3028, "step": 9734 }, { "epoch": 94.39759036144578, "grad_norm": 5.64732551574707, "learning_rate": 2.742718446601942e-06, "loss": 0.3131, "step": 9735 }, { "epoch": 94.40722891566266, "grad_norm": 13.951955795288086, "learning_rate": 2.7378640776699032e-06, "loss": 0.3445, "step": 9736 }, { "epoch": 94.41686746987952, "grad_norm": 7.478175163269043, "learning_rate": 2.733009708737864e-06, "loss": 0.1992, "step": 9737 }, { "epoch": 94.42650602409638, "grad_norm": 4.481407165527344, "learning_rate": 2.7281553398058255e-06, "loss": 0.2292, "step": 9738 }, { "epoch": 94.43614457831325, "grad_norm": 6.545784950256348, "learning_rate": 2.7233009708737862e-06, "loss": 0.2908, "step": 9739 }, { "epoch": 94.44578313253012, "grad_norm": 3.2348039150238037, "learning_rate": 2.718446601941748e-06, "loss": 0.2057, "step": 9740 }, { "epoch": 94.45542168674699, "grad_norm": 4.595223903656006, "learning_rate": 2.713592233009709e-06, "loss": 0.2373, "step": 9741 }, { "epoch": 94.46506024096385, "grad_norm": 9.702003479003906, "learning_rate": 2.70873786407767e-06, "loss": 0.0939, "step": 9742 }, { "epoch": 94.47469879518073, "grad_norm": 4.300387382507324, "learning_rate": 2.7038834951456312e-06, "loss": 0.3502, "step": 9743 }, { "epoch": 94.48433734939759, "grad_norm": 5.3981852531433105, "learning_rate": 2.6990291262135924e-06, "loss": 0.2326, "step": 9744 }, { "epoch": 94.49397590361446, "grad_norm": 4.685131072998047, "learning_rate": 2.6941747572815535e-06, "loss": 0.2224, "step": 9745 }, { "epoch": 94.50361445783132, "grad_norm": 5.083244323730469, "learning_rate": 2.6893203883495147e-06, "loss": 0.1984, "step": 9746 }, { "epoch": 94.5132530120482, "grad_norm": 6.611755847930908, "learning_rate": 2.684466019417476e-06, "loss": 0.3112, "step": 9747 }, { "epoch": 94.52289156626506, "grad_norm": 3.026494264602661, "learning_rate": 2.679611650485437e-06, "loss": 0.1333, "step": 9748 }, { "epoch": 94.53253012048192, "grad_norm": 8.726099014282227, "learning_rate": 2.674757281553398e-06, "loss": 0.1769, "step": 9749 }, { "epoch": 94.5421686746988, "grad_norm": 4.584390640258789, "learning_rate": 2.6699029126213593e-06, "loss": 0.2032, "step": 9750 }, { "epoch": 94.55180722891566, "grad_norm": 5.809202671051025, "learning_rate": 2.6650485436893204e-06, "loss": 0.0843, "step": 9751 }, { "epoch": 94.56144578313253, "grad_norm": 9.452958106994629, "learning_rate": 2.660194174757282e-06, "loss": 0.2912, "step": 9752 }, { "epoch": 94.5710843373494, "grad_norm": 6.185125827789307, "learning_rate": 2.6553398058252427e-06, "loss": 0.2804, "step": 9753 }, { "epoch": 94.58072289156627, "grad_norm": 1.985563039779663, "learning_rate": 2.650485436893204e-06, "loss": 0.065, "step": 9754 }, { "epoch": 94.59036144578313, "grad_norm": 8.327717781066895, "learning_rate": 2.6456310679611654e-06, "loss": 0.2042, "step": 9755 }, { "epoch": 94.6, "grad_norm": 4.403199672698975, "learning_rate": 2.640776699029126e-06, "loss": 0.1243, "step": 9756 }, { "epoch": 94.60963855421687, "grad_norm": 4.5053300857543945, "learning_rate": 2.6359223300970877e-06, "loss": 0.1241, "step": 9757 }, { "epoch": 94.61927710843374, "grad_norm": 8.11474609375, "learning_rate": 2.6310679611650484e-06, "loss": 0.3899, "step": 9758 }, { "epoch": 94.6289156626506, "grad_norm": 4.87663459777832, "learning_rate": 2.62621359223301e-06, "loss": 0.2152, "step": 9759 }, { "epoch": 94.63855421686748, "grad_norm": 6.552071571350098, "learning_rate": 2.621359223300971e-06, "loss": 0.2812, "step": 9760 }, { "epoch": 94.64819277108434, "grad_norm": 2.1612861156463623, "learning_rate": 2.616504854368932e-06, "loss": 0.0816, "step": 9761 }, { "epoch": 94.6578313253012, "grad_norm": 4.017874717712402, "learning_rate": 2.6116504854368934e-06, "loss": 0.3048, "step": 9762 }, { "epoch": 94.66746987951807, "grad_norm": 6.908864498138428, "learning_rate": 2.6067961165048546e-06, "loss": 0.1858, "step": 9763 }, { "epoch": 94.67710843373494, "grad_norm": 15.099794387817383, "learning_rate": 2.6019417475728157e-06, "loss": 0.3387, "step": 9764 }, { "epoch": 94.6867469879518, "grad_norm": 4.462533473968506, "learning_rate": 2.597087378640777e-06, "loss": 0.3125, "step": 9765 }, { "epoch": 94.69638554216867, "grad_norm": 3.639662981033325, "learning_rate": 2.592233009708738e-06, "loss": 0.1379, "step": 9766 }, { "epoch": 94.70602409638555, "grad_norm": 11.125045776367188, "learning_rate": 2.587378640776699e-06, "loss": 0.347, "step": 9767 }, { "epoch": 94.71566265060241, "grad_norm": 2.4627115726470947, "learning_rate": 2.5825242718446603e-06, "loss": 0.1991, "step": 9768 }, { "epoch": 94.72530120481927, "grad_norm": 6.227365016937256, "learning_rate": 2.5776699029126214e-06, "loss": 0.1333, "step": 9769 }, { "epoch": 94.73493975903614, "grad_norm": 3.9319167137145996, "learning_rate": 2.5728155339805826e-06, "loss": 0.1973, "step": 9770 }, { "epoch": 94.74457831325302, "grad_norm": 4.196417808532715, "learning_rate": 2.5679611650485437e-06, "loss": 0.1737, "step": 9771 }, { "epoch": 94.75421686746988, "grad_norm": 1.9895744323730469, "learning_rate": 2.563106796116505e-06, "loss": 0.0989, "step": 9772 }, { "epoch": 94.76385542168674, "grad_norm": 5.983292579650879, "learning_rate": 2.558252427184466e-06, "loss": 0.1839, "step": 9773 }, { "epoch": 94.77349397590362, "grad_norm": 3.859854221343994, "learning_rate": 2.5533980582524276e-06, "loss": 0.3212, "step": 9774 }, { "epoch": 94.78313253012048, "grad_norm": 2.572969913482666, "learning_rate": 2.5485436893203883e-06, "loss": 0.2477, "step": 9775 }, { "epoch": 94.79277108433735, "grad_norm": 5.952333450317383, "learning_rate": 2.5436893203883495e-06, "loss": 0.1898, "step": 9776 }, { "epoch": 94.80240963855422, "grad_norm": 11.26639461517334, "learning_rate": 2.538834951456311e-06, "loss": 0.2687, "step": 9777 }, { "epoch": 94.81204819277109, "grad_norm": 19.894861221313477, "learning_rate": 2.5339805825242718e-06, "loss": 0.2372, "step": 9778 }, { "epoch": 94.82168674698795, "grad_norm": 1.6260770559310913, "learning_rate": 2.5291262135922333e-06, "loss": 0.0532, "step": 9779 }, { "epoch": 94.83132530120481, "grad_norm": 3.222684144973755, "learning_rate": 2.524271844660194e-06, "loss": 0.206, "step": 9780 }, { "epoch": 94.84096385542169, "grad_norm": 5.347261905670166, "learning_rate": 2.5194174757281556e-06, "loss": 0.2128, "step": 9781 }, { "epoch": 94.85060240963855, "grad_norm": 10.561314582824707, "learning_rate": 2.5145631067961168e-06, "loss": 0.2706, "step": 9782 }, { "epoch": 94.86024096385542, "grad_norm": 3.948777914047241, "learning_rate": 2.5097087378640775e-06, "loss": 0.2466, "step": 9783 }, { "epoch": 94.8698795180723, "grad_norm": 47.546470642089844, "learning_rate": 2.504854368932039e-06, "loss": 0.1255, "step": 9784 }, { "epoch": 94.87951807228916, "grad_norm": 6.677295207977295, "learning_rate": 2.5e-06, "loss": 0.3066, "step": 9785 }, { "epoch": 94.88915662650602, "grad_norm": 2.2954530715942383, "learning_rate": 2.4951456310679614e-06, "loss": 0.1904, "step": 9786 }, { "epoch": 94.89879518072289, "grad_norm": 7.410593509674072, "learning_rate": 2.4902912621359225e-06, "loss": 0.3908, "step": 9787 }, { "epoch": 94.90843373493976, "grad_norm": 5.0623321533203125, "learning_rate": 2.4854368932038836e-06, "loss": 0.3143, "step": 9788 }, { "epoch": 94.91807228915663, "grad_norm": 7.300514221191406, "learning_rate": 2.4805825242718448e-06, "loss": 0.2434, "step": 9789 }, { "epoch": 94.92771084337349, "grad_norm": 8.438368797302246, "learning_rate": 2.475728155339806e-06, "loss": 0.1303, "step": 9790 }, { "epoch": 94.93734939759037, "grad_norm": 6.835273742675781, "learning_rate": 2.470873786407767e-06, "loss": 0.251, "step": 9791 }, { "epoch": 94.94698795180723, "grad_norm": 3.378842353820801, "learning_rate": 2.4660194174757282e-06, "loss": 0.3185, "step": 9792 }, { "epoch": 94.9566265060241, "grad_norm": 7.284455299377441, "learning_rate": 2.4611650485436894e-06, "loss": 0.4059, "step": 9793 }, { "epoch": 94.96626506024096, "grad_norm": 7.700904846191406, "learning_rate": 2.4563106796116505e-06, "loss": 0.2718, "step": 9794 }, { "epoch": 94.97590361445783, "grad_norm": 3.4823250770568848, "learning_rate": 2.4514563106796117e-06, "loss": 0.2854, "step": 9795 }, { "epoch": 94.9855421686747, "grad_norm": 3.792553663253784, "learning_rate": 2.4466019417475732e-06, "loss": 0.191, "step": 9796 }, { "epoch": 95.0012048192771, "grad_norm": 5.437539577484131, "learning_rate": 2.441747572815534e-06, "loss": 0.2453, "step": 9797 }, { "epoch": 95.01084337349397, "grad_norm": 5.154155731201172, "learning_rate": 2.4368932038834955e-06, "loss": 0.2242, "step": 9798 }, { "epoch": 95.02048192771085, "grad_norm": 11.810608863830566, "learning_rate": 2.4320388349514562e-06, "loss": 0.211, "step": 9799 }, { "epoch": 95.03012048192771, "grad_norm": 4.676708221435547, "learning_rate": 2.4271844660194174e-06, "loss": 0.3538, "step": 9800 }, { "epoch": 95.03975903614457, "grad_norm": 1.7848150730133057, "learning_rate": 2.422330097087379e-06, "loss": 0.1166, "step": 9801 }, { "epoch": 95.04939759036145, "grad_norm": 6.800567626953125, "learning_rate": 2.4174757281553397e-06, "loss": 0.2984, "step": 9802 }, { "epoch": 95.05903614457831, "grad_norm": 4.847336769104004, "learning_rate": 2.4126213592233013e-06, "loss": 0.2822, "step": 9803 }, { "epoch": 95.06867469879518, "grad_norm": 7.373016834259033, "learning_rate": 2.4077669902912624e-06, "loss": 0.3236, "step": 9804 }, { "epoch": 95.07831325301204, "grad_norm": 21.199432373046875, "learning_rate": 2.402912621359223e-06, "loss": 0.1637, "step": 9805 }, { "epoch": 95.08795180722892, "grad_norm": 3.8864026069641113, "learning_rate": 2.3980582524271847e-06, "loss": 0.0801, "step": 9806 }, { "epoch": 95.09759036144578, "grad_norm": 1.6831023693084717, "learning_rate": 2.393203883495146e-06, "loss": 0.048, "step": 9807 }, { "epoch": 95.10722891566265, "grad_norm": 4.644670009613037, "learning_rate": 2.388349514563107e-06, "loss": 0.2197, "step": 9808 }, { "epoch": 95.11686746987952, "grad_norm": 6.137190341949463, "learning_rate": 2.383495145631068e-06, "loss": 0.273, "step": 9809 }, { "epoch": 95.12650602409639, "grad_norm": 7.78072452545166, "learning_rate": 2.3786407766990293e-06, "loss": 0.3951, "step": 9810 }, { "epoch": 95.13614457831325, "grad_norm": 4.355661392211914, "learning_rate": 2.3737864077669904e-06, "loss": 0.082, "step": 9811 }, { "epoch": 95.14578313253013, "grad_norm": 17.67462921142578, "learning_rate": 2.3689320388349516e-06, "loss": 0.2191, "step": 9812 }, { "epoch": 95.15542168674699, "grad_norm": 6.144576072692871, "learning_rate": 2.3640776699029127e-06, "loss": 0.1821, "step": 9813 }, { "epoch": 95.16506024096385, "grad_norm": 7.483146667480469, "learning_rate": 2.359223300970874e-06, "loss": 0.294, "step": 9814 }, { "epoch": 95.17469879518072, "grad_norm": 6.852932453155518, "learning_rate": 2.354368932038835e-06, "loss": 0.237, "step": 9815 }, { "epoch": 95.1843373493976, "grad_norm": 1.9456372261047363, "learning_rate": 2.349514563106796e-06, "loss": 0.2249, "step": 9816 }, { "epoch": 95.19397590361446, "grad_norm": 4.58428430557251, "learning_rate": 2.3446601941747573e-06, "loss": 0.3575, "step": 9817 }, { "epoch": 95.20361445783132, "grad_norm": 10.59117317199707, "learning_rate": 2.339805825242719e-06, "loss": 0.4288, "step": 9818 }, { "epoch": 95.2132530120482, "grad_norm": 3.204765796661377, "learning_rate": 2.3349514563106796e-06, "loss": 0.1278, "step": 9819 }, { "epoch": 95.22289156626506, "grad_norm": 7.520508289337158, "learning_rate": 2.330097087378641e-06, "loss": 0.1598, "step": 9820 }, { "epoch": 95.23253012048193, "grad_norm": 6.725746154785156, "learning_rate": 2.325242718446602e-06, "loss": 0.2082, "step": 9821 }, { "epoch": 95.24216867469879, "grad_norm": 6.216691493988037, "learning_rate": 2.320388349514563e-06, "loss": 0.2724, "step": 9822 }, { "epoch": 95.25180722891567, "grad_norm": 3.7346363067626953, "learning_rate": 2.3155339805825246e-06, "loss": 0.1727, "step": 9823 }, { "epoch": 95.26144578313253, "grad_norm": 7.9420623779296875, "learning_rate": 2.3106796116504853e-06, "loss": 0.3455, "step": 9824 }, { "epoch": 95.2710843373494, "grad_norm": 4.641557693481445, "learning_rate": 2.305825242718447e-06, "loss": 0.3047, "step": 9825 }, { "epoch": 95.28072289156627, "grad_norm": 4.6607666015625, "learning_rate": 2.300970873786408e-06, "loss": 0.2198, "step": 9826 }, { "epoch": 95.29036144578313, "grad_norm": 6.371222972869873, "learning_rate": 2.296116504854369e-06, "loss": 0.5025, "step": 9827 }, { "epoch": 95.3, "grad_norm": 3.5524237155914307, "learning_rate": 2.2912621359223303e-06, "loss": 0.3172, "step": 9828 }, { "epoch": 95.30963855421686, "grad_norm": 8.005513191223145, "learning_rate": 2.286407766990291e-06, "loss": 0.2625, "step": 9829 }, { "epoch": 95.31927710843374, "grad_norm": 15.38720989227295, "learning_rate": 2.2815533980582526e-06, "loss": 0.1892, "step": 9830 }, { "epoch": 95.3289156626506, "grad_norm": 3.5188961029052734, "learning_rate": 2.2766990291262138e-06, "loss": 0.1785, "step": 9831 }, { "epoch": 95.33855421686746, "grad_norm": 8.810179710388184, "learning_rate": 2.271844660194175e-06, "loss": 0.3579, "step": 9832 }, { "epoch": 95.34819277108434, "grad_norm": 5.356700420379639, "learning_rate": 2.266990291262136e-06, "loss": 0.1978, "step": 9833 }, { "epoch": 95.3578313253012, "grad_norm": 3.5248429775238037, "learning_rate": 2.262135922330097e-06, "loss": 0.1204, "step": 9834 }, { "epoch": 95.36746987951807, "grad_norm": 12.799851417541504, "learning_rate": 2.2572815533980583e-06, "loss": 0.2393, "step": 9835 }, { "epoch": 95.37710843373495, "grad_norm": 13.162480354309082, "learning_rate": 2.2524271844660195e-06, "loss": 0.2963, "step": 9836 }, { "epoch": 95.38674698795181, "grad_norm": 4.121290683746338, "learning_rate": 2.247572815533981e-06, "loss": 0.1988, "step": 9837 }, { "epoch": 95.39638554216867, "grad_norm": 3.1692054271698, "learning_rate": 2.2427184466019418e-06, "loss": 0.1392, "step": 9838 }, { "epoch": 95.40602409638554, "grad_norm": 7.342134952545166, "learning_rate": 2.237864077669903e-06, "loss": 0.2867, "step": 9839 }, { "epoch": 95.41566265060241, "grad_norm": 5.217625141143799, "learning_rate": 2.233009708737864e-06, "loss": 0.2832, "step": 9840 }, { "epoch": 95.42530120481928, "grad_norm": 2.759291410446167, "learning_rate": 2.2281553398058252e-06, "loss": 0.0911, "step": 9841 }, { "epoch": 95.43493975903614, "grad_norm": 9.410972595214844, "learning_rate": 2.2233009708737868e-06, "loss": 0.4444, "step": 9842 }, { "epoch": 95.44457831325302, "grad_norm": 7.935653209686279, "learning_rate": 2.2184466019417475e-06, "loss": 0.3596, "step": 9843 }, { "epoch": 95.45421686746988, "grad_norm": 4.077612400054932, "learning_rate": 2.2135922330097087e-06, "loss": 0.2545, "step": 9844 }, { "epoch": 95.46385542168674, "grad_norm": 15.541940689086914, "learning_rate": 2.2087378640776702e-06, "loss": 0.3623, "step": 9845 }, { "epoch": 95.47349397590361, "grad_norm": 4.810474872589111, "learning_rate": 2.203883495145631e-06, "loss": 0.2504, "step": 9846 }, { "epoch": 95.48313253012049, "grad_norm": 6.278636932373047, "learning_rate": 2.1990291262135925e-06, "loss": 0.3307, "step": 9847 }, { "epoch": 95.49277108433735, "grad_norm": 15.405014991760254, "learning_rate": 2.1941747572815537e-06, "loss": 0.205, "step": 9848 }, { "epoch": 95.50240963855421, "grad_norm": 2.2557566165924072, "learning_rate": 2.189320388349515e-06, "loss": 0.1132, "step": 9849 }, { "epoch": 95.51204819277109, "grad_norm": 4.779977798461914, "learning_rate": 2.184466019417476e-06, "loss": 0.1388, "step": 9850 }, { "epoch": 95.52168674698795, "grad_norm": 5.448363304138184, "learning_rate": 2.1796116504854367e-06, "loss": 0.264, "step": 9851 }, { "epoch": 95.53132530120482, "grad_norm": 3.734866142272949, "learning_rate": 2.1747572815533982e-06, "loss": 0.2231, "step": 9852 }, { "epoch": 95.54096385542168, "grad_norm": 4.738733768463135, "learning_rate": 2.1699029126213594e-06, "loss": 0.2645, "step": 9853 }, { "epoch": 95.55060240963856, "grad_norm": 9.655564308166504, "learning_rate": 2.1650485436893205e-06, "loss": 0.1955, "step": 9854 }, { "epoch": 95.56024096385542, "grad_norm": 7.420496940612793, "learning_rate": 2.1601941747572817e-06, "loss": 0.3365, "step": 9855 }, { "epoch": 95.56987951807228, "grad_norm": 2.844866991043091, "learning_rate": 2.155339805825243e-06, "loss": 0.2003, "step": 9856 }, { "epoch": 95.57951807228916, "grad_norm": 24.94113540649414, "learning_rate": 2.150485436893204e-06, "loss": 0.1715, "step": 9857 }, { "epoch": 95.58915662650602, "grad_norm": 5.681423187255859, "learning_rate": 2.145631067961165e-06, "loss": 0.3717, "step": 9858 }, { "epoch": 95.59879518072289, "grad_norm": 5.820760250091553, "learning_rate": 2.1407766990291267e-06, "loss": 0.1586, "step": 9859 }, { "epoch": 95.60843373493977, "grad_norm": 6.372376918792725, "learning_rate": 2.1359223300970874e-06, "loss": 0.2414, "step": 9860 }, { "epoch": 95.61807228915663, "grad_norm": 9.160008430480957, "learning_rate": 2.1310679611650486e-06, "loss": 0.5484, "step": 9861 }, { "epoch": 95.62771084337349, "grad_norm": 7.971316337585449, "learning_rate": 2.1262135922330097e-06, "loss": 0.2744, "step": 9862 }, { "epoch": 95.63734939759036, "grad_norm": 4.9248223304748535, "learning_rate": 2.121359223300971e-06, "loss": 0.1903, "step": 9863 }, { "epoch": 95.64698795180723, "grad_norm": 6.694882869720459, "learning_rate": 2.1165048543689324e-06, "loss": 0.1936, "step": 9864 }, { "epoch": 95.6566265060241, "grad_norm": 9.846420288085938, "learning_rate": 2.111650485436893e-06, "loss": 0.3699, "step": 9865 }, { "epoch": 95.66626506024096, "grad_norm": 5.679821014404297, "learning_rate": 2.1067961165048547e-06, "loss": 0.3989, "step": 9866 }, { "epoch": 95.67590361445784, "grad_norm": 4.525361061096191, "learning_rate": 2.101941747572816e-06, "loss": 0.2371, "step": 9867 }, { "epoch": 95.6855421686747, "grad_norm": 3.945420742034912, "learning_rate": 2.0970873786407766e-06, "loss": 0.1715, "step": 9868 }, { "epoch": 95.69518072289156, "grad_norm": 5.988572597503662, "learning_rate": 2.092233009708738e-06, "loss": 0.3006, "step": 9869 }, { "epoch": 95.70481927710843, "grad_norm": 4.079058647155762, "learning_rate": 2.087378640776699e-06, "loss": 0.2649, "step": 9870 }, { "epoch": 95.7144578313253, "grad_norm": 9.747746467590332, "learning_rate": 2.0825242718446604e-06, "loss": 0.4419, "step": 9871 }, { "epoch": 95.72409638554217, "grad_norm": 4.493485927581787, "learning_rate": 2.0776699029126216e-06, "loss": 0.2943, "step": 9872 }, { "epoch": 95.73373493975903, "grad_norm": 3.99092698097229, "learning_rate": 2.0728155339805823e-06, "loss": 0.1422, "step": 9873 }, { "epoch": 95.74337349397591, "grad_norm": 8.493420600891113, "learning_rate": 2.067961165048544e-06, "loss": 0.2088, "step": 9874 }, { "epoch": 95.75301204819277, "grad_norm": 5.756849765777588, "learning_rate": 2.063106796116505e-06, "loss": 0.1126, "step": 9875 }, { "epoch": 95.76265060240964, "grad_norm": 5.246143817901611, "learning_rate": 2.058252427184466e-06, "loss": 0.3708, "step": 9876 }, { "epoch": 95.7722891566265, "grad_norm": 6.094534873962402, "learning_rate": 2.0533980582524273e-06, "loss": 0.1561, "step": 9877 }, { "epoch": 95.78192771084338, "grad_norm": 4.505152225494385, "learning_rate": 2.0485436893203885e-06, "loss": 0.188, "step": 9878 }, { "epoch": 95.79156626506024, "grad_norm": 2.553750514984131, "learning_rate": 2.0436893203883496e-06, "loss": 0.1245, "step": 9879 }, { "epoch": 95.8012048192771, "grad_norm": 2.070223331451416, "learning_rate": 2.0388349514563107e-06, "loss": 0.0714, "step": 9880 }, { "epoch": 95.81084337349398, "grad_norm": 2.52673077583313, "learning_rate": 2.033980582524272e-06, "loss": 0.2383, "step": 9881 }, { "epoch": 95.82048192771084, "grad_norm": 4.908441543579102, "learning_rate": 2.029126213592233e-06, "loss": 0.1429, "step": 9882 }, { "epoch": 95.83012048192771, "grad_norm": 9.928934097290039, "learning_rate": 2.024271844660194e-06, "loss": 0.2639, "step": 9883 }, { "epoch": 95.83975903614459, "grad_norm": 3.4766592979431152, "learning_rate": 2.0194174757281553e-06, "loss": 0.0995, "step": 9884 }, { "epoch": 95.84939759036145, "grad_norm": 6.0926032066345215, "learning_rate": 2.0145631067961165e-06, "loss": 0.1629, "step": 9885 }, { "epoch": 95.85903614457831, "grad_norm": 11.815567970275879, "learning_rate": 2.009708737864078e-06, "loss": 0.3458, "step": 9886 }, { "epoch": 95.86867469879518, "grad_norm": 3.487826347351074, "learning_rate": 2.0048543689320388e-06, "loss": 0.2592, "step": 9887 }, { "epoch": 95.87831325301205, "grad_norm": 16.049156188964844, "learning_rate": 2.0000000000000003e-06, "loss": 0.2291, "step": 9888 }, { "epoch": 95.88795180722892, "grad_norm": 4.091820240020752, "learning_rate": 1.9951456310679615e-06, "loss": 0.2628, "step": 9889 }, { "epoch": 95.89759036144578, "grad_norm": 4.577784061431885, "learning_rate": 1.990291262135922e-06, "loss": 0.2436, "step": 9890 }, { "epoch": 95.90722891566266, "grad_norm": 6.818725109100342, "learning_rate": 1.9854368932038838e-06, "loss": 0.1731, "step": 9891 }, { "epoch": 95.91686746987952, "grad_norm": 6.549217700958252, "learning_rate": 1.9805825242718445e-06, "loss": 0.1432, "step": 9892 }, { "epoch": 95.92650602409638, "grad_norm": 5.575873374938965, "learning_rate": 1.975728155339806e-06, "loss": 0.328, "step": 9893 }, { "epoch": 95.93614457831325, "grad_norm": 2.4058570861816406, "learning_rate": 1.970873786407767e-06, "loss": 0.0652, "step": 9894 }, { "epoch": 95.94578313253012, "grad_norm": 4.939208984375, "learning_rate": 1.966019417475728e-06, "loss": 0.1775, "step": 9895 }, { "epoch": 95.95542168674699, "grad_norm": 2.253488302230835, "learning_rate": 1.9611650485436895e-06, "loss": 0.1041, "step": 9896 }, { "epoch": 95.96506024096385, "grad_norm": 2.9883978366851807, "learning_rate": 1.9563106796116506e-06, "loss": 0.0974, "step": 9897 }, { "epoch": 95.97469879518073, "grad_norm": 6.072626113891602, "learning_rate": 1.951456310679612e-06, "loss": 0.3122, "step": 9898 }, { "epoch": 95.98433734939759, "grad_norm": 5.974035263061523, "learning_rate": 1.946601941747573e-06, "loss": 0.3007, "step": 9899 }, { "epoch": 95.99397590361446, "grad_norm": 7.071403503417969, "learning_rate": 1.941747572815534e-06, "loss": 0.3308, "step": 9900 }, { "epoch": 96.00963855421686, "grad_norm": 6.519647121429443, "learning_rate": 1.9368932038834952e-06, "loss": 0.1515, "step": 9901 }, { "epoch": 96.01927710843374, "grad_norm": 9.572288513183594, "learning_rate": 1.9320388349514564e-06, "loss": 0.27, "step": 9902 }, { "epoch": 96.0289156626506, "grad_norm": 4.078728675842285, "learning_rate": 1.9271844660194175e-06, "loss": 0.1869, "step": 9903 }, { "epoch": 96.03855421686747, "grad_norm": 9.265519142150879, "learning_rate": 1.9223300970873787e-06, "loss": 0.3575, "step": 9904 }, { "epoch": 96.04819277108433, "grad_norm": 4.068563938140869, "learning_rate": 1.91747572815534e-06, "loss": 0.1733, "step": 9905 }, { "epoch": 96.05783132530121, "grad_norm": 5.542032718658447, "learning_rate": 1.912621359223301e-06, "loss": 0.3646, "step": 9906 }, { "epoch": 96.06746987951807, "grad_norm": 4.7040019035339355, "learning_rate": 1.907766990291262e-06, "loss": 0.2168, "step": 9907 }, { "epoch": 96.07710843373494, "grad_norm": 3.7556140422821045, "learning_rate": 1.9029126213592235e-06, "loss": 0.221, "step": 9908 }, { "epoch": 96.08674698795181, "grad_norm": 5.178526878356934, "learning_rate": 1.8980582524271844e-06, "loss": 0.1885, "step": 9909 }, { "epoch": 96.09638554216868, "grad_norm": 4.054610729217529, "learning_rate": 1.8932038834951458e-06, "loss": 0.1689, "step": 9910 }, { "epoch": 96.10602409638554, "grad_norm": 1.0618617534637451, "learning_rate": 1.8883495145631067e-06, "loss": 0.0335, "step": 9911 }, { "epoch": 96.1156626506024, "grad_norm": 7.882120132446289, "learning_rate": 1.883495145631068e-06, "loss": 0.1743, "step": 9912 }, { "epoch": 96.12530120481928, "grad_norm": 2.800396680831909, "learning_rate": 1.8786407766990294e-06, "loss": 0.1515, "step": 9913 }, { "epoch": 96.13493975903614, "grad_norm": 12.739153861999512, "learning_rate": 1.8737864077669903e-06, "loss": 0.1394, "step": 9914 }, { "epoch": 96.144578313253, "grad_norm": 5.522619247436523, "learning_rate": 1.8689320388349515e-06, "loss": 0.3409, "step": 9915 }, { "epoch": 96.15421686746988, "grad_norm": 5.460393905639648, "learning_rate": 1.8640776699029128e-06, "loss": 0.2535, "step": 9916 }, { "epoch": 96.16385542168675, "grad_norm": 7.894667148590088, "learning_rate": 1.8592233009708738e-06, "loss": 0.2814, "step": 9917 }, { "epoch": 96.17349397590361, "grad_norm": 3.7985546588897705, "learning_rate": 1.8543689320388351e-06, "loss": 0.253, "step": 9918 }, { "epoch": 96.18313253012049, "grad_norm": 2.7469546794891357, "learning_rate": 1.8495145631067963e-06, "loss": 0.1428, "step": 9919 }, { "epoch": 96.19277108433735, "grad_norm": 3.347294330596924, "learning_rate": 1.8446601941747572e-06, "loss": 0.1266, "step": 9920 }, { "epoch": 96.20240963855422, "grad_norm": 7.607194423675537, "learning_rate": 1.8398058252427186e-06, "loss": 0.3573, "step": 9921 }, { "epoch": 96.21204819277108, "grad_norm": 4.546878337860107, "learning_rate": 1.8349514563106795e-06, "loss": 0.1636, "step": 9922 }, { "epoch": 96.22168674698796, "grad_norm": 2.159264326095581, "learning_rate": 1.8300970873786409e-06, "loss": 0.1439, "step": 9923 }, { "epoch": 96.23132530120482, "grad_norm": 5.582036018371582, "learning_rate": 1.8252427184466022e-06, "loss": 0.2138, "step": 9924 }, { "epoch": 96.24096385542168, "grad_norm": 3.8391060829162598, "learning_rate": 1.8203883495145631e-06, "loss": 0.2193, "step": 9925 }, { "epoch": 96.25060240963856, "grad_norm": 6.56218957901001, "learning_rate": 1.8155339805825243e-06, "loss": 0.3715, "step": 9926 }, { "epoch": 96.26024096385542, "grad_norm": 5.125417709350586, "learning_rate": 1.8106796116504857e-06, "loss": 0.1647, "step": 9927 }, { "epoch": 96.26987951807229, "grad_norm": 1.9338550567626953, "learning_rate": 1.8058252427184466e-06, "loss": 0.1104, "step": 9928 }, { "epoch": 96.27951807228915, "grad_norm": 7.745937347412109, "learning_rate": 1.800970873786408e-06, "loss": 0.2757, "step": 9929 }, { "epoch": 96.28915662650603, "grad_norm": 4.424197673797607, "learning_rate": 1.7961165048543689e-06, "loss": 0.1487, "step": 9930 }, { "epoch": 96.29879518072289, "grad_norm": 6.12689733505249, "learning_rate": 1.7912621359223302e-06, "loss": 0.1836, "step": 9931 }, { "epoch": 96.30843373493975, "grad_norm": 4.0843915939331055, "learning_rate": 1.7864077669902914e-06, "loss": 0.0998, "step": 9932 }, { "epoch": 96.31807228915663, "grad_norm": 3.720764636993408, "learning_rate": 1.7815533980582523e-06, "loss": 0.1848, "step": 9933 }, { "epoch": 96.3277108433735, "grad_norm": 4.80274772644043, "learning_rate": 1.7766990291262137e-06, "loss": 0.1269, "step": 9934 }, { "epoch": 96.33734939759036, "grad_norm": 5.885231971740723, "learning_rate": 1.771844660194175e-06, "loss": 0.159, "step": 9935 }, { "epoch": 96.34698795180722, "grad_norm": 2.904669761657715, "learning_rate": 1.766990291262136e-06, "loss": 0.2488, "step": 9936 }, { "epoch": 96.3566265060241, "grad_norm": 4.098212718963623, "learning_rate": 1.7621359223300971e-06, "loss": 0.2097, "step": 9937 }, { "epoch": 96.36626506024096, "grad_norm": 2.470505714416504, "learning_rate": 1.7572815533980585e-06, "loss": 0.13, "step": 9938 }, { "epoch": 96.37590361445783, "grad_norm": 5.696125030517578, "learning_rate": 1.7524271844660194e-06, "loss": 0.3298, "step": 9939 }, { "epoch": 96.3855421686747, "grad_norm": 2.885674476623535, "learning_rate": 1.7475728155339808e-06, "loss": 0.1865, "step": 9940 }, { "epoch": 96.39518072289157, "grad_norm": 4.600649356842041, "learning_rate": 1.7427184466019417e-06, "loss": 0.2188, "step": 9941 }, { "epoch": 96.40481927710843, "grad_norm": 3.8711204528808594, "learning_rate": 1.737864077669903e-06, "loss": 0.1531, "step": 9942 }, { "epoch": 96.41445783132531, "grad_norm": 3.6092169284820557, "learning_rate": 1.7330097087378642e-06, "loss": 0.1659, "step": 9943 }, { "epoch": 96.42409638554217, "grad_norm": 5.915864944458008, "learning_rate": 1.7281553398058251e-06, "loss": 0.2067, "step": 9944 }, { "epoch": 96.43373493975903, "grad_norm": 5.5170578956604, "learning_rate": 1.7233009708737865e-06, "loss": 0.2021, "step": 9945 }, { "epoch": 96.4433734939759, "grad_norm": 6.713395118713379, "learning_rate": 1.7184466019417478e-06, "loss": 0.2524, "step": 9946 }, { "epoch": 96.45301204819278, "grad_norm": 4.051933288574219, "learning_rate": 1.7135922330097088e-06, "loss": 0.2079, "step": 9947 }, { "epoch": 96.46265060240964, "grad_norm": 4.449484348297119, "learning_rate": 1.70873786407767e-06, "loss": 0.2127, "step": 9948 }, { "epoch": 96.4722891566265, "grad_norm": 7.595247745513916, "learning_rate": 1.7038834951456313e-06, "loss": 0.3968, "step": 9949 }, { "epoch": 96.48192771084338, "grad_norm": 5.671057224273682, "learning_rate": 1.6990291262135922e-06, "loss": 0.4278, "step": 9950 }, { "epoch": 96.49156626506024, "grad_norm": 4.743671417236328, "learning_rate": 1.6941747572815536e-06, "loss": 0.2884, "step": 9951 }, { "epoch": 96.5012048192771, "grad_norm": 8.907835006713867, "learning_rate": 1.6893203883495145e-06, "loss": 0.1815, "step": 9952 }, { "epoch": 96.51084337349397, "grad_norm": 5.7228240966796875, "learning_rate": 1.6844660194174759e-06, "loss": 0.1763, "step": 9953 }, { "epoch": 96.52048192771085, "grad_norm": 6.9623589515686035, "learning_rate": 1.679611650485437e-06, "loss": 0.3315, "step": 9954 }, { "epoch": 96.53012048192771, "grad_norm": 5.918176174163818, "learning_rate": 1.674757281553398e-06, "loss": 0.3225, "step": 9955 }, { "epoch": 96.53975903614457, "grad_norm": 4.21279764175415, "learning_rate": 1.6699029126213593e-06, "loss": 0.0891, "step": 9956 }, { "epoch": 96.54939759036145, "grad_norm": 4.588578224182129, "learning_rate": 1.6650485436893207e-06, "loss": 0.1202, "step": 9957 }, { "epoch": 96.55903614457831, "grad_norm": 4.685133457183838, "learning_rate": 1.6601941747572816e-06, "loss": 0.2682, "step": 9958 }, { "epoch": 96.56867469879518, "grad_norm": 4.438490867614746, "learning_rate": 1.6553398058252427e-06, "loss": 0.3483, "step": 9959 }, { "epoch": 96.57831325301204, "grad_norm": 13.518725395202637, "learning_rate": 1.650485436893204e-06, "loss": 0.2793, "step": 9960 }, { "epoch": 96.58795180722892, "grad_norm": 3.44885516166687, "learning_rate": 1.645631067961165e-06, "loss": 0.1807, "step": 9961 }, { "epoch": 96.59759036144578, "grad_norm": 2.8437869548797607, "learning_rate": 1.6407766990291264e-06, "loss": 0.0931, "step": 9962 }, { "epoch": 96.60722891566265, "grad_norm": 4.820814609527588, "learning_rate": 1.6359223300970873e-06, "loss": 0.1819, "step": 9963 }, { "epoch": 96.61686746987952, "grad_norm": 4.125539779663086, "learning_rate": 1.6310679611650487e-06, "loss": 0.2016, "step": 9964 }, { "epoch": 96.62650602409639, "grad_norm": 5.167506694793701, "learning_rate": 1.6262135922330098e-06, "loss": 0.1544, "step": 9965 }, { "epoch": 96.63614457831325, "grad_norm": 6.324057102203369, "learning_rate": 1.6213592233009708e-06, "loss": 0.204, "step": 9966 }, { "epoch": 96.64578313253013, "grad_norm": 2.4834790229797363, "learning_rate": 1.6165048543689321e-06, "loss": 0.1097, "step": 9967 }, { "epoch": 96.65542168674699, "grad_norm": 4.700379371643066, "learning_rate": 1.6116504854368935e-06, "loss": 0.0634, "step": 9968 }, { "epoch": 96.66506024096385, "grad_norm": 7.490963935852051, "learning_rate": 1.6067961165048544e-06, "loss": 0.5155, "step": 9969 }, { "epoch": 96.67469879518072, "grad_norm": 4.325498580932617, "learning_rate": 1.6019417475728158e-06, "loss": 0.1486, "step": 9970 }, { "epoch": 96.6843373493976, "grad_norm": 3.4714157581329346, "learning_rate": 1.5970873786407767e-06, "loss": 0.2092, "step": 9971 }, { "epoch": 96.69397590361446, "grad_norm": 4.691205978393555, "learning_rate": 1.5922330097087378e-06, "loss": 0.3197, "step": 9972 }, { "epoch": 96.70361445783132, "grad_norm": 7.098106384277344, "learning_rate": 1.5873786407766992e-06, "loss": 0.5562, "step": 9973 }, { "epoch": 96.7132530120482, "grad_norm": 7.442965984344482, "learning_rate": 1.5825242718446601e-06, "loss": 0.4473, "step": 9974 }, { "epoch": 96.72289156626506, "grad_norm": 3.4163551330566406, "learning_rate": 1.5776699029126215e-06, "loss": 0.2681, "step": 9975 }, { "epoch": 96.73253012048193, "grad_norm": 4.317081451416016, "learning_rate": 1.5728155339805826e-06, "loss": 0.2381, "step": 9976 }, { "epoch": 96.74216867469879, "grad_norm": 2.4195430278778076, "learning_rate": 1.5679611650485436e-06, "loss": 0.1089, "step": 9977 }, { "epoch": 96.75180722891567, "grad_norm": 3.8014767169952393, "learning_rate": 1.563106796116505e-06, "loss": 0.3795, "step": 9978 }, { "epoch": 96.76144578313253, "grad_norm": 2.2962698936462402, "learning_rate": 1.558252427184466e-06, "loss": 0.1117, "step": 9979 }, { "epoch": 96.7710843373494, "grad_norm": 4.372409820556641, "learning_rate": 1.5533980582524272e-06, "loss": 0.2879, "step": 9980 }, { "epoch": 96.78072289156627, "grad_norm": 3.492727041244507, "learning_rate": 1.5485436893203886e-06, "loss": 0.2052, "step": 9981 }, { "epoch": 96.79036144578313, "grad_norm": 5.5207390785217285, "learning_rate": 1.5436893203883495e-06, "loss": 0.2645, "step": 9982 }, { "epoch": 96.8, "grad_norm": 5.755579948425293, "learning_rate": 1.5388349514563107e-06, "loss": 0.3569, "step": 9983 }, { "epoch": 96.80963855421686, "grad_norm": 4.443293571472168, "learning_rate": 1.5339805825242718e-06, "loss": 0.2435, "step": 9984 }, { "epoch": 96.81927710843374, "grad_norm": 4.982975959777832, "learning_rate": 1.5291262135922332e-06, "loss": 0.2863, "step": 9985 }, { "epoch": 96.8289156626506, "grad_norm": 5.5547308921813965, "learning_rate": 1.5242718446601943e-06, "loss": 0.258, "step": 9986 }, { "epoch": 96.83855421686746, "grad_norm": 4.929712295532227, "learning_rate": 1.5194174757281555e-06, "loss": 0.1679, "step": 9987 }, { "epoch": 96.84819277108434, "grad_norm": 4.790177345275879, "learning_rate": 1.5145631067961166e-06, "loss": 0.1274, "step": 9988 }, { "epoch": 96.8578313253012, "grad_norm": 9.481369972229004, "learning_rate": 1.5097087378640777e-06, "loss": 0.2416, "step": 9989 }, { "epoch": 96.86746987951807, "grad_norm": 2.407477617263794, "learning_rate": 1.5048543689320389e-06, "loss": 0.1669, "step": 9990 }, { "epoch": 96.87710843373495, "grad_norm": 4.433596134185791, "learning_rate": 1.5e-06, "loss": 0.1882, "step": 9991 }, { "epoch": 96.88674698795181, "grad_norm": 9.024727821350098, "learning_rate": 1.4951456310679614e-06, "loss": 0.2923, "step": 9992 }, { "epoch": 96.89638554216867, "grad_norm": 2.7567594051361084, "learning_rate": 1.4902912621359223e-06, "loss": 0.1245, "step": 9993 }, { "epoch": 96.90602409638554, "grad_norm": 5.800221920013428, "learning_rate": 1.4854368932038835e-06, "loss": 0.1576, "step": 9994 }, { "epoch": 96.91566265060241, "grad_norm": 3.9876511096954346, "learning_rate": 1.4805825242718446e-06, "loss": 0.1079, "step": 9995 }, { "epoch": 96.92530120481928, "grad_norm": 4.256157875061035, "learning_rate": 1.475728155339806e-06, "loss": 0.3218, "step": 9996 }, { "epoch": 96.93493975903614, "grad_norm": 6.242279052734375, "learning_rate": 1.4708737864077671e-06, "loss": 0.2349, "step": 9997 }, { "epoch": 96.94457831325302, "grad_norm": 3.965733766555786, "learning_rate": 1.4660194174757283e-06, "loss": 0.1687, "step": 9998 }, { "epoch": 96.95421686746988, "grad_norm": 12.971552848815918, "learning_rate": 1.4611650485436894e-06, "loss": 0.2593, "step": 9999 }, { "epoch": 96.96385542168674, "grad_norm": 6.21834135055542, "learning_rate": 1.4563106796116506e-06, "loss": 0.432, "step": 10000 }, { "epoch": 96.97349397590361, "grad_norm": 2.4095752239227295, "learning_rate": 1.4514563106796117e-06, "loss": 0.1149, "step": 10001 }, { "epoch": 96.98313253012049, "grad_norm": 4.166393280029297, "learning_rate": 1.4466019417475729e-06, "loss": 0.1747, "step": 10002 }, { "epoch": 96.99277108433735, "grad_norm": 5.638331890106201, "learning_rate": 1.4417475728155342e-06, "loss": 0.2908, "step": 10003 }, { "epoch": 97.00843373493976, "grad_norm": 34.209136962890625, "learning_rate": 1.4368932038834951e-06, "loss": 0.1411, "step": 10004 }, { "epoch": 97.01807228915662, "grad_norm": 6.255698204040527, "learning_rate": 1.4320388349514563e-06, "loss": 0.17, "step": 10005 }, { "epoch": 97.0277108433735, "grad_norm": 2.340972900390625, "learning_rate": 1.4271844660194174e-06, "loss": 0.1552, "step": 10006 }, { "epoch": 97.03734939759036, "grad_norm": 9.682961463928223, "learning_rate": 1.4223300970873788e-06, "loss": 0.1218, "step": 10007 }, { "epoch": 97.04698795180722, "grad_norm": 14.87345027923584, "learning_rate": 1.41747572815534e-06, "loss": 0.1916, "step": 10008 }, { "epoch": 97.0566265060241, "grad_norm": 2.862739324569702, "learning_rate": 1.412621359223301e-06, "loss": 0.1332, "step": 10009 }, { "epoch": 97.06626506024097, "grad_norm": 16.173166275024414, "learning_rate": 1.4077669902912622e-06, "loss": 0.2472, "step": 10010 }, { "epoch": 97.07590361445783, "grad_norm": 3.580650568008423, "learning_rate": 1.4029126213592234e-06, "loss": 0.1833, "step": 10011 }, { "epoch": 97.08554216867469, "grad_norm": 6.335146903991699, "learning_rate": 1.3980582524271845e-06, "loss": 0.2975, "step": 10012 }, { "epoch": 97.09518072289157, "grad_norm": 6.316359996795654, "learning_rate": 1.3932038834951457e-06, "loss": 0.3193, "step": 10013 }, { "epoch": 97.10481927710843, "grad_norm": 63.67409896850586, "learning_rate": 1.3883495145631068e-06, "loss": 0.358, "step": 10014 }, { "epoch": 97.1144578313253, "grad_norm": 8.367744445800781, "learning_rate": 1.3834951456310682e-06, "loss": 0.2959, "step": 10015 }, { "epoch": 97.12409638554217, "grad_norm": 10.382550239562988, "learning_rate": 1.378640776699029e-06, "loss": 0.2979, "step": 10016 }, { "epoch": 97.13373493975904, "grad_norm": 5.733649730682373, "learning_rate": 1.3737864077669903e-06, "loss": 0.239, "step": 10017 }, { "epoch": 97.1433734939759, "grad_norm": 11.829391479492188, "learning_rate": 1.3689320388349516e-06, "loss": 0.2944, "step": 10018 }, { "epoch": 97.15301204819278, "grad_norm": 6.606571674346924, "learning_rate": 1.3640776699029128e-06, "loss": 0.4206, "step": 10019 }, { "epoch": 97.16265060240964, "grad_norm": 6.46931266784668, "learning_rate": 1.359223300970874e-06, "loss": 0.2195, "step": 10020 }, { "epoch": 97.1722891566265, "grad_norm": 14.918499946594238, "learning_rate": 1.354368932038835e-06, "loss": 0.1102, "step": 10021 }, { "epoch": 97.18192771084337, "grad_norm": 1.6304258108139038, "learning_rate": 1.3495145631067962e-06, "loss": 0.1087, "step": 10022 }, { "epoch": 97.19156626506025, "grad_norm": 2.091452121734619, "learning_rate": 1.3446601941747573e-06, "loss": 0.168, "step": 10023 }, { "epoch": 97.20120481927711, "grad_norm": 10.877053260803223, "learning_rate": 1.3398058252427185e-06, "loss": 0.4195, "step": 10024 }, { "epoch": 97.21084337349397, "grad_norm": 21.55146026611328, "learning_rate": 1.3349514563106796e-06, "loss": 0.2098, "step": 10025 }, { "epoch": 97.22048192771085, "grad_norm": 4.215376853942871, "learning_rate": 1.330097087378641e-06, "loss": 0.1508, "step": 10026 }, { "epoch": 97.23012048192771, "grad_norm": 7.270651817321777, "learning_rate": 1.325242718446602e-06, "loss": 0.2442, "step": 10027 }, { "epoch": 97.23975903614458, "grad_norm": 27.807327270507812, "learning_rate": 1.320388349514563e-06, "loss": 0.2504, "step": 10028 }, { "epoch": 97.24939759036144, "grad_norm": 4.48378324508667, "learning_rate": 1.3155339805825242e-06, "loss": 0.1008, "step": 10029 }, { "epoch": 97.25903614457832, "grad_norm": 2.271782875061035, "learning_rate": 1.3106796116504856e-06, "loss": 0.3563, "step": 10030 }, { "epoch": 97.26867469879518, "grad_norm": 12.086729049682617, "learning_rate": 1.3058252427184467e-06, "loss": 0.2375, "step": 10031 }, { "epoch": 97.27831325301204, "grad_norm": 8.24238395690918, "learning_rate": 1.3009708737864079e-06, "loss": 0.2798, "step": 10032 }, { "epoch": 97.28795180722892, "grad_norm": 2.636533737182617, "learning_rate": 1.296116504854369e-06, "loss": 0.1691, "step": 10033 }, { "epoch": 97.29759036144578, "grad_norm": 5.774722576141357, "learning_rate": 1.2912621359223302e-06, "loss": 0.6335, "step": 10034 }, { "epoch": 97.30722891566265, "grad_norm": 8.381270408630371, "learning_rate": 1.2864077669902913e-06, "loss": 0.1086, "step": 10035 }, { "epoch": 97.31686746987951, "grad_norm": 3.0453641414642334, "learning_rate": 1.2815533980582524e-06, "loss": 0.2444, "step": 10036 }, { "epoch": 97.32650602409639, "grad_norm": 3.515181064605713, "learning_rate": 1.2766990291262138e-06, "loss": 0.2379, "step": 10037 }, { "epoch": 97.33614457831325, "grad_norm": 6.34205961227417, "learning_rate": 1.2718446601941747e-06, "loss": 0.2685, "step": 10038 }, { "epoch": 97.34578313253012, "grad_norm": 8.534684181213379, "learning_rate": 1.2669902912621359e-06, "loss": 0.1437, "step": 10039 }, { "epoch": 97.355421686747, "grad_norm": 3.618232250213623, "learning_rate": 1.262135922330097e-06, "loss": 0.2449, "step": 10040 }, { "epoch": 97.36506024096386, "grad_norm": 14.774942398071289, "learning_rate": 1.2572815533980584e-06, "loss": 0.2031, "step": 10041 }, { "epoch": 97.37469879518072, "grad_norm": 4.002391338348389, "learning_rate": 1.2524271844660195e-06, "loss": 0.1698, "step": 10042 }, { "epoch": 97.38433734939758, "grad_norm": 6.097592353820801, "learning_rate": 1.2475728155339807e-06, "loss": 0.183, "step": 10043 }, { "epoch": 97.39397590361446, "grad_norm": 11.925023078918457, "learning_rate": 1.2427184466019418e-06, "loss": 0.4296, "step": 10044 }, { "epoch": 97.40361445783132, "grad_norm": 6.853326320648193, "learning_rate": 1.237864077669903e-06, "loss": 0.4143, "step": 10045 }, { "epoch": 97.41325301204819, "grad_norm": 7.720587253570557, "learning_rate": 1.2330097087378641e-06, "loss": 0.1487, "step": 10046 }, { "epoch": 97.42289156626506, "grad_norm": 11.866097450256348, "learning_rate": 1.2281553398058253e-06, "loss": 0.3051, "step": 10047 }, { "epoch": 97.43253012048193, "grad_norm": 15.701708793640137, "learning_rate": 1.2233009708737866e-06, "loss": 0.1955, "step": 10048 }, { "epoch": 97.44216867469879, "grad_norm": 10.29208755493164, "learning_rate": 1.2184466019417478e-06, "loss": 0.1535, "step": 10049 }, { "epoch": 97.45180722891567, "grad_norm": 4.466795444488525, "learning_rate": 1.2135922330097087e-06, "loss": 0.4069, "step": 10050 }, { "epoch": 97.46144578313253, "grad_norm": 8.849760055541992, "learning_rate": 1.2087378640776698e-06, "loss": 0.1769, "step": 10051 }, { "epoch": 97.4710843373494, "grad_norm": 12.433730125427246, "learning_rate": 1.2038834951456312e-06, "loss": 0.2069, "step": 10052 }, { "epoch": 97.48072289156626, "grad_norm": 6.677312850952148, "learning_rate": 1.1990291262135923e-06, "loss": 0.467, "step": 10053 }, { "epoch": 97.49036144578314, "grad_norm": 13.907112121582031, "learning_rate": 1.1941747572815535e-06, "loss": 0.3239, "step": 10054 }, { "epoch": 97.5, "grad_norm": 1.6245598793029785, "learning_rate": 1.1893203883495146e-06, "loss": 0.2273, "step": 10055 }, { "epoch": 97.50963855421686, "grad_norm": 3.338587999343872, "learning_rate": 1.1844660194174758e-06, "loss": 0.2818, "step": 10056 }, { "epoch": 97.51927710843374, "grad_norm": 3.0984761714935303, "learning_rate": 1.179611650485437e-06, "loss": 0.077, "step": 10057 }, { "epoch": 97.5289156626506, "grad_norm": 14.36044692993164, "learning_rate": 1.174757281553398e-06, "loss": 0.2145, "step": 10058 }, { "epoch": 97.53855421686747, "grad_norm": 26.44110870361328, "learning_rate": 1.1699029126213594e-06, "loss": 0.0904, "step": 10059 }, { "epoch": 97.54819277108433, "grad_norm": 6.8519978523254395, "learning_rate": 1.1650485436893206e-06, "loss": 0.307, "step": 10060 }, { "epoch": 97.55783132530121, "grad_norm": 6.608664512634277, "learning_rate": 1.1601941747572815e-06, "loss": 0.2969, "step": 10061 }, { "epoch": 97.56746987951807, "grad_norm": 2.382139205932617, "learning_rate": 1.1553398058252427e-06, "loss": 0.0925, "step": 10062 }, { "epoch": 97.57710843373494, "grad_norm": 2.4575412273406982, "learning_rate": 1.150485436893204e-06, "loss": 0.1621, "step": 10063 }, { "epoch": 97.58674698795181, "grad_norm": 110.7374038696289, "learning_rate": 1.1456310679611652e-06, "loss": 0.1145, "step": 10064 }, { "epoch": 97.59638554216868, "grad_norm": 9.094319343566895, "learning_rate": 1.1407766990291263e-06, "loss": 0.2108, "step": 10065 }, { "epoch": 97.60602409638554, "grad_norm": 3.290431499481201, "learning_rate": 1.1359223300970875e-06, "loss": 0.2461, "step": 10066 }, { "epoch": 97.61566265060242, "grad_norm": 6.946500301361084, "learning_rate": 1.1310679611650486e-06, "loss": 0.2714, "step": 10067 }, { "epoch": 97.62530120481928, "grad_norm": 14.735548973083496, "learning_rate": 1.1262135922330097e-06, "loss": 0.3003, "step": 10068 }, { "epoch": 97.63493975903614, "grad_norm": 14.719311714172363, "learning_rate": 1.1213592233009709e-06, "loss": 0.2309, "step": 10069 }, { "epoch": 97.644578313253, "grad_norm": 2.9718048572540283, "learning_rate": 1.116504854368932e-06, "loss": 0.2104, "step": 10070 }, { "epoch": 97.65421686746988, "grad_norm": 6.512138843536377, "learning_rate": 1.1116504854368934e-06, "loss": 0.2617, "step": 10071 }, { "epoch": 97.66385542168675, "grad_norm": 6.217282295227051, "learning_rate": 1.1067961165048543e-06, "loss": 0.1096, "step": 10072 }, { "epoch": 97.67349397590361, "grad_norm": 3.4439926147460938, "learning_rate": 1.1019417475728155e-06, "loss": 0.1502, "step": 10073 }, { "epoch": 97.68313253012049, "grad_norm": 2.017566204071045, "learning_rate": 1.0970873786407768e-06, "loss": 0.1203, "step": 10074 }, { "epoch": 97.69277108433735, "grad_norm": 14.896286964416504, "learning_rate": 1.092233009708738e-06, "loss": 0.3486, "step": 10075 }, { "epoch": 97.70240963855422, "grad_norm": 3.214235305786133, "learning_rate": 1.0873786407766991e-06, "loss": 0.166, "step": 10076 }, { "epoch": 97.71204819277108, "grad_norm": 11.04320240020752, "learning_rate": 1.0825242718446603e-06, "loss": 0.2042, "step": 10077 }, { "epoch": 97.72168674698796, "grad_norm": 22.16773796081543, "learning_rate": 1.0776699029126214e-06, "loss": 0.1636, "step": 10078 }, { "epoch": 97.73132530120482, "grad_norm": 19.12773895263672, "learning_rate": 1.0728155339805826e-06, "loss": 0.2655, "step": 10079 }, { "epoch": 97.74096385542168, "grad_norm": 9.632644653320312, "learning_rate": 1.0679611650485437e-06, "loss": 0.2253, "step": 10080 }, { "epoch": 97.75060240963856, "grad_norm": 2.172842264175415, "learning_rate": 1.0631067961165048e-06, "loss": 0.2074, "step": 10081 }, { "epoch": 97.76024096385542, "grad_norm": 2.454066753387451, "learning_rate": 1.0582524271844662e-06, "loss": 0.2833, "step": 10082 }, { "epoch": 97.76987951807229, "grad_norm": 9.768370628356934, "learning_rate": 1.0533980582524274e-06, "loss": 0.182, "step": 10083 }, { "epoch": 97.77951807228915, "grad_norm": 10.780905723571777, "learning_rate": 1.0485436893203883e-06, "loss": 0.1094, "step": 10084 }, { "epoch": 97.78915662650603, "grad_norm": 5.025300979614258, "learning_rate": 1.0436893203883494e-06, "loss": 0.2675, "step": 10085 }, { "epoch": 97.79879518072289, "grad_norm": 13.272716522216797, "learning_rate": 1.0388349514563108e-06, "loss": 0.135, "step": 10086 }, { "epoch": 97.80843373493975, "grad_norm": 1.6998263597488403, "learning_rate": 1.033980582524272e-06, "loss": 0.1824, "step": 10087 }, { "epoch": 97.81807228915663, "grad_norm": 25.440401077270508, "learning_rate": 1.029126213592233e-06, "loss": 0.2568, "step": 10088 }, { "epoch": 97.8277108433735, "grad_norm": 2.6934547424316406, "learning_rate": 1.0242718446601942e-06, "loss": 0.1298, "step": 10089 }, { "epoch": 97.83734939759036, "grad_norm": 3.4510653018951416, "learning_rate": 1.0194174757281554e-06, "loss": 0.1324, "step": 10090 }, { "epoch": 97.84698795180722, "grad_norm": 11.914668083190918, "learning_rate": 1.0145631067961165e-06, "loss": 0.2783, "step": 10091 }, { "epoch": 97.8566265060241, "grad_norm": 16.24652862548828, "learning_rate": 1.0097087378640777e-06, "loss": 0.2671, "step": 10092 }, { "epoch": 97.86626506024096, "grad_norm": 16.94619369506836, "learning_rate": 1.004854368932039e-06, "loss": 0.2366, "step": 10093 }, { "epoch": 97.87590361445783, "grad_norm": 17.260337829589844, "learning_rate": 1.0000000000000002e-06, "loss": 0.2564, "step": 10094 }, { "epoch": 97.8855421686747, "grad_norm": 5.963753700256348, "learning_rate": 9.95145631067961e-07, "loss": 0.319, "step": 10095 }, { "epoch": 97.89518072289157, "grad_norm": 22.25075340270996, "learning_rate": 9.902912621359222e-07, "loss": 0.3137, "step": 10096 }, { "epoch": 97.90481927710843, "grad_norm": 6.355190753936768, "learning_rate": 9.854368932038836e-07, "loss": 0.0754, "step": 10097 }, { "epoch": 97.91445783132531, "grad_norm": 12.963835716247559, "learning_rate": 9.805825242718447e-07, "loss": 0.2263, "step": 10098 }, { "epoch": 97.92409638554217, "grad_norm": 6.590553283691406, "learning_rate": 9.75728155339806e-07, "loss": 0.3055, "step": 10099 }, { "epoch": 97.93373493975903, "grad_norm": 21.218961715698242, "learning_rate": 9.70873786407767e-07, "loss": 0.3483, "step": 10100 }, { "epoch": 97.9433734939759, "grad_norm": 16.446386337280273, "learning_rate": 9.660194174757282e-07, "loss": 0.1539, "step": 10101 }, { "epoch": 97.95301204819278, "grad_norm": 1.2630865573883057, "learning_rate": 9.611650485436893e-07, "loss": 0.1612, "step": 10102 }, { "epoch": 97.96265060240964, "grad_norm": 2.6655118465423584, "learning_rate": 9.563106796116505e-07, "loss": 0.1493, "step": 10103 }, { "epoch": 97.9722891566265, "grad_norm": 9.550360679626465, "learning_rate": 9.514563106796117e-07, "loss": 0.2052, "step": 10104 }, { "epoch": 97.98192771084338, "grad_norm": 23.098257064819336, "learning_rate": 9.466019417475729e-07, "loss": 0.3382, "step": 10105 }, { "epoch": 97.99156626506024, "grad_norm": 17.909704208374023, "learning_rate": 9.41747572815534e-07, "loss": 0.1505, "step": 10106 }, { "epoch": 98.00722891566265, "grad_norm": 8.453843116760254, "learning_rate": 9.368932038834952e-07, "loss": 0.1718, "step": 10107 }, { "epoch": 98.01686746987951, "grad_norm": 8.11262321472168, "learning_rate": 9.320388349514564e-07, "loss": 0.2549, "step": 10108 }, { "epoch": 98.02650602409639, "grad_norm": 9.404192924499512, "learning_rate": 9.271844660194176e-07, "loss": 0.3647, "step": 10109 }, { "epoch": 98.03614457831326, "grad_norm": 8.166914939880371, "learning_rate": 9.223300970873786e-07, "loss": 0.0475, "step": 10110 }, { "epoch": 98.04578313253012, "grad_norm": 1.4618333578109741, "learning_rate": 9.174757281553397e-07, "loss": 0.137, "step": 10111 }, { "epoch": 98.05542168674698, "grad_norm": 2.678661346435547, "learning_rate": 9.126213592233011e-07, "loss": 0.1128, "step": 10112 }, { "epoch": 98.06506024096386, "grad_norm": 2.508331060409546, "learning_rate": 9.077669902912621e-07, "loss": 0.2152, "step": 10113 }, { "epoch": 98.07469879518072, "grad_norm": 0.8785597681999207, "learning_rate": 9.029126213592233e-07, "loss": 0.1675, "step": 10114 }, { "epoch": 98.08433734939759, "grad_norm": 28.69635772705078, "learning_rate": 8.980582524271844e-07, "loss": 0.4302, "step": 10115 }, { "epoch": 98.09397590361446, "grad_norm": 15.013662338256836, "learning_rate": 8.932038834951457e-07, "loss": 0.4421, "step": 10116 }, { "epoch": 98.10361445783133, "grad_norm": 2.8715388774871826, "learning_rate": 8.883495145631068e-07, "loss": 0.2009, "step": 10117 }, { "epoch": 98.11325301204819, "grad_norm": 3.8735227584838867, "learning_rate": 8.83495145631068e-07, "loss": 0.2519, "step": 10118 }, { "epoch": 98.12289156626505, "grad_norm": 2.1533594131469727, "learning_rate": 8.786407766990292e-07, "loss": 0.1212, "step": 10119 }, { "epoch": 98.13253012048193, "grad_norm": 6.581277847290039, "learning_rate": 8.737864077669904e-07, "loss": 0.2018, "step": 10120 }, { "epoch": 98.1421686746988, "grad_norm": 3.2390198707580566, "learning_rate": 8.689320388349515e-07, "loss": 0.1268, "step": 10121 }, { "epoch": 98.15180722891566, "grad_norm": 0.724547803401947, "learning_rate": 8.640776699029126e-07, "loss": 0.1022, "step": 10122 }, { "epoch": 98.16144578313254, "grad_norm": 17.636369705200195, "learning_rate": 8.592233009708739e-07, "loss": 0.2723, "step": 10123 }, { "epoch": 98.1710843373494, "grad_norm": 11.20794677734375, "learning_rate": 8.54368932038835e-07, "loss": 0.3323, "step": 10124 }, { "epoch": 98.18072289156626, "grad_norm": 2.924626588821411, "learning_rate": 8.495145631067961e-07, "loss": 0.1191, "step": 10125 }, { "epoch": 98.19036144578314, "grad_norm": 11.012542724609375, "learning_rate": 8.446601941747573e-07, "loss": 0.4405, "step": 10126 }, { "epoch": 98.2, "grad_norm": 3.363846778869629, "learning_rate": 8.398058252427185e-07, "loss": 0.2128, "step": 10127 }, { "epoch": 98.20963855421687, "grad_norm": 4.349563121795654, "learning_rate": 8.349514563106797e-07, "loss": 0.2415, "step": 10128 }, { "epoch": 98.21927710843373, "grad_norm": 9.912349700927734, "learning_rate": 8.300970873786408e-07, "loss": 0.2256, "step": 10129 }, { "epoch": 98.2289156626506, "grad_norm": 24.109241485595703, "learning_rate": 8.25242718446602e-07, "loss": 0.3713, "step": 10130 }, { "epoch": 98.23855421686747, "grad_norm": 8.072701454162598, "learning_rate": 8.203883495145632e-07, "loss": 0.3024, "step": 10131 }, { "epoch": 98.24819277108433, "grad_norm": 7.972222328186035, "learning_rate": 8.155339805825243e-07, "loss": 0.1559, "step": 10132 }, { "epoch": 98.25783132530121, "grad_norm": 4.6976318359375, "learning_rate": 8.106796116504854e-07, "loss": 0.1757, "step": 10133 }, { "epoch": 98.26746987951807, "grad_norm": 14.336421012878418, "learning_rate": 8.058252427184467e-07, "loss": 0.205, "step": 10134 }, { "epoch": 98.27710843373494, "grad_norm": 2.00260066986084, "learning_rate": 8.009708737864079e-07, "loss": 0.1583, "step": 10135 }, { "epoch": 98.2867469879518, "grad_norm": 0.5420354604721069, "learning_rate": 7.961165048543689e-07, "loss": 0.1831, "step": 10136 }, { "epoch": 98.29638554216868, "grad_norm": 17.33089828491211, "learning_rate": 7.912621359223301e-07, "loss": 0.1373, "step": 10137 }, { "epoch": 98.30602409638554, "grad_norm": 16.255844116210938, "learning_rate": 7.864077669902913e-07, "loss": 0.3737, "step": 10138 }, { "epoch": 98.3156626506024, "grad_norm": 3.693897247314453, "learning_rate": 7.815533980582525e-07, "loss": 0.1621, "step": 10139 }, { "epoch": 98.32530120481928, "grad_norm": 18.108394622802734, "learning_rate": 7.766990291262136e-07, "loss": 0.0769, "step": 10140 }, { "epoch": 98.33493975903615, "grad_norm": 2.1942601203918457, "learning_rate": 7.718446601941748e-07, "loss": 0.127, "step": 10141 }, { "epoch": 98.34457831325301, "grad_norm": 7.464439868927002, "learning_rate": 7.669902912621359e-07, "loss": 0.1393, "step": 10142 }, { "epoch": 98.35421686746987, "grad_norm": 3.3021187782287598, "learning_rate": 7.621359223300972e-07, "loss": 0.3178, "step": 10143 }, { "epoch": 98.36385542168675, "grad_norm": 3.7021281719207764, "learning_rate": 7.572815533980583e-07, "loss": 0.2283, "step": 10144 }, { "epoch": 98.37349397590361, "grad_norm": 3.4534707069396973, "learning_rate": 7.524271844660194e-07, "loss": 0.1157, "step": 10145 }, { "epoch": 98.38313253012048, "grad_norm": 48.82894515991211, "learning_rate": 7.475728155339807e-07, "loss": 0.3984, "step": 10146 }, { "epoch": 98.39277108433735, "grad_norm": 20.857397079467773, "learning_rate": 7.427184466019417e-07, "loss": 0.3843, "step": 10147 }, { "epoch": 98.40240963855422, "grad_norm": 6.374627590179443, "learning_rate": 7.37864077669903e-07, "loss": 0.2045, "step": 10148 }, { "epoch": 98.41204819277108, "grad_norm": 12.815770149230957, "learning_rate": 7.330097087378641e-07, "loss": 0.1448, "step": 10149 }, { "epoch": 98.42168674698796, "grad_norm": 2.672391891479492, "learning_rate": 7.281553398058253e-07, "loss": 0.1897, "step": 10150 }, { "epoch": 98.43132530120482, "grad_norm": 13.980255126953125, "learning_rate": 7.233009708737864e-07, "loss": 0.3039, "step": 10151 }, { "epoch": 98.44096385542169, "grad_norm": 8.591696739196777, "learning_rate": 7.184466019417476e-07, "loss": 0.1235, "step": 10152 }, { "epoch": 98.45060240963855, "grad_norm": 3.4849443435668945, "learning_rate": 7.135922330097087e-07, "loss": 0.2121, "step": 10153 }, { "epoch": 98.46024096385543, "grad_norm": 5.684849739074707, "learning_rate": 7.0873786407767e-07, "loss": 0.1986, "step": 10154 }, { "epoch": 98.46987951807229, "grad_norm": 10.995152473449707, "learning_rate": 7.038834951456311e-07, "loss": 0.1596, "step": 10155 }, { "epoch": 98.47951807228915, "grad_norm": 16.19082260131836, "learning_rate": 6.990291262135923e-07, "loss": 0.1556, "step": 10156 }, { "epoch": 98.48915662650603, "grad_norm": 30.874858856201172, "learning_rate": 6.941747572815534e-07, "loss": 0.2229, "step": 10157 }, { "epoch": 98.4987951807229, "grad_norm": 10.072789192199707, "learning_rate": 6.893203883495146e-07, "loss": 0.4449, "step": 10158 }, { "epoch": 98.50843373493976, "grad_norm": 20.396039962768555, "learning_rate": 6.844660194174758e-07, "loss": 0.2156, "step": 10159 }, { "epoch": 98.51807228915662, "grad_norm": 0.5361161231994629, "learning_rate": 6.79611650485437e-07, "loss": 0.1174, "step": 10160 }, { "epoch": 98.5277108433735, "grad_norm": 1.743719220161438, "learning_rate": 6.747572815533981e-07, "loss": 0.2571, "step": 10161 }, { "epoch": 98.53734939759036, "grad_norm": 17.01435089111328, "learning_rate": 6.699029126213592e-07, "loss": 0.3159, "step": 10162 }, { "epoch": 98.54698795180722, "grad_norm": 4.983397483825684, "learning_rate": 6.650485436893205e-07, "loss": 0.2422, "step": 10163 }, { "epoch": 98.5566265060241, "grad_norm": 9.480997085571289, "learning_rate": 6.601941747572815e-07, "loss": 0.278, "step": 10164 }, { "epoch": 98.56626506024097, "grad_norm": 1.2114177942276, "learning_rate": 6.553398058252428e-07, "loss": 0.2425, "step": 10165 }, { "epoch": 98.57590361445783, "grad_norm": 47.72532653808594, "learning_rate": 6.504854368932039e-07, "loss": 0.1011, "step": 10166 }, { "epoch": 98.58554216867469, "grad_norm": 14.136075019836426, "learning_rate": 6.456310679611651e-07, "loss": 0.1939, "step": 10167 }, { "epoch": 98.59518072289157, "grad_norm": 9.912663459777832, "learning_rate": 6.407766990291262e-07, "loss": 0.124, "step": 10168 }, { "epoch": 98.60481927710843, "grad_norm": 2.336930990219116, "learning_rate": 6.359223300970874e-07, "loss": 0.1448, "step": 10169 }, { "epoch": 98.6144578313253, "grad_norm": 18.93570899963379, "learning_rate": 6.310679611650485e-07, "loss": 0.2536, "step": 10170 }, { "epoch": 98.62409638554217, "grad_norm": 37.74201965332031, "learning_rate": 6.262135922330098e-07, "loss": 0.2909, "step": 10171 }, { "epoch": 98.63373493975904, "grad_norm": 14.248114585876465, "learning_rate": 6.213592233009709e-07, "loss": 0.3419, "step": 10172 }, { "epoch": 98.6433734939759, "grad_norm": 5.795365810394287, "learning_rate": 6.165048543689321e-07, "loss": 0.2097, "step": 10173 }, { "epoch": 98.65301204819278, "grad_norm": 10.32579517364502, "learning_rate": 6.116504854368933e-07, "loss": 0.1438, "step": 10174 }, { "epoch": 98.66265060240964, "grad_norm": 1.4195644855499268, "learning_rate": 6.067961165048543e-07, "loss": 0.1475, "step": 10175 }, { "epoch": 98.6722891566265, "grad_norm": 5.8744916915893555, "learning_rate": 6.019417475728156e-07, "loss": 0.1382, "step": 10176 }, { "epoch": 98.68192771084337, "grad_norm": 11.92685317993164, "learning_rate": 5.970873786407767e-07, "loss": 0.4198, "step": 10177 }, { "epoch": 98.69156626506025, "grad_norm": 14.968707084655762, "learning_rate": 5.922330097087379e-07, "loss": 0.3592, "step": 10178 }, { "epoch": 98.70120481927711, "grad_norm": 4.1341962814331055, "learning_rate": 5.87378640776699e-07, "loss": 0.2616, "step": 10179 }, { "epoch": 98.71084337349397, "grad_norm": 19.54228973388672, "learning_rate": 5.825242718446603e-07, "loss": 0.1629, "step": 10180 }, { "epoch": 98.72048192771085, "grad_norm": 5.3483428955078125, "learning_rate": 5.776699029126213e-07, "loss": 0.1855, "step": 10181 }, { "epoch": 98.73012048192771, "grad_norm": 6.814778804779053, "learning_rate": 5.728155339805826e-07, "loss": 0.3227, "step": 10182 }, { "epoch": 98.73975903614458, "grad_norm": 14.540751457214355, "learning_rate": 5.679611650485437e-07, "loss": 0.2768, "step": 10183 }, { "epoch": 98.74939759036144, "grad_norm": 10.032700538635254, "learning_rate": 5.631067961165049e-07, "loss": 0.3238, "step": 10184 }, { "epoch": 98.75903614457832, "grad_norm": 2.5291244983673096, "learning_rate": 5.58252427184466e-07, "loss": 0.1115, "step": 10185 }, { "epoch": 98.76867469879518, "grad_norm": 2.7552785873413086, "learning_rate": 5.533980582524272e-07, "loss": 0.1574, "step": 10186 }, { "epoch": 98.77831325301204, "grad_norm": 37.696468353271484, "learning_rate": 5.485436893203884e-07, "loss": 0.2827, "step": 10187 }, { "epoch": 98.78795180722892, "grad_norm": 2.89998459815979, "learning_rate": 5.436893203883496e-07, "loss": 0.1798, "step": 10188 }, { "epoch": 98.79759036144578, "grad_norm": 24.545799255371094, "learning_rate": 5.388349514563107e-07, "loss": 0.2934, "step": 10189 }, { "epoch": 98.80722891566265, "grad_norm": 6.82612943649292, "learning_rate": 5.339805825242719e-07, "loss": 0.3331, "step": 10190 }, { "epoch": 98.81686746987951, "grad_norm": 17.040441513061523, "learning_rate": 5.291262135922331e-07, "loss": 0.4314, "step": 10191 }, { "epoch": 98.82650602409639, "grad_norm": 8.780009269714355, "learning_rate": 5.242718446601941e-07, "loss": 0.3954, "step": 10192 }, { "epoch": 98.83614457831325, "grad_norm": 7.911192893981934, "learning_rate": 5.194174757281554e-07, "loss": 0.2504, "step": 10193 }, { "epoch": 98.84578313253012, "grad_norm": 15.636551856994629, "learning_rate": 5.145631067961165e-07, "loss": 0.3194, "step": 10194 }, { "epoch": 98.855421686747, "grad_norm": 2.6155176162719727, "learning_rate": 5.097087378640777e-07, "loss": 0.1657, "step": 10195 }, { "epoch": 98.86506024096386, "grad_norm": 5.562469482421875, "learning_rate": 5.048543689320388e-07, "loss": 0.2642, "step": 10196 }, { "epoch": 98.87469879518072, "grad_norm": 2.1066739559173584, "learning_rate": 5.000000000000001e-07, "loss": 0.2289, "step": 10197 }, { "epoch": 98.88433734939758, "grad_norm": 4.580683708190918, "learning_rate": 4.951456310679611e-07, "loss": 0.1951, "step": 10198 }, { "epoch": 98.89397590361446, "grad_norm": 23.306215286254883, "learning_rate": 4.902912621359224e-07, "loss": 0.1488, "step": 10199 }, { "epoch": 98.90361445783132, "grad_norm": 47.56072998046875, "learning_rate": 4.854368932038835e-07, "loss": 0.2334, "step": 10200 }, { "epoch": 98.91325301204819, "grad_norm": 7.302976131439209, "learning_rate": 4.805825242718447e-07, "loss": 0.249, "step": 10201 }, { "epoch": 98.92289156626506, "grad_norm": 3.7464492321014404, "learning_rate": 4.7572815533980586e-07, "loss": 0.2482, "step": 10202 }, { "epoch": 98.93253012048193, "grad_norm": 26.731998443603516, "learning_rate": 4.70873786407767e-07, "loss": 0.2023, "step": 10203 }, { "epoch": 98.94216867469879, "grad_norm": 27.695964813232422, "learning_rate": 4.660194174757282e-07, "loss": 0.3298, "step": 10204 }, { "epoch": 98.95180722891567, "grad_norm": 2.5523815155029297, "learning_rate": 4.611650485436893e-07, "loss": 0.1729, "step": 10205 }, { "epoch": 98.96144578313253, "grad_norm": 15.939440727233887, "learning_rate": 4.5631067961165055e-07, "loss": 0.3835, "step": 10206 }, { "epoch": 98.9710843373494, "grad_norm": 5.7075042724609375, "learning_rate": 4.5145631067961165e-07, "loss": 0.2026, "step": 10207 }, { "epoch": 98.98072289156626, "grad_norm": 7.189345836639404, "learning_rate": 4.4660194174757285e-07, "loss": 0.1179, "step": 10208 }, { "epoch": 98.99036144578314, "grad_norm": 5.701066493988037, "learning_rate": 4.41747572815534e-07, "loss": 0.1267, "step": 10209 }, { "epoch": 99.00602409638554, "grad_norm": 10.176619529724121, "learning_rate": 4.368932038834952e-07, "loss": 0.303, "step": 10210 }, { "epoch": 99.01566265060241, "grad_norm": 20.589059829711914, "learning_rate": 4.320388349514563e-07, "loss": 0.2954, "step": 10211 }, { "epoch": 99.02530120481927, "grad_norm": 9.515780448913574, "learning_rate": 4.271844660194175e-07, "loss": 0.1476, "step": 10212 }, { "epoch": 99.03493975903615, "grad_norm": 5.499911308288574, "learning_rate": 4.2233009708737863e-07, "loss": 0.2568, "step": 10213 }, { "epoch": 99.04457831325301, "grad_norm": 6.334814548492432, "learning_rate": 4.174757281553398e-07, "loss": 0.4195, "step": 10214 }, { "epoch": 99.05421686746988, "grad_norm": 2.116140604019165, "learning_rate": 4.12621359223301e-07, "loss": 0.089, "step": 10215 }, { "epoch": 99.06385542168675, "grad_norm": 19.487165451049805, "learning_rate": 4.0776699029126217e-07, "loss": 0.2893, "step": 10216 }, { "epoch": 99.07349397590362, "grad_norm": 5.113729000091553, "learning_rate": 4.0291262135922337e-07, "loss": 0.256, "step": 10217 }, { "epoch": 99.08313253012048, "grad_norm": 7.551650047302246, "learning_rate": 3.9805825242718446e-07, "loss": 0.2258, "step": 10218 }, { "epoch": 99.09277108433734, "grad_norm": 5.53582763671875, "learning_rate": 3.9320388349514566e-07, "loss": 0.2454, "step": 10219 }, { "epoch": 99.10240963855422, "grad_norm": 8.457615852355957, "learning_rate": 3.883495145631068e-07, "loss": 0.2568, "step": 10220 }, { "epoch": 99.11204819277108, "grad_norm": 4.224174976348877, "learning_rate": 3.8349514563106795e-07, "loss": 0.4061, "step": 10221 }, { "epoch": 99.12168674698795, "grad_norm": 10.677207946777344, "learning_rate": 3.7864077669902915e-07, "loss": 0.092, "step": 10222 }, { "epoch": 99.13132530120482, "grad_norm": 11.608473777770996, "learning_rate": 3.7378640776699035e-07, "loss": 0.2324, "step": 10223 }, { "epoch": 99.14096385542169, "grad_norm": 21.70769500732422, "learning_rate": 3.689320388349515e-07, "loss": 0.4157, "step": 10224 }, { "epoch": 99.15060240963855, "grad_norm": 25.567392349243164, "learning_rate": 3.6407766990291264e-07, "loss": 0.3844, "step": 10225 }, { "epoch": 99.16024096385541, "grad_norm": 8.538290977478027, "learning_rate": 3.592233009708738e-07, "loss": 0.2209, "step": 10226 }, { "epoch": 99.16987951807229, "grad_norm": 9.23681640625, "learning_rate": 3.54368932038835e-07, "loss": 0.227, "step": 10227 }, { "epoch": 99.17951807228916, "grad_norm": 23.145214080810547, "learning_rate": 3.4951456310679613e-07, "loss": 0.2145, "step": 10228 }, { "epoch": 99.18915662650602, "grad_norm": 4.44747257232666, "learning_rate": 3.446601941747573e-07, "loss": 0.2458, "step": 10229 }, { "epoch": 99.1987951807229, "grad_norm": 35.318519592285156, "learning_rate": 3.398058252427185e-07, "loss": 0.3972, "step": 10230 }, { "epoch": 99.20843373493976, "grad_norm": 25.45937728881836, "learning_rate": 3.349514563106796e-07, "loss": 0.4235, "step": 10231 }, { "epoch": 99.21807228915662, "grad_norm": 8.205029487609863, "learning_rate": 3.3009708737864077e-07, "loss": 0.3061, "step": 10232 }, { "epoch": 99.2277108433735, "grad_norm": 19.973772048950195, "learning_rate": 3.2524271844660197e-07, "loss": 0.3314, "step": 10233 }, { "epoch": 99.23734939759036, "grad_norm": 9.045900344848633, "learning_rate": 3.203883495145631e-07, "loss": 0.1351, "step": 10234 }, { "epoch": 99.24698795180723, "grad_norm": 8.237865447998047, "learning_rate": 3.1553398058252426e-07, "loss": 0.288, "step": 10235 }, { "epoch": 99.25662650602409, "grad_norm": 25.153560638427734, "learning_rate": 3.1067961165048546e-07, "loss": 0.3587, "step": 10236 }, { "epoch": 99.26626506024097, "grad_norm": 22.55760955810547, "learning_rate": 3.0582524271844665e-07, "loss": 0.1671, "step": 10237 }, { "epoch": 99.27590361445783, "grad_norm": 2.1132750511169434, "learning_rate": 3.009708737864078e-07, "loss": 0.1861, "step": 10238 }, { "epoch": 99.2855421686747, "grad_norm": 3.019756317138672, "learning_rate": 2.9611650485436895e-07, "loss": 0.1466, "step": 10239 }, { "epoch": 99.29518072289157, "grad_norm": 10.517848014831543, "learning_rate": 2.9126213592233014e-07, "loss": 0.2505, "step": 10240 }, { "epoch": 99.30481927710844, "grad_norm": 6.486692428588867, "learning_rate": 2.864077669902913e-07, "loss": 0.1412, "step": 10241 }, { "epoch": 99.3144578313253, "grad_norm": 4.040076732635498, "learning_rate": 2.8155339805825244e-07, "loss": 0.2278, "step": 10242 }, { "epoch": 99.32409638554216, "grad_norm": 7.386366844177246, "learning_rate": 2.766990291262136e-07, "loss": 0.2938, "step": 10243 }, { "epoch": 99.33373493975904, "grad_norm": 4.606614112854004, "learning_rate": 2.718446601941748e-07, "loss": 0.272, "step": 10244 }, { "epoch": 99.3433734939759, "grad_norm": 10.894006729125977, "learning_rate": 2.669902912621359e-07, "loss": 0.2888, "step": 10245 }, { "epoch": 99.35301204819277, "grad_norm": 4.322603225708008, "learning_rate": 2.6213592233009707e-07, "loss": 0.1966, "step": 10246 }, { "epoch": 99.36265060240964, "grad_norm": 10.161637306213379, "learning_rate": 2.5728155339805827e-07, "loss": 0.2285, "step": 10247 }, { "epoch": 99.37228915662651, "grad_norm": 2.8937625885009766, "learning_rate": 2.524271844660194e-07, "loss": 0.2092, "step": 10248 }, { "epoch": 99.38192771084337, "grad_norm": 6.514683723449707, "learning_rate": 2.4757281553398056e-07, "loss": 0.2858, "step": 10249 }, { "epoch": 99.39156626506023, "grad_norm": 22.025110244750977, "learning_rate": 2.4271844660194176e-07, "loss": 0.2464, "step": 10250 }, { "epoch": 99.40120481927711, "grad_norm": 4.509922504425049, "learning_rate": 2.3786407766990293e-07, "loss": 0.2621, "step": 10251 }, { "epoch": 99.41084337349398, "grad_norm": 21.62734603881836, "learning_rate": 2.330097087378641e-07, "loss": 0.3236, "step": 10252 }, { "epoch": 99.42048192771084, "grad_norm": 25.408634185791016, "learning_rate": 2.2815533980582528e-07, "loss": 0.2121, "step": 10253 }, { "epoch": 99.43012048192772, "grad_norm": 14.571371078491211, "learning_rate": 2.2330097087378642e-07, "loss": 0.2412, "step": 10254 }, { "epoch": 99.43975903614458, "grad_norm": 11.354940414428711, "learning_rate": 2.184466019417476e-07, "loss": 0.3054, "step": 10255 }, { "epoch": 99.44939759036144, "grad_norm": 21.852285385131836, "learning_rate": 2.1359223300970874e-07, "loss": 0.4095, "step": 10256 }, { "epoch": 99.45903614457832, "grad_norm": 3.8351528644561768, "learning_rate": 2.087378640776699e-07, "loss": 0.207, "step": 10257 }, { "epoch": 99.46867469879518, "grad_norm": 14.486533164978027, "learning_rate": 2.0388349514563108e-07, "loss": 0.2571, "step": 10258 }, { "epoch": 99.47831325301205, "grad_norm": 30.973331451416016, "learning_rate": 1.9902912621359223e-07, "loss": 0.2622, "step": 10259 }, { "epoch": 99.48795180722891, "grad_norm": 51.39873123168945, "learning_rate": 1.941747572815534e-07, "loss": 0.1581, "step": 10260 }, { "epoch": 99.49759036144579, "grad_norm": 13.724178314208984, "learning_rate": 1.8932038834951458e-07, "loss": 0.344, "step": 10261 }, { "epoch": 99.50722891566265, "grad_norm": 6.791562557220459, "learning_rate": 1.8446601941747575e-07, "loss": 0.3308, "step": 10262 }, { "epoch": 99.51686746987951, "grad_norm": 1.2827483415603638, "learning_rate": 1.796116504854369e-07, "loss": 0.1369, "step": 10263 }, { "epoch": 99.52650602409639, "grad_norm": 10.589681625366211, "learning_rate": 1.7475728155339807e-07, "loss": 0.3322, "step": 10264 }, { "epoch": 99.53614457831326, "grad_norm": 39.00662612915039, "learning_rate": 1.6990291262135924e-07, "loss": 0.1715, "step": 10265 }, { "epoch": 99.54578313253012, "grad_norm": 13.053214073181152, "learning_rate": 1.6504854368932038e-07, "loss": 0.2479, "step": 10266 }, { "epoch": 99.55542168674698, "grad_norm": 11.759278297424316, "learning_rate": 1.6019417475728156e-07, "loss": 0.3158, "step": 10267 }, { "epoch": 99.56506024096386, "grad_norm": 18.631603240966797, "learning_rate": 1.5533980582524273e-07, "loss": 0.32, "step": 10268 }, { "epoch": 99.57469879518072, "grad_norm": 18.627073287963867, "learning_rate": 1.504854368932039e-07, "loss": 0.4131, "step": 10269 }, { "epoch": 99.58433734939759, "grad_norm": 4.26225471496582, "learning_rate": 1.4563106796116507e-07, "loss": 0.2019, "step": 10270 }, { "epoch": 99.59397590361446, "grad_norm": 6.337092876434326, "learning_rate": 1.4077669902912622e-07, "loss": 0.2982, "step": 10271 }, { "epoch": 99.60361445783133, "grad_norm": 10.52979850769043, "learning_rate": 1.359223300970874e-07, "loss": 0.2355, "step": 10272 }, { "epoch": 99.61325301204819, "grad_norm": 8.915982246398926, "learning_rate": 1.3106796116504854e-07, "loss": 0.365, "step": 10273 }, { "epoch": 99.62289156626505, "grad_norm": 5.904829502105713, "learning_rate": 1.262135922330097e-07, "loss": 0.1982, "step": 10274 }, { "epoch": 99.63253012048193, "grad_norm": 17.945770263671875, "learning_rate": 1.2135922330097088e-07, "loss": 0.2375, "step": 10275 }, { "epoch": 99.6421686746988, "grad_norm": 11.04223346710205, "learning_rate": 1.1650485436893205e-07, "loss": 0.2729, "step": 10276 }, { "epoch": 99.65180722891566, "grad_norm": 7.607748985290527, "learning_rate": 1.1165048543689321e-07, "loss": 0.3542, "step": 10277 }, { "epoch": 99.66144578313254, "grad_norm": 2.638014078140259, "learning_rate": 1.0679611650485437e-07, "loss": 0.1513, "step": 10278 }, { "epoch": 99.6710843373494, "grad_norm": 3.328749418258667, "learning_rate": 1.0194174757281554e-07, "loss": 0.2117, "step": 10279 }, { "epoch": 99.68072289156626, "grad_norm": 2.9694066047668457, "learning_rate": 9.70873786407767e-08, "loss": 0.1914, "step": 10280 }, { "epoch": 99.69036144578314, "grad_norm": 20.912384033203125, "learning_rate": 9.223300970873787e-08, "loss": 0.2906, "step": 10281 }, { "epoch": 99.7, "grad_norm": 8.83984088897705, "learning_rate": 8.737864077669903e-08, "loss": 0.3167, "step": 10282 }, { "epoch": 99.70963855421687, "grad_norm": 3.6895956993103027, "learning_rate": 8.252427184466019e-08, "loss": 0.2559, "step": 10283 }, { "epoch": 99.71927710843373, "grad_norm": 14.606685638427734, "learning_rate": 7.766990291262136e-08, "loss": 0.2826, "step": 10284 }, { "epoch": 99.7289156626506, "grad_norm": 2.5035598278045654, "learning_rate": 7.281553398058254e-08, "loss": 0.1466, "step": 10285 }, { "epoch": 99.73855421686747, "grad_norm": 16.458967208862305, "learning_rate": 6.79611650485437e-08, "loss": 0.1607, "step": 10286 }, { "epoch": 99.74819277108433, "grad_norm": 7.295142650604248, "learning_rate": 6.310679611650485e-08, "loss": 0.157, "step": 10287 }, { "epoch": 99.75783132530121, "grad_norm": 4.608581066131592, "learning_rate": 5.8252427184466026e-08, "loss": 0.457, "step": 10288 }, { "epoch": 99.76746987951807, "grad_norm": 20.58669662475586, "learning_rate": 5.3398058252427185e-08, "loss": 0.0846, "step": 10289 }, { "epoch": 99.77710843373494, "grad_norm": 11.982714653015137, "learning_rate": 4.854368932038835e-08, "loss": 0.2546, "step": 10290 }, { "epoch": 99.7867469879518, "grad_norm": 3.2385647296905518, "learning_rate": 4.3689320388349516e-08, "loss": 0.2996, "step": 10291 }, { "epoch": 99.79638554216868, "grad_norm": 3.2821290493011475, "learning_rate": 3.883495145631068e-08, "loss": 0.1701, "step": 10292 }, { "epoch": 99.80602409638554, "grad_norm": 4.060266971588135, "learning_rate": 3.398058252427185e-08, "loss": 0.1038, "step": 10293 }, { "epoch": 99.8156626506024, "grad_norm": 19.61920928955078, "learning_rate": 2.9126213592233013e-08, "loss": 0.3286, "step": 10294 }, { "epoch": 99.82530120481928, "grad_norm": 2.904825210571289, "learning_rate": 2.4271844660194175e-08, "loss": 0.246, "step": 10295 }, { "epoch": 99.83493975903615, "grad_norm": 10.573074340820312, "learning_rate": 1.941747572815534e-08, "loss": 0.2252, "step": 10296 }, { "epoch": 99.84457831325301, "grad_norm": 19.32904624938965, "learning_rate": 1.4563106796116507e-08, "loss": 0.2171, "step": 10297 }, { "epoch": 99.85421686746987, "grad_norm": 9.727706909179688, "learning_rate": 9.70873786407767e-09, "loss": 0.2524, "step": 10298 }, { "epoch": 99.86385542168675, "grad_norm": 3.6202993392944336, "learning_rate": 4.854368932038835e-09, "loss": 0.0903, "step": 10299 }, { "epoch": 99.87349397590361, "grad_norm": 21.268156051635742, "learning_rate": 0.0, "loss": 0.2877, "step": 10300 } ], "logging_steps": 1, "max_steps": 10300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7760731006775067e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }